Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: automatically handle commuting ops in rewrite rules
author Keith Randall <khr@golang.org>
Thu, 30 Mar 2017 03:30:22 +0000 (03:30 +0000)
committer Keith Randall <khr@golang.org>
Mon, 3 Apr 2017 22:03:43 +0000 (22:03 +0000)
Note that this is a redo of an undo of the original buggy CL 38666.

We have lots of rewrite rules that vary only in the fact that
we have 2 versions for the 2 different orderings of various
commuting ops. For example:

(ADDL x (MOVLconst [c])) -> (ADDLconst [c] x)
(ADDL (MOVLconst [c]) x) -> (ADDLconst [c] x)

It can get unwieldy quickly, especially when there is more than
one commuting op in a rule.

Our existing "fix" for this problem is to have rules that
canonicalize the operations first. For example:

(Eq64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Eq64 (Const64 <t> [c]) x)

Subsequent rules can then assume if there is a constant arg to Eq64,
it will be the first one. This fix kinda works, but it is fragile and
only works when we remember to include the required extra rules.

The fundamental problem is that the rule matcher doesn't
know anything about commuting ops. This CL fixes that fact.

We already have information about which ops commute. (The register
allocator takes advantage of commutativity.) The rule generator now
automatically generates multiple rules for a single source rule when
there are commutative ops in the rule. We can now drop all of our
almost-duplicate source-level rules and the canonicalization rules.

I have some CLs in progress that will be a lot less verbose when
the rule generator handles commutativity for me.

I had to reorganize the load-combining rules a bit. The 8-way OR rules
generated 128 different reorderings, which was causing the generator
to put too much code in the rewrite*.go files (the big ones were going
from 25K lines to 132K lines). Instead I reorganized the rules to
combine pairs of loads at a time. The generated rule files are now
actually a bit (5%) smaller.

Make.bash times are ~unchanged.

Compiler benchmarks are not observably different. Probably because
we don't spend much compiler time in rule matching anyway.

I've also done a pass over all of our ops adding commutative markings
for ops which hadn't had them previously.

Fixes #18292

Change-Id: Ic1c0e43fbf579539f459971625f69690c9ab8805
Reviewed-on: https://go-review.googlesource.com/38801
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
26 files changed:
src/cmd/compile/internal/ssa/gen/386.rules
src/cmd/compile/internal/ssa/gen/386Ops.go
src/cmd/compile/internal/ssa/gen/AMD64.rules
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
src/cmd/compile/internal/ssa/gen/ARM.rules
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/MIPS.rules
src/cmd/compile/internal/ssa/gen/MIPS64.rules
src/cmd/compile/internal/ssa/gen/PPC64.rules
src/cmd/compile/internal/ssa/gen/PPC64Ops.go
src/cmd/compile/internal/ssa/gen/S390X.rules
src/cmd/compile/internal/ssa/gen/S390XOps.go
src/cmd/compile/internal/ssa/gen/generic.rules
src/cmd/compile/internal/ssa/gen/genericOps.go
src/cmd/compile/internal/ssa/gen/rulegen.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewrite386.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssa/rewriteARM.go
src/cmd/compile/internal/ssa/rewriteARM64.go
src/cmd/compile/internal/ssa/rewriteMIPS.go
src/cmd/compile/internal/ssa/rewriteMIPS64.go
src/cmd/compile/internal/ssa/rewritePPC64.go
src/cmd/compile/internal/ssa/rewriteS390X.go
src/cmd/compile/internal/ssa/rewritedec.go
src/cmd/compile/internal/ssa/rewritegeneric.go

index 13d9bb935f3b71e49d8372dc312d762ce3108f24..c67796ea09ce95cff328d707f6a0fef06148ca90 100644 (file)
 
 // fold constants into instructions
 (ADDL x (MOVLconst [c])) -> (ADDLconst [c] x)
-(ADDL (MOVLconst [c]) x) -> (ADDLconst [c] x)
 (ADDLcarry x (MOVLconst [c])) -> (ADDLconstcarry [c] x)
-(ADDLcarry (MOVLconst [c]) x) -> (ADDLconstcarry [c] x)
 (ADCL x (MOVLconst [c]) f) -> (ADCLconst [c] x f)
 (ADCL (MOVLconst [c]) x f) -> (ADCLconst [c] x f)
 
 (SBBL x (MOVLconst [c]) f) -> (SBBLconst [c] x f)
 
 (MULL x (MOVLconst [c])) -> (MULLconst [c] x)
-(MULL (MOVLconst [c]) x) -> (MULLconst [c] x)
 
 (ANDL x (MOVLconst [c])) -> (ANDLconst [c] x)
-(ANDL (MOVLconst [c]) x) -> (ANDLconst [c] x)
 
 (ANDLconst [c] (ANDLconst [d] x)) -> (ANDLconst [c & d] x)
 
 (MULLconst [c] (MULLconst [d] x)) -> (MULLconst [int64(int32(c * d))] x)
 
 (ORL x (MOVLconst [c])) -> (ORLconst [c] x)
-(ORL (MOVLconst [c]) x) -> (ORLconst [c] x)
 
 (XORL x (MOVLconst [c])) -> (XORLconst [c] x)
-(XORL (MOVLconst [c]) x) -> (XORLconst [c] x)
 
 (SHLL x (MOVLconst [c])) -> (SHLLconst [c&31] x)
 (SHRL x (MOVLconst [c])) -> (SHRLconst [c&31] x)
 
 // Rotate instructions
 
-(ADDL (SHLLconst [c] x) (SHRLconst [32-c] x)) -> (ROLLconst [c   ] x)
-( ORL (SHLLconst [c] x) (SHRLconst [32-c] x)) -> (ROLLconst [c   ] x)
-(XORL (SHLLconst [c] x) (SHRLconst [32-c] x)) -> (ROLLconst [c   ] x)
-(ADDL (SHRLconst [c] x) (SHLLconst [32-c] x)) -> (ROLLconst [32-c] x)
-( ORL (SHRLconst [c] x) (SHLLconst [32-c] x)) -> (ROLLconst [32-c] x)
-(XORL (SHRLconst [c] x) (SHLLconst [32-c] x)) -> (ROLLconst [32-c] x)
-
-(ADDL <t> (SHLLconst x [c]) (SHRWconst x [16-c])) && c < 16 && t.Size() == 2 -> (ROLWconst x [   c])
-( ORL <t> (SHLLconst x [c]) (SHRWconst x [16-c])) && c < 16 && t.Size() == 2 -> (ROLWconst x [   c])
-(XORL <t> (SHLLconst x [c]) (SHRWconst x [16-c])) && c < 16 && t.Size() == 2 -> (ROLWconst x [   c])
-(ADDL <t> (SHRWconst x [c]) (SHLLconst x [16-c])) && c > 0  && t.Size() == 2 -> (ROLWconst x [16-c])
-( ORL <t> (SHRWconst x [c]) (SHLLconst x [16-c])) && c > 0  && t.Size() == 2 -> (ROLWconst x [16-c])
-(XORL <t> (SHRWconst x [c]) (SHLLconst x [16-c])) && c > 0  && t.Size() == 2 -> (ROLWconst x [16-c])
-
-(ADDL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c])) && c < 8 && t.Size() == 1 -> (ROLBconst x [   c])
-( ORL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c])) && c < 8 && t.Size() == 1 -> (ROLBconst x [   c])
-(XORL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c])) && c < 8 && t.Size() == 1 -> (ROLBconst x [   c])
-(ADDL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c])) && c > 0 && t.Size() == 1 -> (ROLBconst x [ 8-c])
-( ORL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c])) && c > 0 && t.Size() == 1 -> (ROLBconst x [ 8-c])
-(XORL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c])) && c > 0 && t.Size() == 1 -> (ROLBconst x [ 8-c])
+(ADDL (SHLLconst [c] x) (SHRLconst [d] x)) && d == 32-c -> (ROLLconst [c] x)
+( ORL (SHLLconst [c] x) (SHRLconst [d] x)) && d == 32-c -> (ROLLconst [c] x)
+(XORL (SHLLconst [c] x) (SHRLconst [d] x)) && d == 32-c -> (ROLLconst [c] x)
+
+(ADDL <t> (SHLLconst x [c]) (SHRWconst x [d])) && c < 16 && d == 16-c && t.Size() == 2 -> (ROLWconst x [c])
+( ORL <t> (SHLLconst x [c]) (SHRWconst x [d])) && c < 16 && d == 16-c && t.Size() == 2 -> (ROLWconst x [c])
+(XORL <t> (SHLLconst x [c]) (SHRWconst x [d])) && c < 16 && d == 16-c && t.Size() == 2 -> (ROLWconst x [c])
+
+(ADDL <t> (SHLLconst x [c]) (SHRBconst x [d])) && c < 8 && d == 8-c && t.Size() == 1 -> (ROLBconst x [c])
+( ORL <t> (SHLLconst x [c]) (SHRBconst x [d])) && c < 8 && d == 8-c && t.Size() == 1 -> (ROLBconst x [c])
+(XORL <t> (SHLLconst x [c]) (SHRBconst x [d])) && c < 8 && d == 8-c && t.Size() == 1 -> (ROLBconst x [c])
 
 (ROLLconst [c] (ROLLconst [d] x)) -> (ROLLconst [(c+d)&31] x)
 (ROLWconst [c] (ROLWconst [d] x)) -> (ROLWconst [(c+d)&15] x)
 (MULLconst [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEAL2 (SHLLconst <v.Type> [log2(c-2)] x) x)
 (MULLconst [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEAL4 (SHLLconst <v.Type> [log2(c-4)] x) x)
 (MULLconst [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEAL8 (SHLLconst <v.Type> [log2(c-8)] x) x)
-(MULLconst [c] x) && c%3 == 0 && isPowerOfTwo(c/3)-> (SHLLconst [log2(c/3)] (LEAL2 <v.Type> x x))
-(MULLconst [c] x) && c%5 == 0 && isPowerOfTwo(c/5)-> (SHLLconst [log2(c/5)] (LEAL4 <v.Type> x x))
-(MULLconst [c] x) && c%9 == 0 && isPowerOfTwo(c/9)-> (SHLLconst [log2(c/9)] (LEAL8 <v.Type> x x))
+(MULLconst [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHLLconst [log2(c/3)] (LEAL2 <v.Type> x x))
+(MULLconst [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHLLconst [log2(c/5)] (LEAL4 <v.Type> x x))
+(MULLconst [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHLLconst [log2(c/9)] (LEAL8 <v.Type> x x))
 
 // combine add/shift into LEAL
 (ADDL x (SHLLconst [3] y)) -> (LEAL8 x y)
 (ADDL x (SHLLconst [1] y)) -> (LEAL2 x y)
 (ADDL x (ADDL y y)) -> (LEAL2 x y)
 (ADDL x (ADDL x y)) -> (LEAL2 y x)
-(ADDL x (ADDL y x)) -> (LEAL2 y x)
 
 // combine ADDL/ADDLconst into LEAL1
 (ADDLconst [c] (ADDL x y)) -> (LEAL1 [c] x y)
 (ADDL (ADDLconst [c] x) y) -> (LEAL1 [c] x y)
-(ADDL x (ADDLconst [c] y)) -> (LEAL1 [c] x y)
 
 // fold ADDL into LEAL
 (ADDLconst [c] (LEAL [d] {s} x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x)
 (LEAL [c] {s} (ADDLconst [d] x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x)
 (LEAL [c] {s} (ADDL x y)) && x.Op != OpSB && y.Op != OpSB -> (LEAL1 [c] {s} x y)
 (ADDL x (LEAL [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEAL1 [c] {s} x y)
-(ADDL (LEAL [c] {s} x) y) && x.Op != OpSB && y.Op != OpSB -> (LEAL1 [c] {s} x y)
 
 // fold ADDLconst into LEALx
 (ADDLconst [c] (LEAL1 [d] {s} x y)) && is32Bit(c+d) -> (LEAL1 [c+d] {s} x y)
 (ADDLconst [c] (LEAL4 [d] {s} x y)) && is32Bit(c+d) -> (LEAL4 [c+d] {s} x y)
 (ADDLconst [c] (LEAL8 [d] {s} x y)) && is32Bit(c+d) -> (LEAL8 [c+d] {s} x y)
 (LEAL1 [c] {s} (ADDLconst [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEAL1 [c+d] {s} x y)
-(LEAL1 [c] {s} x (ADDLconst [d] y)) && is32Bit(c+d)   && y.Op != OpSB -> (LEAL1 [c+d] {s} x y)
 (LEAL2 [c] {s} (ADDLconst [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEAL2 [c+d] {s} x y)
 (LEAL2 [c] {s} x (ADDLconst [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEAL2 [c+2*d] {s} x y)
 (LEAL4 [c] {s} (ADDLconst [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEAL4 [c+d] {s} x y)
 
 // fold shifts into LEALx
 (LEAL1 [c] {s} x (SHLLconst [1] y)) -> (LEAL2 [c] {s} x y)
-(LEAL1 [c] {s} (SHLLconst [1] x) y) -> (LEAL2 [c] {s} y x)
 (LEAL1 [c] {s} x (SHLLconst [2] y)) -> (LEAL4 [c] {s} x y)
-(LEAL1 [c] {s} (SHLLconst [2] x) y) -> (LEAL4 [c] {s} y x)
 (LEAL1 [c] {s} x (SHLLconst [3] y)) -> (LEAL8 [c] {s} x y)
-(LEAL1 [c] {s} (SHLLconst [3] x) y) -> (LEAL8 [c] {s} y x)
-
 (LEAL2 [c] {s} x (SHLLconst [1] y)) -> (LEAL4 [c] {s} x y)
 (LEAL2 [c] {s} x (SHLLconst [2] y)) -> (LEAL8 [c] {s} x y)
 (LEAL4 [c] {s} x (SHLLconst [1] y)) -> (LEAL8 [c] {s} x y)
 // LEAL into LEAL1
 (LEAL1 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
        (LEAL1 [off1+off2] {mergeSym(sym1,sym2)} x y)
-(LEAL1 [off1] {sym1} x (LEAL [off2] {sym2} y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB ->
-       (LEAL1 [off1+off2] {mergeSym(sym1,sym2)} x y)
 
 // LEAL1 into LEAL
 (LEAL [off1] {sym1} (LEAL1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
 (CMPWconst x [0]) -> (TESTW x x)
 (CMPBconst x [0]) -> (TESTB x x)
 
-// Move shifts to second argument of ORs.  Helps load combining rules below.
-(ORL x:(SHLLconst _) y) && y.Op != Op386SHLLconst -> (ORL y x)
-
 // Combining byte loads into larger (unaligned) loads.
 // There are many ways these combinations could occur.  This is
 // designed to match the way encoding/binary.LittleEndian does it.
-(ORL                  x0:(MOVBload [i]   {s} p mem)
-    s0:(SHLLconst [8] x1:(MOVBload [i+1] {s} p mem)))
+(ORL                  x0:(MOVBload [i0] {s} p mem)
+    s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
+  && i1 == i0+1
   && x0.Uses == 1
   && x1.Uses == 1
   && s0.Uses == 1
   && clobber(x0)
   && clobber(x1)
   && clobber(s0)
-  -> @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem)
+  -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
 
 (ORL o0:(ORL
-                       x0:(MOVWload [i]   {s} p mem)
-    s0:(SHLLconst [16] x1:(MOVBload [i+2] {s} p mem)))
-    s1:(SHLLconst [24] x2:(MOVBload [i+3] {s} p mem)))
+                       x0:(MOVWload [i0] {s} p mem)
+    s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem)))
+    s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)))
+  && i2 == i0+2
+  && i3 == i0+3
   && x0.Uses == 1
   && x1.Uses == 1
   && x2.Uses == 1
   && clobber(s0)
   && clobber(s1)
   && clobber(o0)
-  -> @mergePoint(b,x0,x1,x2) (MOVLload [i] {s} p mem)
+  -> @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
 
-(ORL                  x0:(MOVBloadidx1 [i]   {s} p idx mem)
-    s0:(SHLLconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem)))
+(ORL                  x0:(MOVBloadidx1 [i0] {s} p idx mem)
+    s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+  && i1==i0+1
   && x0.Uses == 1
   && x1.Uses == 1
   && s0.Uses == 1
   && clobber(x0)
   && clobber(x1)
   && clobber(s0)
-  -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i] {s} p idx mem)
+  -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
 
 (ORL o0:(ORL
-                       x0:(MOVWloadidx1 [i]   {s} p idx mem)
-    s0:(SHLLconst [16] x1:(MOVBloadidx1 [i+2] {s} p idx mem)))
-    s1:(SHLLconst [24] x2:(MOVBloadidx1 [i+3] {s} p idx mem)))
+                       x0:(MOVWloadidx1 [i0] {s} p idx mem)
+    s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)))
+    s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
+  && i2 == i0+2
+  && i3 == i0+3
   && x0.Uses == 1
   && x1.Uses == 1
   && x2.Uses == 1
   && clobber(s0)
   && clobber(s1)
   && clobber(o0)
-  -> @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i] {s} p idx mem)
+  -> @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
 
 // Combine constant stores into larger (unaligned) stores.
 (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
index 2a638ff1edbec75d59ba17110c3926881a25224d..b287775194ffdd2cfafe593ba5d69ca8d9f30520 100644 (file)
@@ -193,10 +193,10 @@ func init() {
                {name: "MULL", argLength: 2, reg: gp21, asm: "IMULL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 * arg1
                {name: "MULLconst", argLength: 1, reg: gp11, asm: "IMULL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 * auxint
 
-               {name: "HMULL", argLength: 2, reg: gp21hmul, asm: "IMULL", clobberFlags: true}, // (arg0 * arg1) >> width
-               {name: "HMULLU", argLength: 2, reg: gp21hmul, asm: "MULL", clobberFlags: true}, // (arg0 * arg1) >> width
+               {name: "HMULL", argLength: 2, reg: gp21hmul, commutative: true, asm: "IMULL", clobberFlags: true}, // (arg0 * arg1) >> width
+               {name: "HMULLU", argLength: 2, reg: gp21hmul, commutative: true, asm: "MULL", clobberFlags: true}, // (arg0 * arg1) >> width
 
-               {name: "MULLQU", argLength: 2, reg: gp21mul, asm: "MULL", clobberFlags: true}, // arg0 * arg1, high 32 in result[0], low 32 in result[1]
+               {name: "MULLQU", argLength: 2, reg: gp21mul, commutative: true, asm: "MULL", clobberFlags: true}, // arg0 * arg1, high 32 in result[0], low 32 in result[1]
 
                {name: "AVGLU", argLength: 2, reg: gp21, commutative: true, resultInArg0: true, clobberFlags: true}, // (arg0 + arg1) / 2 as unsigned, all 32 result bits
 
@@ -229,9 +229,9 @@ func init() {
                {name: "UCOMISS", argLength: 2, reg: fp2flags, asm: "UCOMISS", typ: "Flags", usesScratch: true}, // arg0 compare to arg1, f32
                {name: "UCOMISD", argLength: 2, reg: fp2flags, asm: "UCOMISD", typ: "Flags", usesScratch: true}, // arg0 compare to arg1, f64
 
-               {name: "TESTL", argLength: 2, reg: gp2flags, asm: "TESTL", typ: "Flags"},                    // (arg0 & arg1) compare to 0
-               {name: "TESTW", argLength: 2, reg: gp2flags, asm: "TESTW", typ: "Flags"},                    // (arg0 & arg1) compare to 0
-               {name: "TESTB", argLength: 2, reg: gp2flags, asm: "TESTB", typ: "Flags"},                    // (arg0 & arg1) compare to 0
+               {name: "TESTL", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTL", typ: "Flags"}, // (arg0 & arg1) compare to 0
+               {name: "TESTW", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTW", typ: "Flags"}, // (arg0 & arg1) compare to 0
+               {name: "TESTB", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTB", typ: "Flags"}, // (arg0 & arg1) compare to 0
                {name: "TESTLconst", argLength: 1, reg: gp1flags, asm: "TESTL", typ: "Flags", aux: "Int32"}, // (arg0 & auxint) compare to 0
                {name: "TESTWconst", argLength: 1, reg: gp1flags, asm: "TESTW", typ: "Flags", aux: "Int16"}, // (arg0 & auxint) compare to 0
                {name: "TESTBconst", argLength: 1, reg: gp1flags, asm: "TESTB", typ: "Flags", aux: "Int8"},  // (arg0 & auxint) compare to 0
@@ -314,7 +314,7 @@ func init() {
                {name: "PXOR", argLength: 2, reg: fp21, asm: "PXOR", commutative: true, resultInArg0: true}, // exclusive or, applied to X regs for float negation.
 
                {name: "LEAL", argLength: 1, reg: gp11sb, aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxint + offset encoded in aux
-               {name: "LEAL1", argLength: 2, reg: gp21sb, aux: "SymOff", symEffect: "Addr"},                         // arg0 + arg1 + auxint + aux
+               {name: "LEAL1", argLength: 2, reg: gp21sb, commutative: true, aux: "SymOff", symEffect: "Addr"},      // arg0 + arg1 + auxint + aux
                {name: "LEAL2", argLength: 2, reg: gp21sb, aux: "SymOff", symEffect: "Addr"},                         // arg0 + 2*arg1 + auxint + aux
                {name: "LEAL4", argLength: 2, reg: gp21sb, aux: "SymOff", symEffect: "Addr"},                         // arg0 + 4*arg1 + auxint + aux
                {name: "LEAL8", argLength: 2, reg: gp21sb, aux: "SymOff", symEffect: "Addr"},                         // arg0 + 8*arg1 + auxint + aux
@@ -331,17 +331,17 @@ func init() {
                {name: "MOVLstore", argLength: 3, reg: gpstore, asm: "MOVL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},    // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
 
                // indexed loads/stores
-               {name: "MOVBloadidx1", argLength: 3, reg: gploadidx, asm: "MOVBLZX", aux: "SymOff", symEffect: "Read"}, // load a byte from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVWloadidx1", argLength: 3, reg: gploadidx, asm: "MOVWLZX", aux: "SymOff", symEffect: "Read"}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVWloadidx2", argLength: 3, reg: gploadidx, asm: "MOVWLZX", aux: "SymOff", symEffect: "Read"}, // load 2 bytes from arg0+2*arg1+auxint+aux. arg2=mem
-               {name: "MOVLloadidx1", argLength: 3, reg: gploadidx, asm: "MOVL", aux: "SymOff", symEffect: "Read"},    // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVLloadidx4", argLength: 3, reg: gploadidx, asm: "MOVL", aux: "SymOff", symEffect: "Read"},    // load 4 bytes from arg0+4*arg1+auxint+aux. arg2=mem
+               {name: "MOVBloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBLZX", aux: "SymOff", symEffect: "Read"}, // load a byte from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVWloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVWLZX", aux: "SymOff", symEffect: "Read"}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVWloadidx2", argLength: 3, reg: gploadidx, asm: "MOVWLZX", aux: "SymOff", symEffect: "Read"},                    // load 2 bytes from arg0+2*arg1+auxint+aux. arg2=mem
+               {name: "MOVLloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVL", aux: "SymOff", symEffect: "Read"},    // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVLloadidx4", argLength: 3, reg: gploadidx, asm: "MOVL", aux: "SymOff", symEffect: "Read"},                       // load 4 bytes from arg0+4*arg1+auxint+aux. arg2=mem
                // TODO: sign-extending indexed loads
-               {name: "MOVBstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVB", aux: "SymOff", symEffect: "Write"}, // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVWstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff", symEffect: "Write"}, // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVWstoreidx2", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff", symEffect: "Write"}, // store 2 bytes in arg2 to arg0+2*arg1+auxint+aux. arg3=mem
-               {name: "MOVLstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVL", aux: "SymOff", symEffect: "Write"}, // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVLstoreidx4", argLength: 4, reg: gpstoreidx, asm: "MOVL", aux: "SymOff", symEffect: "Write"}, // store 4 bytes in arg2 to arg0+4*arg1+auxint+aux. arg3=mem
+               {name: "MOVBstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVB", aux: "SymOff", symEffect: "Write"}, // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVWstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVW", aux: "SymOff", symEffect: "Write"}, // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVWstoreidx2", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff", symEffect: "Write"},                    // store 2 bytes in arg2 to arg0+2*arg1+auxint+aux. arg3=mem
+               {name: "MOVLstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVL", aux: "SymOff", symEffect: "Write"}, // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVLstoreidx4", argLength: 4, reg: gpstoreidx, asm: "MOVL", aux: "SymOff", symEffect: "Write"},                    // store 4 bytes in arg2 to arg0+4*arg1+auxint+aux. arg3=mem
                // TODO: add size-mismatched indexed loads, like MOVBstoreidx4.
 
                // For storeconst ops, the AuxInt field encodes both
index 2e3e6c01ba91b7404e7b7be183a0854a37eb62cb..ac45cd71e51b4403a976a76497ee02fd56ebaa51 100644 (file)
 (NE (TESTB (SETA  cmp) (SETA  cmp)) yes no) -> (UGT cmp yes no)
 (NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) -> (UGE cmp yes no)
 
-// Normalize TESTx argument order for BTx rewrites below.
-(TESTQ y x:(SHLQ _ _)) && y.Op != OpAMD64SHLQ -> (TESTQ x y)
-(TESTL y x:(SHLL _ _)) && y.Op != OpAMD64SHLL -> (TESTL x y)
-
 // Recognize bit tests: a&(1<<b) != 0 for b suitably bounded
 // Note that ULT and SETB check the carry flag; they are identical to CS and SETCS.
 // Same, mutatis mutandis, for UGE and SETAE, and CC and SETCC.
 
 // fold constants into instructions
 (ADDQ x (MOVQconst [c])) && is32Bit(c) -> (ADDQconst [c] x)
-(ADDQ (MOVQconst [c]) x) && is32Bit(c) -> (ADDQconst [c] x)
 (ADDL x (MOVLconst [c])) -> (ADDLconst [c] x)
-(ADDL (MOVLconst [c]) x) -> (ADDLconst [c] x)
 
 (SUBQ x (MOVQconst [c])) && is32Bit(c) -> (SUBQconst x [c])
 (SUBQ (MOVQconst [c]) x) && is32Bit(c) -> (NEGQ (SUBQconst <v.Type> x [c]))
 (SUBL (MOVLconst [c]) x) -> (NEGL (SUBLconst <v.Type> x [c]))
 
 (MULQ x (MOVQconst [c])) && is32Bit(c) -> (MULQconst [c] x)
-(MULQ (MOVQconst [c]) x) && is32Bit(c) -> (MULQconst [c] x)
 (MULL x (MOVLconst [c])) -> (MULLconst [c] x)
-(MULL (MOVLconst [c]) x) -> (MULLconst [c] x)
 
 (ANDQ x (MOVQconst [c])) && is32Bit(c) -> (ANDQconst [c] x)
-(ANDQ (MOVQconst [c]) x) && is32Bit(c) -> (ANDQconst [c] x)
 (ANDL x (MOVLconst [c])) -> (ANDLconst [c] x)
-(ANDL (MOVLconst [c]) x) -> (ANDLconst [c] x)
 
 (ANDLconst [c] (ANDLconst [d] x)) -> (ANDLconst [c & d] x)
 (ANDQconst [c] (ANDQconst [d] x)) -> (ANDQconst [c & d] x)
 (MULQconst [c] (MULQconst [d] x)) && is32Bit(c*d) -> (MULQconst [c * d] x)
 
 (ORQ x (MOVQconst [c])) && is32Bit(c) -> (ORQconst [c] x)
-(ORQ (MOVQconst [c]) x) && is32Bit(c) -> (ORQconst [c] x)
 (ORL x (MOVLconst [c])) -> (ORLconst [c] x)
-(ORL (MOVLconst [c]) x) -> (ORLconst [c] x)
 
 (XORQ x (MOVQconst [c])) && is32Bit(c) -> (XORQconst [c] x)
-(XORQ (MOVQconst [c]) x) && is32Bit(c) -> (XORQconst [c] x)
 (XORL x (MOVLconst [c])) -> (XORLconst [c] x)
-(XORL (MOVLconst [c]) x) -> (XORLconst [c] x)
 
 (SHLQ x (MOVQconst [c])) -> (SHLQconst [c&63] x)
 (SHLQ x (MOVLconst [c])) -> (SHLQconst [c&63] x)
 
 // Rotate instructions
 
-(ADDQ (SHLQconst x [c]) (SHRQconst x [64-c])) -> (ROLQconst x [   c])
-( ORQ (SHLQconst x [c]) (SHRQconst x [64-c])) -> (ROLQconst x [   c])
-(XORQ (SHLQconst x [c]) (SHRQconst x [64-c])) -> (ROLQconst x [   c])
-(ADDQ (SHRQconst x [c]) (SHLQconst x [64-c])) -> (ROLQconst x [64-c])
-( ORQ (SHRQconst x [c]) (SHLQconst x [64-c])) -> (ROLQconst x [64-c])
-(XORQ (SHRQconst x [c]) (SHLQconst x [64-c])) -> (ROLQconst x [64-c])
-
-(ADDL (SHLLconst x [c]) (SHRLconst x [32-c])) -> (ROLLconst x [   c])
-( ORL (SHLLconst x [c]) (SHRLconst x [32-c])) -> (ROLLconst x [   c])
-(XORL (SHLLconst x [c]) (SHRLconst x [32-c])) -> (ROLLconst x [   c])
-(ADDL (SHRLconst x [c]) (SHLLconst x [32-c])) -> (ROLLconst x [32-c])
-( ORL (SHRLconst x [c]) (SHLLconst x [32-c])) -> (ROLLconst x [32-c])
-(XORL (SHRLconst x [c]) (SHLLconst x [32-c])) -> (ROLLconst x [32-c])
-
-(ADDL <t> (SHLLconst x [c]) (SHRWconst x [16-c])) && c < 16 && t.Size() == 2 -> (ROLWconst x [   c])
-( ORL <t> (SHLLconst x [c]) (SHRWconst x [16-c])) && c < 16 && t.Size() == 2 -> (ROLWconst x [   c])
-(XORL <t> (SHLLconst x [c]) (SHRWconst x [16-c])) && c < 16 && t.Size() == 2 -> (ROLWconst x [   c])
-(ADDL <t> (SHRWconst x [c]) (SHLLconst x [16-c])) && c > 0  && t.Size() == 2 -> (ROLWconst x [16-c])
-( ORL <t> (SHRWconst x [c]) (SHLLconst x [16-c])) && c > 0  && t.Size() == 2 -> (ROLWconst x [16-c])
-(XORL <t> (SHRWconst x [c]) (SHLLconst x [16-c])) && c > 0  && t.Size() == 2 -> (ROLWconst x [16-c])
-
-(ADDL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c])) && c < 8 && t.Size() == 1 -> (ROLBconst x [   c])
-( ORL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c])) && c < 8 && t.Size() == 1 -> (ROLBconst x [   c])
-(XORL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c])) && c < 8 && t.Size() == 1 -> (ROLBconst x [   c])
-(ADDL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c])) && c > 0 && t.Size() == 1 -> (ROLBconst x [ 8-c])
-( ORL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c])) && c > 0 && t.Size() == 1 -> (ROLBconst x [ 8-c])
-(XORL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c])) && c > 0 && t.Size() == 1 -> (ROLBconst x [ 8-c])
+(ADDQ (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c -> (ROLQconst x [c])
+( ORQ (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c -> (ROLQconst x [c])
+(XORQ (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c -> (ROLQconst x [c])
+
+(ADDL (SHLLconst x [c]) (SHRLconst x [d])) && d==32-c -> (ROLLconst x [c])
+( ORL (SHLLconst x [c]) (SHRLconst x [d])) && d==32-c -> (ROLLconst x [c])
+(XORL (SHLLconst x [c]) (SHRLconst x [d])) && d==32-c -> (ROLLconst x [c])
+
+(ADDL <t> (SHLLconst x [c]) (SHRWconst x [d])) && d==16-c && c < 16 && t.Size() == 2 -> (ROLWconst x [c])
+( ORL <t> (SHLLconst x [c]) (SHRWconst x [d])) && d==16-c && c < 16 && t.Size() == 2 -> (ROLWconst x [c])
+(XORL <t> (SHLLconst x [c]) (SHRWconst x [d])) && d==16-c && c < 16 && t.Size() == 2 -> (ROLWconst x [c])
+
+(ADDL <t> (SHLLconst x [c]) (SHRBconst x [d])) && d==8-c  && c < 8 && t.Size() == 1 -> (ROLBconst x [c])
+( ORL <t> (SHLLconst x [c]) (SHRBconst x [d])) && d==8-c  && c < 8 && t.Size() == 1 -> (ROLBconst x [c])
+(XORL <t> (SHLLconst x [c]) (SHRBconst x [d])) && d==8-c  && c < 8 && t.Size() == 1 -> (ROLBconst x [c])
 
 (ROLQconst [c] (ROLQconst [d] x)) -> (ROLQconst [(c+d)&63] x)
 (ROLLconst [c] (ROLLconst [d] x)) -> (ROLLconst [(c+d)&31] x)
 (MULQconst [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEAQ2 (SHLQconst <v.Type> [log2(c-2)] x) x)
 (MULQconst [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEAQ4 (SHLQconst <v.Type> [log2(c-4)] x) x)
 (MULQconst [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEAQ8 (SHLQconst <v.Type> [log2(c-8)] x) x)
-(MULQconst [c] x) && c%3 == 0 && isPowerOfTwo(c/3)-> (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x))
-(MULQconst [c] x) && c%5 == 0 && isPowerOfTwo(c/5)-> (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x))
-(MULQconst [c] x) && c%9 == 0 && isPowerOfTwo(c/9)-> (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x))
+(MULQconst [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x))
+(MULQconst [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x))
+(MULQconst [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x))
 
 // combine add/shift into LEAQ
 (ADDQ x (SHLQconst [3] y)) -> (LEAQ8 x y)
 (ADDQ x (SHLQconst [1] y)) -> (LEAQ2 x y)
 (ADDQ x (ADDQ y y)) -> (LEAQ2 x y)
 (ADDQ x (ADDQ x y)) -> (LEAQ2 y x)
-(ADDQ x (ADDQ y x)) -> (LEAQ2 y x)
 
 // combine ADDQ/ADDQconst into LEAQ1
 (ADDQconst [c] (ADDQ x y)) -> (LEAQ1 [c] x y)
 (ADDQ (ADDQconst [c] x) y) -> (LEAQ1 [c] x y)
-(ADDQ x (ADDQconst [c] y)) -> (LEAQ1 [c] x y)
 
 // fold ADDQ into LEAQ
 (ADDQconst [c] (LEAQ [d] {s} x)) && is32Bit(c+d) -> (LEAQ [c+d] {s} x)
 (LEAQ [c] {s} (ADDQconst [d] x)) && is32Bit(c+d) -> (LEAQ [c+d] {s} x)
 (LEAQ [c] {s} (ADDQ x y)) && x.Op != OpSB && y.Op != OpSB -> (LEAQ1 [c] {s} x y)
 (ADDQ x (LEAQ [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEAQ1 [c] {s} x y)
-(ADDQ (LEAQ [c] {s} x) y) && x.Op != OpSB && y.Op != OpSB -> (LEAQ1 [c] {s} x y)
 
 // fold ADDQconst into LEAQx
 (ADDQconst [c] (LEAQ1 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ1 [c+d] {s} x y)
 (ADDQconst [c] (LEAQ4 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ4 [c+d] {s} x y)
 (ADDQconst [c] (LEAQ8 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ8 [c+d] {s} x y)
 (LEAQ1 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEAQ1 [c+d] {s} x y)
-(LEAQ1 [c] {s} x (ADDQconst [d] y)) && is32Bit(c+d)   && y.Op != OpSB -> (LEAQ1 [c+d] {s} x y)
 (LEAQ2 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEAQ2 [c+d] {s} x y)
 (LEAQ2 [c] {s} x (ADDQconst [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEAQ2 [c+2*d] {s} x y)
 (LEAQ4 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEAQ4 [c+d] {s} x y)
 
 // fold shifts into LEAQx
 (LEAQ1 [c] {s} x (SHLQconst [1] y)) -> (LEAQ2 [c] {s} x y)
-(LEAQ1 [c] {s} (SHLQconst [1] x) y) -> (LEAQ2 [c] {s} y x)
 (LEAQ1 [c] {s} x (SHLQconst [2] y)) -> (LEAQ4 [c] {s} x y)
-(LEAQ1 [c] {s} (SHLQconst [2] x) y) -> (LEAQ4 [c] {s} y x)
 (LEAQ1 [c] {s} x (SHLQconst [3] y)) -> (LEAQ8 [c] {s} x y)
-(LEAQ1 [c] {s} (SHLQconst [3] x) y) -> (LEAQ8 [c] {s} y x)
-
 (LEAQ2 [c] {s} x (SHLQconst [1] y)) -> (LEAQ4 [c] {s} x y)
 (LEAQ2 [c] {s} x (SHLQconst [2] y)) -> (LEAQ8 [c] {s} x y)
 (LEAQ4 [c] {s} x (SHLQconst [1] y)) -> (LEAQ8 [c] {s} x y)
 // LEAQ into LEAQ1
 (LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
        (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
-(LEAQ1 [off1] {sym1} x (LEAQ [off2] {sym2} y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB ->
-       (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
 
 // LEAQ1 into LEAQ
 (LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
 (TESTL (MOVLconst [c]) x) -> (TESTLconst [c] x)
 (TESTW (MOVLconst [c]) x) -> (TESTWconst [c] x)
 (TESTB (MOVLconst [c]) x) -> (TESTBconst [c] x)
-(TESTQ x (MOVQconst [c])) && is32Bit(c) -> (TESTQconst [c] x)
-(TESTL x (MOVLconst [c])) -> (TESTLconst [c] x)
-(TESTW x (MOVLconst [c])) -> (TESTWconst [c] x)
-(TESTB x (MOVLconst [c])) -> (TESTBconst [c] x)
 
 // TEST %reg,%reg is shorter than CMP
 (CMPQconst x [0]) -> (TESTQ x x)
 (CMPWconst x [0]) -> (TESTW x x)
 (CMPBconst x [0]) -> (TESTB x x)
 
-// Move shifts to second argument of ORs.  Helps load combining rules below.
-(ORQ x:(SHLQconst _) y) && y.Op != OpAMD64SHLQconst -> (ORQ y x)
-(ORL x:(SHLLconst _) y) && y.Op != OpAMD64SHLLconst -> (ORL y x)
-
 // Combining byte loads into larger (unaligned) loads.
 // There are many ways these combinations could occur.  This is
 // designed to match the way encoding/binary.LittleEndian does it.
-(ORL                  x0:(MOVBload [i]   {s} p mem)
-    s0:(SHLLconst [8] x1:(MOVBload [i+1] {s} p mem)))
+
+// Little-endian loads
+
+(ORL                  x0:(MOVBload [i0] {s} p mem)
+    sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
+  && i1 == i0+1
   && x0.Uses == 1
   && x1.Uses == 1
-  && s0.Uses == 1
+  && sh.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(s0)
-  -> @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
 
-(ORL o0:(ORL
-                       x0:(MOVWload [i]   {s} p mem)
-    s0:(SHLLconst [16] x1:(MOVBload [i+2] {s} p mem)))
-    s1:(SHLLconst [24] x2:(MOVBload [i+3] {s} p mem)))
+(ORQ                  x0:(MOVBload [i0] {s} p mem)
+    sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem)))
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+
+(ORL                   x0:(MOVWload [i0] {s} p mem)
+    sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem)))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
+
+(ORQ                   x0:(MOVWload [i0] {s} p mem)
+    sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem)))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
+
+(ORQ                   x0:(MOVLload [i0] {s} p mem)
+    sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)))
+  && i1 == i0+4
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
+
+(ORL
+    s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))
+    or:(ORL
+        s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0+8
+  && j0 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && o0.Uses == 1
-  && mergePoint(b,x0,x1,x2) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
   && clobber(s0)
   && clobber(s1)
-  && clobber(o0)
-  -> @mergePoint(b,x0,x1,x2) (MOVLload [i] {s} p mem)
-
-(ORQ o0:(ORQ o1:(ORQ o2:(ORQ o3:(ORQ o4:(ORQ o5:(ORQ
-                       x0:(MOVBload [i]   {s} p mem)
-    s0:(SHLQconst [8]  x1:(MOVBload [i+1] {s} p mem)))
-    s1:(SHLQconst [16] x2:(MOVBload [i+2] {s} p mem)))
-    s2:(SHLQconst [24] x3:(MOVBload [i+3] {s} p mem)))
-    s3:(SHLQconst [32] x4:(MOVBload [i+4] {s} p mem)))
-    s4:(SHLQconst [40] x5:(MOVBload [i+5] {s} p mem)))
-    s5:(SHLQconst [48] x6:(MOVBload [i+6] {s} p mem)))
-    s6:(SHLQconst [56] x7:(MOVBload [i+7] {s} p mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+
+(ORQ
+    s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))
+    or:(ORQ
+        s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0+8
+  && j0 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && x4.Uses == 1
-  && x5.Uses == 1
-  && x6.Uses == 1
-  && x7.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && s2.Uses == 1
-  && s3.Uses == 1
-  && s4.Uses == 1
-  && s5.Uses == 1
-  && s6.Uses == 1
-  && o0.Uses == 1
-  && o1.Uses == 1
-  && o2.Uses == 1
-  && o3.Uses == 1
-  && o4.Uses == 1
-  && o5.Uses == 1
-  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
-  && clobber(x3)
-  && clobber(x4)
-  && clobber(x5)
-  && clobber(x6)
-  && clobber(x7)
   && clobber(s0)
   && clobber(s1)
-  && clobber(s2)
-  && clobber(s3)
-  && clobber(s4)
-  && clobber(s5)
-  && clobber(s6)
-  && clobber(o0)
-  && clobber(o1)
-  && clobber(o2)
-  && clobber(o3)
-  && clobber(o4)
-  && clobber(o5)
-  -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQload [i] {s} p mem)
-
-(ORL                  x0:(MOVBloadidx1 [i]   {s} p idx mem)
-    s0:(SHLLconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+
+(ORQ
+    s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem))
+    or:(ORQ
+        s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem))
+       y))
+  && i1 == i0+2
+  && j1 == j0+16
+  && j0 % 32 == 0
   && x0.Uses == 1
   && x1.Uses == 1
   && s0.Uses == 1
+  && s1.Uses == 1
+  && or.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
   && clobber(s0)
-  -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i] {s} p idx mem)
+  && clobber(s1)
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
 
-(ORL o0:(ORL
-                       x0:(MOVWloadidx1 [i]   {s} p idx mem)
-    s0:(SHLLconst [16] x1:(MOVBloadidx1 [i+2] {s} p idx mem)))
-    s1:(SHLLconst [24] x2:(MOVBloadidx1 [i+3] {s} p idx mem)))
+// Little-endian indexed loads
+
+(ORL                  x0:(MOVBloadidx1 [i0] {s} p idx mem)
+    sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+
+(ORQ                  x0:(MOVBloadidx1 [i0] {s} p idx mem)
+    sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+
+(ORL                   x0:(MOVWloadidx1 [i0] {s} p idx mem)
+    sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+
+(ORQ                   x0:(MOVWloadidx1 [i0] {s} p idx mem)
+    sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+
+(ORQ                   x0:(MOVLloadidx1 [i0] {s} p idx mem)
+    sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)))
+  && i1 == i0+4
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+
+(ORL
+    s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
+    or:(ORL
+        s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0+8
+  && j0 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && o0.Uses == 1
-  && mergePoint(b,x0,x1,x2) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
   && clobber(s0)
   && clobber(s1)
-  && clobber(o0)
-  -> @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i] {s} p idx mem)
-
-(ORQ o0:(ORQ o1:(ORQ o2:(ORQ o3:(ORQ o4:(ORQ o5:(ORQ
-                       x0:(MOVBloadidx1 [i]   {s} p idx mem)
-    s0:(SHLQconst [8]  x1:(MOVBloadidx1 [i+1] {s} p idx mem)))
-    s1:(SHLQconst [16] x2:(MOVBloadidx1 [i+2] {s} p idx mem)))
-    s2:(SHLQconst [24] x3:(MOVBloadidx1 [i+3] {s} p idx mem)))
-    s3:(SHLQconst [32] x4:(MOVBloadidx1 [i+4] {s} p idx mem)))
-    s4:(SHLQconst [40] x5:(MOVBloadidx1 [i+5] {s} p idx mem)))
-    s5:(SHLQconst [48] x6:(MOVBloadidx1 [i+6] {s} p idx mem)))
-    s6:(SHLQconst [56] x7:(MOVBloadidx1 [i+7] {s} p idx mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+
+(ORQ
+    s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
+    or:(ORQ
+        s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0+8
+  && j0 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && x4.Uses == 1
-  && x5.Uses == 1
-  && x6.Uses == 1
-  && x7.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && s2.Uses == 1
-  && s3.Uses == 1
-  && s4.Uses == 1
-  && s5.Uses == 1
-  && s6.Uses == 1
-  && o0.Uses == 1
-  && o1.Uses == 1
-  && o2.Uses == 1
-  && o3.Uses == 1
-  && o4.Uses == 1
-  && o5.Uses == 1
-  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
-  && clobber(x3)
-  && clobber(x4)
-  && clobber(x5)
-  && clobber(x6)
-  && clobber(x7)
   && clobber(s0)
   && clobber(s1)
-  && clobber(s2)
-  && clobber(s3)
-  && clobber(s4)
-  && clobber(s5)
-  && clobber(s6)
-  && clobber(o0)
-  && clobber(o1)
-  && clobber(o2)
-  && clobber(o3)
-  && clobber(o4)
-  && clobber(o5)
-  -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQloadidx1 <v.Type> [i] {s} p idx mem)
-
-// Combine 2 byte loads + shifts into (unaligned) word load + rolw 8
-(ORL
-                       x0:(MOVBload [i] {s} p mem)
-    s0:(SHLLconst [8]  x1:(MOVBload [i-1] {s} p mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+
+(ORQ
+    s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem))
+    or:(ORQ
+        s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))
+       y))
+  && i1 == i0+2
+  && j1 == j0+16
+  && j0 % 32 == 0
   && x0.Uses == 1
   && x1.Uses == 1
   && s0.Uses == 1
+  && s1.Uses == 1
+  && or.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
   && clobber(s0)
-  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i-1] {s} p mem))
+  && clobber(s1)
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+
+// Big-endian loads
+
+(ORL
+                       x1:(MOVBload [i1] {s} p mem)
+    sh:(SHLLconst [8]  x0:(MOVBload [i0] {s} p mem)))
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
+
+(ORQ
+                       x1:(MOVBload [i1] {s} p mem)
+    sh:(SHLQconst [8]  x0:(MOVBload [i0] {s} p mem)))
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
+
+(ORL
+                        r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))
+    sh:(SHLLconst [16]  r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
+
+(ORQ
+                        r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))
+    sh:(SHLQconst [16]  r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
+
+(ORQ
+                        r1:(BSWAPL x1:(MOVLload [i1] {s} p mem))
+    sh:(SHLQconst [32]  r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))))
+  && i1 == i0+4
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
 
 (ORL
-                       x0:(MOVBloadidx1 [i] {s} p idx mem)
-    s0:(SHLLconst [8]  x1:(MOVBloadidx1 [i-1] {s} p idx mem)))
+    s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))
+    or:(ORL
+        s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0-8
+  && j1 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
   && s0.Uses == 1
+  && s1.Uses == 1
+  && or.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
   && clobber(s0)
-  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 <v.Type> [i-1] {s} p idx mem))
-
-// Combine byte loads + shifts into larger (unaligned) loads + bswap
-// (for L version first 2 bytes loads are matched as result of above 2-bytes load+shift rewrite)
-(ORL o1:(ORL o0:(ROLWconst [8] x01:(MOVWload [i1] {s} p mem))
-    s1:(SHLLconst [16] x2:(MOVBload [i1-1] {s} p mem)))
-    s2:(SHLLconst [24] x3:(MOVBload [i1-2] {s} p mem)))
-  && x01.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
+  && clobber(s1)
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+
+(ORQ
+    s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))
+    or:(ORQ
+        s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0-8
+  && j1 % 16 == 0
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && s0.Uses == 1
   && s1.Uses == 1
-  && s2.Uses == 1
-  && o0.Uses == 1
-  && o1.Uses == 1
-  && mergePoint(b,x01,x2,x3) != nil
-  && clobber(x01)
-  && clobber(x2)
-  && clobber(x3)
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(s0)
   && clobber(s1)
-  && clobber(s2)
-  && clobber(o0)
-  && clobber(o1)
-  -> @mergePoint(b,x01,x2,x3) (BSWAPL <v.Type> (MOVLload [i1-2] {s} p mem))
-
-(ORL o1:(ORL o0:(ROLWconst [8] x01:(MOVWloadidx1 [i1] {s} p idx mem))
-    s1:(SHLLconst [16] x2:(MOVBloadidx1 [i1-1] {s} p idx mem)))
-    s2:(SHLLconst [24] x3:(MOVBloadidx1 [i1-2] {s} p idx mem)))
-  && x01.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+
+(ORQ
+    s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))
+    or:(ORQ
+        s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))
+       y))
+  && i1 == i0+2
+  && j1 == j0-16
+  && j1 % 32 == 0
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && s0.Uses == 1
   && s1.Uses == 1
-  && s2.Uses == 1
-  && o0.Uses == 1
-  && o1.Uses == 1
-  && mergePoint(b,x01,x2,x3) != nil
-  && clobber(x01)
-  && clobber(x2)
-  && clobber(x3)
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(s0)
   && clobber(s1)
-  && clobber(s2)
-  && clobber(o0)
-  && clobber(o1)
-  -> @mergePoint(b,x01,x2,x3) (BSWAPL <v.Type> (MOVLloadidx1 <v.Type> [i1-2] {s} p idx mem))
-
-(ORQ o5:(ORQ o4:(ORQ o3:(ORQ o2:(ORQ o1:(ORQ o0:(ORQ
-                       x0:(MOVBload [i] {s} p mem)
-    s0:(SHLQconst [8]  x1:(MOVBload [i-1] {s} p mem)))
-    s1:(SHLQconst [16] x2:(MOVBload [i-2] {s} p mem)))
-    s2:(SHLQconst [24] x3:(MOVBload [i-3] {s} p mem)))
-    s3:(SHLQconst [32] x4:(MOVBload [i-4] {s} p mem)))
-    s4:(SHLQconst [40] x5:(MOVBload [i-5] {s} p mem)))
-    s5:(SHLQconst [48] x6:(MOVBload [i-6] {s} p mem)))
-    s6:(SHLQconst [56] x7:(MOVBload [i-7] {s} p mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLload [i0] {s} p mem))) y)
+
+// Big-endian indexed loads
+
+(ORL
+                       x1:(MOVBloadidx1 [i1] {s} p idx mem)
+    sh:(SHLLconst [8]  x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+
+(ORQ
+                       x1:(MOVBloadidx1 [i1] {s} p idx mem)
+    sh:(SHLQconst [8]  x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+
+(ORL
+                        r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))
+    sh:(SHLLconst [16]  r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+
+(ORQ
+                        r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))
+    sh:(SHLQconst [16]  r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+
+(ORQ
+                        r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem))
+    sh:(SHLQconst [32]  r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))))
+  && i1 == i0+4
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+
+(ORL
+    s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
+    or:(ORL
+        s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0-8
+  && j1 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && x4.Uses == 1
-  && x5.Uses == 1
-  && x6.Uses == 1
-  && x7.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && s2.Uses == 1
-  && s3.Uses == 1
-  && s4.Uses == 1
-  && s5.Uses == 1
-  && s6.Uses == 1
-  && o0.Uses == 1
-  && o1.Uses == 1
-  && o2.Uses == 1
-  && o3.Uses == 1
-  && o4.Uses == 1
-  && o5.Uses == 1
-  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
-  && clobber(x3)
-  && clobber(x4)
-  && clobber(x5)
-  && clobber(x6)
-  && clobber(x7)
   && clobber(s0)
   && clobber(s1)
-  && clobber(s2)
-  && clobber(s3)
-  && clobber(s4)
-  && clobber(s5)
-  && clobber(s6)
-  && clobber(o0)
-  && clobber(o1)
-  && clobber(o2)
-  && clobber(o3)
-  && clobber(o4)
-  && clobber(o5)
-  -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (BSWAPQ <v.Type> (MOVQload [i-7] {s} p mem))
-
-(ORQ o5:(ORQ o4:(ORQ o3:(ORQ o2:(ORQ o1:(ORQ o0:(ORQ
-                       x0:(MOVBloadidx1 [i] {s} p idx mem)
-    s0:(SHLQconst [8]  x1:(MOVBloadidx1 [i-1] {s} p idx mem)))
-    s1:(SHLQconst [16] x2:(MOVBloadidx1 [i-2] {s} p idx mem)))
-    s2:(SHLQconst [24] x3:(MOVBloadidx1 [i-3] {s} p idx mem)))
-    s3:(SHLQconst [32] x4:(MOVBloadidx1 [i-4] {s} p idx mem)))
-    s4:(SHLQconst [40] x5:(MOVBloadidx1 [i-5] {s} p idx mem)))
-    s5:(SHLQconst [48] x6:(MOVBloadidx1 [i-6] {s} p idx mem)))
-    s6:(SHLQconst [56] x7:(MOVBloadidx1 [i-7] {s} p idx mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+
+(ORQ
+    s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
+    or:(ORQ
+        s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0-8
+  && j1 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && x4.Uses == 1
-  && x5.Uses == 1
-  && x6.Uses == 1
-  && x7.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && s2.Uses == 1
-  && s3.Uses == 1
-  && s4.Uses == 1
-  && s5.Uses == 1
-  && s6.Uses == 1
-  && o0.Uses == 1
-  && o1.Uses == 1
-  && o2.Uses == 1
-  && o3.Uses == 1
-  && o4.Uses == 1
-  && o5.Uses == 1
-  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
-  && clobber(x3)
-  && clobber(x4)
-  && clobber(x5)
-  && clobber(x6)
-  && clobber(x7)
   && clobber(s0)
   && clobber(s1)
-  && clobber(s2)
-  && clobber(s3)
-  && clobber(s4)
-  && clobber(s5)
-  && clobber(s6)
-  && clobber(o0)
-  && clobber(o1)
-  && clobber(o2)
-  && clobber(o3)
-  && clobber(o4)
-  && clobber(o5)
-  -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (BSWAPQ <v.Type> (MOVQloadidx1 <v.Type> [i-7] {s} p idx mem))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+
+(ORQ
+    s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem)))
+    or:(ORQ
+        s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       y))
+  && i1 == i0+2
+  && j1 == j0-16
+  && j1 % 32 == 0
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && s0.Uses == 1
+  && s1.Uses == 1
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(s0)
+  && clobber(s1)
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
 
 // Combine 2 byte stores + shift into rolw 8 + word store
 (MOVBstore [i] {s} p w
 // Merge load and op
 // TODO: add indexed variants?
 (ADDQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ADDQmem x [off] {sym} ptr mem)
-(ADDQ l:(MOVQload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ADDQmem x [off] {sym} ptr mem)
 (ADDL x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ADDLmem x [off] {sym} ptr mem)
-(ADDL l:(MOVLload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ADDLmem x [off] {sym} ptr mem)
 (SUBQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (SUBQmem x [off] {sym} ptr mem)
 (SUBL x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (SUBLmem x [off] {sym} ptr mem)
 (ANDQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ANDQmem x [off] {sym} ptr mem)
-(ANDQ l:(MOVQload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ANDQmem x [off] {sym} ptr mem)
 (ANDL x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ANDLmem x [off] {sym} ptr mem)
-(ANDL l:(MOVLload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ANDLmem x [off] {sym} ptr mem)
-(ORQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ORQmem x [off] {sym} ptr mem)
-(ORQ l:(MOVQload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ORQmem x [off] {sym} ptr mem)
-(ORL x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ORLmem x [off] {sym} ptr mem)
-(ORL l:(MOVLload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ORLmem x [off] {sym} ptr mem)
+(ORQ  x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ORQmem x [off] {sym} ptr mem)
+(ORL  x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ORLmem x [off] {sym} ptr mem)
 (XORQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (XORQmem x [off] {sym} ptr mem)
-(XORQ l:(MOVQload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (XORQmem x [off] {sym} ptr mem)
 (XORL x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (XORLmem x [off] {sym} ptr mem)
-(XORL l:(MOVLload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (XORLmem x [off] {sym} ptr mem)
 (ADDSD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ADDSDmem x [off] {sym} ptr mem)
-(ADDSD l:(MOVSDload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ADDSDmem x [off] {sym} ptr mem)
 (ADDSS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ADDSSmem x [off] {sym} ptr mem)
-(ADDSS l:(MOVSSload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ADDSSmem x [off] {sym} ptr mem)
 (SUBSD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (SUBSDmem x [off] {sym} ptr mem)
 (SUBSS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (SUBSSmem x [off] {sym} ptr mem)
 (MULSD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (MULSDmem x [off] {sym} ptr mem)
-(MULSD l:(MOVSDload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (MULSDmem x [off] {sym} ptr mem)
 (MULSS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (MULSSmem x [off] {sym} ptr mem)
-(MULSS l:(MOVSSload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (MULSSmem x [off] {sym} ptr mem)
 
 // Merge ADDQconst and LEAQ into atomic loads.
 (MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
index f9731047e7e7ccca350d375455b3cefcf86b87ab..a859c63aa48b673dad3d8a7666c9f97b62db053a 100644 (file)
@@ -202,10 +202,10 @@ func init() {
                {name: "MULQconst", argLength: 1, reg: gp11, asm: "IMULQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 * auxint
                {name: "MULLconst", argLength: 1, reg: gp11, asm: "IMULL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 * auxint
 
-               {name: "HMULQ", argLength: 2, reg: gp21hmul, asm: "IMULQ", clobberFlags: true}, // (arg0 * arg1) >> width
-               {name: "HMULL", argLength: 2, reg: gp21hmul, asm: "IMULL", clobberFlags: true}, // (arg0 * arg1) >> width
-               {name: "HMULQU", argLength: 2, reg: gp21hmul, asm: "MULQ", clobberFlags: true}, // (arg0 * arg1) >> width
-               {name: "HMULLU", argLength: 2, reg: gp21hmul, asm: "MULL", clobberFlags: true}, // (arg0 * arg1) >> width
+               {name: "HMULQ", argLength: 2, reg: gp21hmul, commutative: true, asm: "IMULQ", clobberFlags: true}, // (arg0 * arg1) >> width
+               {name: "HMULL", argLength: 2, reg: gp21hmul, commutative: true, asm: "IMULL", clobberFlags: true}, // (arg0 * arg1) >> width
+               {name: "HMULQU", argLength: 2, reg: gp21hmul, commutative: true, asm: "MULQ", clobberFlags: true}, // (arg0 * arg1) >> width
+               {name: "HMULLU", argLength: 2, reg: gp21hmul, commutative: true, asm: "MULL", clobberFlags: true}, // (arg0 * arg1) >> width
 
                {name: "AVGQU", argLength: 2, reg: gp21, commutative: true, resultInArg0: true, clobberFlags: true}, // (arg0 + arg1) / 2 as unsigned, all 64 result bits
 
@@ -216,8 +216,8 @@ func init() {
                {name: "DIVLU", argLength: 2, reg: gp11div, typ: "(UInt32,UInt32)", asm: "DIVL", clobberFlags: true}, // [arg0 / arg1, arg0 % arg1]
                {name: "DIVWU", argLength: 2, reg: gp11div, typ: "(UInt16,UInt16)", asm: "DIVW", clobberFlags: true}, // [arg0 / arg1, arg0 % arg1]
 
-               {name: "MULQU2", argLength: 2, reg: regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}}, asm: "MULQ", clobberFlags: true},     // arg0 * arg1, returns (hi, lo)
-               {name: "DIVQU2", argLength: 3, reg: regInfo{inputs: []regMask{dx, ax, gpsp}, outputs: []regMask{ax, dx}}, asm: "DIVQ", clobberFlags: true}, // arg0:arg1 / arg2 (128-bit divided by 64-bit), returns (q, r)
+               {name: "MULQU2", argLength: 2, reg: regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}}, commutative: true, asm: "MULQ", clobberFlags: true}, // arg0 * arg1, returns (hi, lo)
+               {name: "DIVQU2", argLength: 3, reg: regInfo{inputs: []regMask{dx, ax, gpsp}, outputs: []regMask{ax, dx}}, asm: "DIVQ", clobberFlags: true},                // arg0:arg1 / arg2 (128-bit divided by 64-bit), returns (q, r)
 
                {name: "ANDQ", argLength: 2, reg: gp21, asm: "ANDQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 & arg1
                {name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 & arg1
@@ -251,43 +251,43 @@ func init() {
                {name: "BTLconst", argLength: 1, reg: gp1flags, asm: "BTL", typ: "Flags", aux: "Int8"}, // test whether bit auxint in arg0 is set, 0 <= auxint < 32
                {name: "BTQconst", argLength: 1, reg: gp1flags, asm: "BTQ", typ: "Flags", aux: "Int8"}, // test whether bit auxint in arg0 is set, 0 <= auxint < 64
 
-               {name: "TESTQ", argLength: 2, reg: gp2flags, asm: "TESTQ", typ: "Flags"},                    // (arg0 & arg1) compare to 0
-               {name: "TESTL", argLength: 2, reg: gp2flags, asm: "TESTL", typ: "Flags"},                    // (arg0 & arg1) compare to 0
-               {name: "TESTW", argLength: 2, reg: gp2flags, asm: "TESTW", typ: "Flags"},                    // (arg0 & arg1) compare to 0
-               {name: "TESTB", argLength: 2, reg: gp2flags, asm: "TESTB", typ: "Flags"},                    // (arg0 & arg1) compare to 0
+               {name: "TESTQ", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTQ", typ: "Flags"}, // (arg0 & arg1) compare to 0
+               {name: "TESTL", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTL", typ: "Flags"}, // (arg0 & arg1) compare to 0
+               {name: "TESTW", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTW", typ: "Flags"}, // (arg0 & arg1) compare to 0
+               {name: "TESTB", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTB", typ: "Flags"}, // (arg0 & arg1) compare to 0
                {name: "TESTQconst", argLength: 1, reg: gp1flags, asm: "TESTQ", typ: "Flags", aux: "Int64"}, // (arg0 & auxint) compare to 0
                {name: "TESTLconst", argLength: 1, reg: gp1flags, asm: "TESTL", typ: "Flags", aux: "Int32"}, // (arg0 & auxint) compare to 0
                {name: "TESTWconst", argLength: 1, reg: gp1flags, asm: "TESTW", typ: "Flags", aux: "Int16"}, // (arg0 & auxint) compare to 0
                {name: "TESTBconst", argLength: 1, reg: gp1flags, asm: "TESTB", typ: "Flags", aux: "Int8"},  // (arg0 & auxint) compare to 0
 
-               {name: "SHLQ", argLength: 2, reg: gp21shift, asm: "SHLQ", resultInArg0: true, clobberFlags: true},               // arg0 << arg1, shift amount is mod 64
-               {name: "SHLL", argLength: 2, reg: gp21shift, asm: "SHLL", resultInArg0: true, clobberFlags: true},               // arg0 << arg1, shift amount is mod 32
-               {name: "SHLQconst", argLength: 1, reg: gp11, asm: "SHLQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 << auxint, shift amount 0-63
-               {name: "SHLLconst", argLength: 1, reg: gp11, asm: "SHLL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 << auxint, shift amount 0-31
+               {name: "SHLQ", argLength: 2, reg: gp21shift, asm: "SHLQ", resultInArg0: true, clobberFlags: true},              // arg0 << arg1, shift amount is mod 64
+               {name: "SHLL", argLength: 2, reg: gp21shift, asm: "SHLL", resultInArg0: true, clobberFlags: true},              // arg0 << arg1, shift amount is mod 32
+               {name: "SHLQconst", argLength: 1, reg: gp11, asm: "SHLQ", aux: "Int8", resultInArg0: true, clobberFlags: true}, // arg0 << auxint, shift amount 0-63
+               {name: "SHLLconst", argLength: 1, reg: gp11, asm: "SHLL", aux: "Int8", resultInArg0: true, clobberFlags: true}, // arg0 << auxint, shift amount 0-31
                // Note: x86 is weird, the 16 and 8 bit shifts still use all 5 bits of shift amount!
 
-               {name: "SHRQ", argLength: 2, reg: gp21shift, asm: "SHRQ", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 64
-               {name: "SHRL", argLength: 2, reg: gp21shift, asm: "SHRL", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 32
-               {name: "SHRW", argLength: 2, reg: gp21shift, asm: "SHRW", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 32
-               {name: "SHRB", argLength: 2, reg: gp21shift, asm: "SHRB", resultInArg0: true, clobberFlags: true},               // unsigned arg0 >> arg1, shift amount is mod 32
-               {name: "SHRQconst", argLength: 1, reg: gp11, asm: "SHRQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-63
-               {name: "SHRLconst", argLength: 1, reg: gp11, asm: "SHRL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-31
-               {name: "SHRWconst", argLength: 1, reg: gp11, asm: "SHRW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-15
-               {name: "SHRBconst", argLength: 1, reg: gp11, asm: "SHRB", aux: "Int8", resultInArg0: true, clobberFlags: true},  // unsigned arg0 >> auxint, shift amount 0-7
-
-               {name: "SARQ", argLength: 2, reg: gp21shift, asm: "SARQ", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 64
-               {name: "SARL", argLength: 2, reg: gp21shift, asm: "SARL", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 32
-               {name: "SARW", argLength: 2, reg: gp21shift, asm: "SARW", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 32
-               {name: "SARB", argLength: 2, reg: gp21shift, asm: "SARB", resultInArg0: true, clobberFlags: true},               // signed arg0 >> arg1, shift amount is mod 32
-               {name: "SARQconst", argLength: 1, reg: gp11, asm: "SARQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-63
-               {name: "SARLconst", argLength: 1, reg: gp11, asm: "SARL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-31
-               {name: "SARWconst", argLength: 1, reg: gp11, asm: "SARW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-15
-               {name: "SARBconst", argLength: 1, reg: gp11, asm: "SARB", aux: "Int8", resultInArg0: true, clobberFlags: true},  // signed arg0 >> auxint, shift amount 0-7
-
-               {name: "ROLQconst", argLength: 1, reg: gp11, asm: "ROLQ", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-63
-               {name: "ROLLconst", argLength: 1, reg: gp11, asm: "ROLL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-31
-               {name: "ROLWconst", argLength: 1, reg: gp11, asm: "ROLW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-15
-               {name: "ROLBconst", argLength: 1, reg: gp11, asm: "ROLB", aux: "Int8", resultInArg0: true, clobberFlags: true},  // arg0 rotate left auxint, rotate amount 0-7
+               {name: "SHRQ", argLength: 2, reg: gp21shift, asm: "SHRQ", resultInArg0: true, clobberFlags: true},              // unsigned arg0 >> arg1, shift amount is mod 64
+               {name: "SHRL", argLength: 2, reg: gp21shift, asm: "SHRL", resultInArg0: true, clobberFlags: true},              // unsigned arg0 >> arg1, shift amount is mod 32
+               {name: "SHRW", argLength: 2, reg: gp21shift, asm: "SHRW", resultInArg0: true, clobberFlags: true},              // unsigned arg0 >> arg1, shift amount is mod 32
+               {name: "SHRB", argLength: 2, reg: gp21shift, asm: "SHRB", resultInArg0: true, clobberFlags: true},              // unsigned arg0 >> arg1, shift amount is mod 32
+               {name: "SHRQconst", argLength: 1, reg: gp11, asm: "SHRQ", aux: "Int8", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-63
+               {name: "SHRLconst", argLength: 1, reg: gp11, asm: "SHRL", aux: "Int8", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-31
+               {name: "SHRWconst", argLength: 1, reg: gp11, asm: "SHRW", aux: "Int8", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-15
+               {name: "SHRBconst", argLength: 1, reg: gp11, asm: "SHRB", aux: "Int8", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-7
+
+               {name: "SARQ", argLength: 2, reg: gp21shift, asm: "SARQ", resultInArg0: true, clobberFlags: true},              // signed arg0 >> arg1, shift amount is mod 64
+               {name: "SARL", argLength: 2, reg: gp21shift, asm: "SARL", resultInArg0: true, clobberFlags: true},              // signed arg0 >> arg1, shift amount is mod 32
+               {name: "SARW", argLength: 2, reg: gp21shift, asm: "SARW", resultInArg0: true, clobberFlags: true},              // signed arg0 >> arg1, shift amount is mod 32
+               {name: "SARB", argLength: 2, reg: gp21shift, asm: "SARB", resultInArg0: true, clobberFlags: true},              // signed arg0 >> arg1, shift amount is mod 32
+               {name: "SARQconst", argLength: 1, reg: gp11, asm: "SARQ", aux: "Int8", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-63
+               {name: "SARLconst", argLength: 1, reg: gp11, asm: "SARL", aux: "Int8", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-31
+               {name: "SARWconst", argLength: 1, reg: gp11, asm: "SARW", aux: "Int8", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-15
+               {name: "SARBconst", argLength: 1, reg: gp11, asm: "SARB", aux: "Int8", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-7
+
+               {name: "ROLQconst", argLength: 1, reg: gp11, asm: "ROLQ", aux: "Int8", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-63
+               {name: "ROLLconst", argLength: 1, reg: gp11, asm: "ROLL", aux: "Int8", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-31
+               {name: "ROLWconst", argLength: 1, reg: gp11, asm: "ROLW", aux: "Int8", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-15
+               {name: "ROLBconst", argLength: 1, reg: gp11, asm: "ROLB", aux: "Int8", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-7
 
                {name: "ADDLmem", argLength: 3, reg: gp21load, asm: "ADDL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from  arg1+auxint+aux, arg2 = mem
                {name: "ADDQmem", argLength: 3, reg: gp21load, asm: "ADDQ", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from  arg1+auxint+aux, arg2 = mem
@@ -374,7 +374,7 @@ func init() {
                {name: "PXOR", argLength: 2, reg: fp21, asm: "PXOR", commutative: true, resultInArg0: true}, // exclusive or, applied to X regs for float negation.
 
                {name: "LEAQ", argLength: 1, reg: gp11sb, asm: "LEAQ", aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxint + offset encoded in aux
-               {name: "LEAQ1", argLength: 2, reg: gp21sb, aux: "SymOff", symEffect: "Addr"},                                      // arg0 + arg1 + auxint + aux
+               {name: "LEAQ1", argLength: 2, reg: gp21sb, commutative: true, aux: "SymOff", symEffect: "Addr"},                   // arg0 + arg1 + auxint + aux
                {name: "LEAQ2", argLength: 2, reg: gp21sb, aux: "SymOff", symEffect: "Addr"},                                      // arg0 + 2*arg1 + auxint + aux
                {name: "LEAQ4", argLength: 2, reg: gp21sb, aux: "SymOff", symEffect: "Addr"},                                      // arg0 + 4*arg1 + auxint + aux
                {name: "LEAQ8", argLength: 2, reg: gp21sb, aux: "SymOff", symEffect: "Addr"},                                      // arg0 + 8*arg1 + auxint + aux
@@ -398,14 +398,15 @@ func init() {
                {name: "MOVOstore", argLength: 3, reg: fpstore, asm: "MOVUPS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},  // store 16 bytes in arg1 to arg0+auxint+aux. arg2=mem
 
                // indexed loads/stores
-               {name: "MOVBloadidx1", argLength: 3, reg: gploadidx, asm: "MOVBLZX", aux: "SymOff", symEffect: "Read"}, // load a byte from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVWloadidx1", argLength: 3, reg: gploadidx, asm: "MOVWLZX", aux: "SymOff", symEffect: "Read"}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVWloadidx2", argLength: 3, reg: gploadidx, asm: "MOVWLZX", aux: "SymOff", symEffect: "Read"}, // load 2 bytes from arg0+2*arg1+auxint+aux. arg2=mem
-               {name: "MOVLloadidx1", argLength: 3, reg: gploadidx, asm: "MOVL", aux: "SymOff", symEffect: "Read"},    // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVLloadidx4", argLength: 3, reg: gploadidx, asm: "MOVL", aux: "SymOff", symEffect: "Read"},    // load 4 bytes from arg0+4*arg1+auxint+aux. arg2=mem
-               {name: "MOVQloadidx1", argLength: 3, reg: gploadidx, asm: "MOVQ", aux: "SymOff", symEffect: "Read"},    // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVQloadidx8", argLength: 3, reg: gploadidx, asm: "MOVQ", aux: "SymOff", symEffect: "Read"},    // load 8 bytes from arg0+8*arg1+auxint+aux. arg2=mem
+               {name: "MOVBloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBLZX", aux: "SymOff", typ: "UInt8", symEffect: "Read"},  // load a byte from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVWloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVWLZX", aux: "SymOff", typ: "UInt16", symEffect: "Read"}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVWloadidx2", argLength: 3, reg: gploadidx, asm: "MOVWLZX", aux: "SymOff", typ: "UInt16", symEffect: "Read"},                    // load 2 bytes from arg0+2*arg1+auxint+aux. arg2=mem
+               {name: "MOVLloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVL", aux: "SymOff", typ: "UInt32", symEffect: "Read"},    // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVLloadidx4", argLength: 3, reg: gploadidx, asm: "MOVL", aux: "SymOff", typ: "UInt32", symEffect: "Read"},                       // load 4 bytes from arg0+4*arg1+auxint+aux. arg2=mem
+               {name: "MOVQloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVQ", aux: "SymOff", typ: "UInt64", symEffect: "Read"},    // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVQloadidx8", argLength: 3, reg: gploadidx, asm: "MOVQ", aux: "SymOff", typ: "UInt64", symEffect: "Read"},                       // load 8 bytes from arg0+8*arg1+auxint+aux. arg2=mem
                // TODO: sign-extending indexed loads
+               // TODO: mark the MOVXstoreidx1 ops as commutative.  Generates too many rewrite rules at the moment.
                {name: "MOVBstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVB", aux: "SymOff", symEffect: "Write"}, // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
                {name: "MOVWstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff", symEffect: "Write"}, // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
                {name: "MOVWstoreidx2", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff", symEffect: "Write"}, // store 2 bytes in arg2 to arg0+2*arg1+auxint+aux. arg3=mem
index 92f2c0b3022fc3c44bef0c20bcf35120c2679c46..52612ec2c17e9f6241762b5c9956e6a16a3f6ad5 100644 (file)
 (MOVWloadshiftRA ptr idx [c] (MOVWstoreshiftRA ptr2 idx [d] x _)) && c==d && isSamePtr(ptr, ptr2) -> x
 
 // fold constant into arithmetic ops
-(ADD (MOVWconst [c]) x) -> (ADDconst [c] x)
 (ADD x (MOVWconst [c])) -> (ADDconst [c] x)
 (SUB (MOVWconst [c]) x) -> (RSBconst [c] x)
 (SUB x (MOVWconst [c])) -> (SUBconst [c] x)
 (RSB (MOVWconst [c]) x) -> (SUBconst [c] x)
 (RSB x (MOVWconst [c])) -> (RSBconst [c] x)
 
-(ADDS (MOVWconst [c]) x) -> (ADDSconst [c] x)
 (ADDS x (MOVWconst [c])) -> (ADDSconst [c] x)
-(SUBS (MOVWconst [c]) x) -> (RSBSconst [c] x)
 (SUBS x (MOVWconst [c])) -> (SUBSconst [c] x)
 
 (ADC (MOVWconst [c]) x flags) -> (ADCconst [c] x flags)
 (SBC (MOVWconst [c]) x flags) -> (RSCconst [c] x flags)
 (SBC x (MOVWconst [c]) flags) -> (SBCconst [c] x flags)
 
-(AND (MOVWconst [c]) x) -> (ANDconst [c] x)
 (AND x (MOVWconst [c])) -> (ANDconst [c] x)
-(OR (MOVWconst [c]) x) -> (ORconst [c] x)
-(OR x (MOVWconst [c])) -> (ORconst [c] x)
-(XOR (MOVWconst [c]) x) -> (XORconst [c] x)
+(OR  x (MOVWconst [c])) -> (ORconst [c] x)
 (XOR x (MOVWconst [c])) -> (XORconst [c] x)
 (BIC x (MOVWconst [c])) -> (BICconst [c] x)
 
 (MUL x (MOVWconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst [log2(c/7)] (RSBshiftLL <x.Type> x x [3]))
 (MUL x (MOVWconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
 
-(MUL (MOVWconst [c]) x) && int32(c) == -1 -> (RSBconst [0] x)
-(MUL (MOVWconst [0]) _) -> (MOVWconst [0])
-(MUL (MOVWconst [1]) x) -> x
-(MUL (MOVWconst [c]) x) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
-(MUL (MOVWconst [c]) x) && isPowerOfTwo(c-1) && int32(c) >= 3 -> (ADDshiftLL x x [log2(c-1)])
-(MUL (MOVWconst [c]) x) && isPowerOfTwo(c+1) && int32(c) >= 7 -> (RSBshiftLL x x [log2(c+1)])
-(MUL (MOVWconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
-(MUL (MOVWconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
-(MUL (MOVWconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst [log2(c/7)] (RSBshiftLL <x.Type> x x [3]))
-(MUL (MOVWconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
-
 (MULA x (MOVWconst [c]) a) && int32(c) == -1 -> (SUB a x)
 (MULA _ (MOVWconst [0]) a) -> a
 (MULA x (MOVWconst [1]) a) -> (ADD x a)
 
 // absorb shifts into ops
 (ADD x (SLLconst [c] y)) -> (ADDshiftLL x y [c])
-(ADD (SLLconst [c] y) x) -> (ADDshiftLL x y [c])
 (ADD x (SRLconst [c] y)) -> (ADDshiftRL x y [c])
-(ADD (SRLconst [c] y) x) -> (ADDshiftRL x y [c])
 (ADD x (SRAconst [c] y)) -> (ADDshiftRA x y [c])
-(ADD (SRAconst [c] y) x) -> (ADDshiftRA x y [c])
 (ADD x (SLL y z)) -> (ADDshiftLLreg x y z)
-(ADD (SLL y z) x) -> (ADDshiftLLreg x y z)
 (ADD x (SRL y z)) -> (ADDshiftRLreg x y z)
-(ADD (SRL y z) x) -> (ADDshiftRLreg x y z)
 (ADD x (SRA y z)) -> (ADDshiftRAreg x y z)
-(ADD (SRA y z) x) -> (ADDshiftRAreg x y z)
 (ADC x (SLLconst [c] y) flags) -> (ADCshiftLL x y [c] flags)
 (ADC (SLLconst [c] y) x flags) -> (ADCshiftLL x y [c] flags)
 (ADC x (SRLconst [c] y) flags) -> (ADCshiftRL x y [c] flags)
 (ADC x (SRA y z) flags) -> (ADCshiftRAreg x y z flags)
 (ADC (SRA y z) x flags) -> (ADCshiftRAreg x y z flags)
 (ADDS x (SLLconst [c] y)) -> (ADDSshiftLL x y [c])
-(ADDS (SLLconst [c] y) x) -> (ADDSshiftLL x y [c])
 (ADDS x (SRLconst [c] y)) -> (ADDSshiftRL x y [c])
-(ADDS (SRLconst [c] y) x) -> (ADDSshiftRL x y [c])
 (ADDS x (SRAconst [c] y)) -> (ADDSshiftRA x y [c])
-(ADDS (SRAconst [c] y) x) -> (ADDSshiftRA x y [c])
 (ADDS x (SLL y z)) -> (ADDSshiftLLreg x y z)
-(ADDS (SLL y z) x) -> (ADDSshiftLLreg x y z)
 (ADDS x (SRL y z)) -> (ADDSshiftRLreg x y z)
-(ADDS (SRL y z) x) -> (ADDSshiftRLreg x y z)
 (ADDS x (SRA y z)) -> (ADDSshiftRAreg x y z)
-(ADDS (SRA y z) x) -> (ADDSshiftRAreg x y z)
 (SUB x (SLLconst [c] y)) -> (SUBshiftLL x y [c])
 (SUB (SLLconst [c] y) x) -> (RSBshiftLL x y [c])
 (SUB x (SRLconst [c] y)) -> (SUBshiftRL x y [c])
 (RSB x (SRA y z)) -> (RSBshiftRAreg x y z)
 (RSB (SRA y z) x) -> (SUBshiftRAreg x y z)
 (AND x (SLLconst [c] y)) -> (ANDshiftLL x y [c])
-(AND (SLLconst [c] y) x) -> (ANDshiftLL x y [c])
 (AND x (SRLconst [c] y)) -> (ANDshiftRL x y [c])
-(AND (SRLconst [c] y) x) -> (ANDshiftRL x y [c])
 (AND x (SRAconst [c] y)) -> (ANDshiftRA x y [c])
-(AND (SRAconst [c] y) x) -> (ANDshiftRA x y [c])
 (AND x (SLL y z)) -> (ANDshiftLLreg x y z)
-(AND (SLL y z) x) -> (ANDshiftLLreg x y z)
 (AND x (SRL y z)) -> (ANDshiftRLreg x y z)
-(AND (SRL y z) x) -> (ANDshiftRLreg x y z)
 (AND x (SRA y z)) -> (ANDshiftRAreg x y z)
-(AND (SRA y z) x) -> (ANDshiftRAreg x y z)
 (OR x (SLLconst [c] y)) -> (ORshiftLL x y [c])
-(OR (SLLconst [c] y) x) -> (ORshiftLL x y [c])
 (OR x (SRLconst [c] y)) -> (ORshiftRL x y [c])
-(OR (SRLconst [c] y) x) -> (ORshiftRL x y [c])
 (OR x (SRAconst [c] y)) -> (ORshiftRA x y [c])
-(OR (SRAconst [c] y) x) -> (ORshiftRA x y [c])
 (OR x (SLL y z)) -> (ORshiftLLreg x y z)
-(OR (SLL y z) x) -> (ORshiftLLreg x y z)
 (OR x (SRL y z)) -> (ORshiftRLreg x y z)
-(OR (SRL y z) x) -> (ORshiftRLreg x y z)
 (OR x (SRA y z)) -> (ORshiftRAreg x y z)
-(OR (SRA y z) x) -> (ORshiftRAreg x y z)
 (XOR x (SLLconst [c] y)) -> (XORshiftLL x y [c])
-(XOR (SLLconst [c] y) x) -> (XORshiftLL x y [c])
 (XOR x (SRLconst [c] y)) -> (XORshiftRL x y [c])
-(XOR (SRLconst [c] y) x) -> (XORshiftRL x y [c])
 (XOR x (SRAconst [c] y)) -> (XORshiftRA x y [c])
-(XOR (SRAconst [c] y) x) -> (XORshiftRA x y [c])
 (XOR x (SRRconst [c] y)) -> (XORshiftRR x y [c])
-(XOR (SRRconst [c] y) x) -> (XORshiftRR x y [c])
 (XOR x (SLL y z)) -> (XORshiftLLreg x y z)
-(XOR (SLL y z) x) -> (XORshiftLLreg x y z)
 (XOR x (SRL y z)) -> (XORshiftRLreg x y z)
-(XOR (SRL y z) x) -> (XORshiftRLreg x y z)
 (XOR x (SRA y z)) -> (XORshiftRAreg x y z)
-(XOR (SRA y z) x) -> (XORshiftRAreg x y z)
 (BIC x (SLLconst [c] y)) -> (BICshiftLL x y [c])
 (BIC x (SRLconst [c] y)) -> (BICshiftRL x y [c])
 (BIC x (SRAconst [c] y)) -> (BICshiftRA x y [c])
 
 // generic simplifications
 (ADD x (RSBconst [0] y)) -> (SUB x y)
-(ADD (RSBconst [0] y) x) -> (SUB x y)
 (ADD <t> (RSBconst [c] x) (RSBconst [d] y)) -> (RSBconst [c+d] (ADD <t> x y))
 (SUB x x) -> (MOVWconst [0])
 (RSB x x) -> (MOVWconst [0])
 (BIC x x) -> (MOVWconst [0])
 
 (ADD (MUL x y) a) -> (MULA x y a)
-(ADD a (MUL x y)) -> (MULA x y a)
 
 (AND x (MVN y)) -> (BIC x y)
-(AND (MVN y) x) -> (BIC x y)
 
 // simplification with *shift ops
 (SUBshiftLL x (SLLconst x [c]) [d]) && c==d -> (MOVWconst [0])
 (BICshiftRL x (SRLconst x [c]) [d]) && c==d -> (MOVWconst [0])
 (BICshiftRA x (SRAconst x [c]) [d]) && c==d -> (MOVWconst [0])
 (AND x (MVNshiftLL y [c])) -> (BICshiftLL x y [c])
-(AND (MVNshiftLL y [c]) x) -> (BICshiftLL x y [c])
 (AND x (MVNshiftRL y [c])) -> (BICshiftRL x y [c])
-(AND (MVNshiftRL y [c]) x) -> (BICshiftRL x y [c])
 (AND x (MVNshiftRA y [c])) -> (BICshiftRA x y [c])
-(AND (MVNshiftRA y [c]) x) -> (BICshiftRA x y [c])
 
 // floating point optimizations
 (CMPF x (MOVFconst [0])) -> (CMPF0 x)
index a4d651604b236c01ba7a00f065b4244e4deb058e..8f7680a34713968c5e335dece56ba36cf0c4e621 100644 (file)
 (MOVDreg x) && x.Uses == 1 -> (MOVDnop x)
 
 // fold constant into arithmetic ops
-(ADD (MOVDconst [c]) x) -> (ADDconst [c] x)
 (ADD x (MOVDconst [c])) -> (ADDconst [c] x)
 (SUB x (MOVDconst [c])) -> (SUBconst [c] x)
-(AND (MOVDconst [c]) x) -> (ANDconst [c] x)
 (AND x (MOVDconst [c])) -> (ANDconst [c] x)
-(OR  (MOVDconst [c]) x) -> (ORconst  [c] x)
 (OR  x (MOVDconst [c])) -> (ORconst  [c] x)
-(XOR (MOVDconst [c]) x) -> (XORconst [c] x)
 (XOR x (MOVDconst [c])) -> (XORconst [c] x)
 (BIC x (MOVDconst [c])) -> (BICconst [c] x)
 
 (MUL x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
 (MUL x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
 
-(MUL (MOVDconst [-1]) x) -> (NEG x)
-(MUL (MOVDconst [0]) _) -> (MOVDconst [0])
-(MUL (MOVDconst [1]) x) -> x
-(MUL (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
-(MUL (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
-(MUL (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c >= 3 -> (ADDshiftLL x x [log2(c-1)])
-(MUL (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c >= 7 -> (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
-(MUL (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
-(MUL (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
-(MUL (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
-(MUL (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
-
 (MULW x (MOVDconst [c])) && int32(c)==-1 -> (NEG x)
 (MULW _ (MOVDconst [c])) && int32(c)==0 -> (MOVDconst [0])
 (MULW x (MOVDconst [c])) && int32(c)==1 -> x
 (MULW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
 (MULW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
 
-(MULW (MOVDconst [c]) x) && int32(c)==-1 -> (NEG x)
-(MULW (MOVDconst [c]) _) && int32(c)==0 -> (MOVDconst [0])
-(MULW (MOVDconst [c]) x) && int32(c)==1 -> x
-(MULW (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
-(MULW (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c) >= 3 -> (ADDshiftLL x x [log2(c-1)])
-(MULW (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c) >= 7 -> (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
-(MULW (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
-(MULW (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
-(MULW (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
-(MULW (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
-
 // div by constant
 (UDIV x (MOVDconst [1])) -> x
 (UDIV x (MOVDconst [c])) && isPowerOfTwo(c) -> (SRLconst [log2(c)] x)
 
 // generic simplifications
 (ADD x (NEG y)) -> (SUB x y)
-(ADD (NEG y) x) -> (SUB x y)
 (SUB x x) -> (MOVDconst [0])
 (AND x x) -> x
 (OR  x x) -> x
 
 // absorb shifts into ops
 (ADD x (SLLconst [c] y)) -> (ADDshiftLL x y [c])
-(ADD (SLLconst [c] y) x) -> (ADDshiftLL x y [c])
 (ADD x (SRLconst [c] y)) -> (ADDshiftRL x y [c])
-(ADD (SRLconst [c] y) x) -> (ADDshiftRL x y [c])
 (ADD x (SRAconst [c] y)) -> (ADDshiftRA x y [c])
-(ADD (SRAconst [c] y) x) -> (ADDshiftRA x y [c])
 (SUB x (SLLconst [c] y)) -> (SUBshiftLL x y [c])
 (SUB x (SRLconst [c] y)) -> (SUBshiftRL x y [c])
 (SUB x (SRAconst [c] y)) -> (SUBshiftRA x y [c])
 (AND x (SLLconst [c] y)) -> (ANDshiftLL x y [c])
-(AND (SLLconst [c] y) x) -> (ANDshiftLL x y [c])
 (AND x (SRLconst [c] y)) -> (ANDshiftRL x y [c])
-(AND (SRLconst [c] y) x) -> (ANDshiftRL x y [c])
 (AND x (SRAconst [c] y)) -> (ANDshiftRA x y [c])
-(AND (SRAconst [c] y) x) -> (ANDshiftRA x y [c])
-(OR  x s:(SLLconst [c] y)) && s.Uses == 1 && clobber(s) -> (ORshiftLL  x y [c]) // useful for combined load
-(OR  s:(SLLconst [c] y) x) && s.Uses == 1 && clobber(s) -> (ORshiftLL  x y [c])
-(OR  x (SLLconst [c] y)) -> (ORshiftLL  x y [c])
-(OR  (SLLconst [c] y) x) -> (ORshiftLL  x y [c])
+(OR  x (SLLconst [c] y)) -> (ORshiftLL  x y [c]) // useful for combined load
 (OR  x (SRLconst [c] y)) -> (ORshiftRL  x y [c])
-(OR  (SRLconst [c] y) x) -> (ORshiftRL  x y [c])
 (OR  x (SRAconst [c] y)) -> (ORshiftRA  x y [c])
-(OR  (SRAconst [c] y) x) -> (ORshiftRA  x y [c])
 (XOR x (SLLconst [c] y)) -> (XORshiftLL x y [c])
-(XOR (SLLconst [c] y) x) -> (XORshiftLL x y [c])
 (XOR x (SRLconst [c] y)) -> (XORshiftRL x y [c])
-(XOR (SRLconst [c] y) x) -> (XORshiftRL x y [c])
 (XOR x (SRAconst [c] y)) -> (XORshiftRA x y [c])
-(XOR (SRAconst [c] y) x) -> (XORshiftRA x y [c])
 (BIC x (SLLconst [c] y)) -> (BICshiftLL x y [c])
 (BIC x (SRLconst [c] y)) -> (BICshiftRL x y [c])
 (BIC x (SRAconst [c] y)) -> (BICshiftRA x y [c])
 // little endian loads
 // b[0] | b[1]<<8 -> load 16-bit
 (ORshiftLL <t> [8]
-       y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem))
-       y1:(MOVDnop x1:(MOVBUload [i+1] {s} p mem)))
+       y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))
+       y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
+       && i1 == i0+1
        && x0.Uses == 1 && x1.Uses == 1
        && y0.Uses == 1 && y1.Uses == 1
        && mergePoint(b,x0,x1) != nil
        && clobber(x0) && clobber(x1)
        && clobber(y0) && clobber(y1)
-       -> @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [i] p) mem)
+       -> @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
 
 // b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 -> load 32-bit
 (ORshiftLL <t> [24] o0:(ORshiftLL [16]
-                   x0:(MOVHUload [i]   {s} p mem)
-       y1:(MOVDnop x1:(MOVBUload [i+2] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i+3] {s} p mem)))
+                   x0:(MOVHUload [i0] {s} p mem)
+       y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem)))
+       y2:(MOVDnop x2:(MOVBUload [i3] {s} p mem)))
+       && i2 == i0+2
+       && i3 == i0+3
        && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
        && y1.Uses == 1 && y2.Uses == 1
        && o0.Uses == 1
        && clobber(x0) && clobber(x1) && clobber(x2)
        && clobber(y1) && clobber(y2)
        && clobber(o0)
-       -> @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [i] p) mem)
+       -> @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
 
 // b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 -> load 64-bit
 (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
-                   x0:(MOVWUload [i]   {s} p mem)
-       y1:(MOVDnop x1:(MOVBUload [i+4] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i+5] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [i+6] {s} p mem)))
-       y4:(MOVDnop x4:(MOVBUload [i+7] {s} p mem)))
+                   x0:(MOVWUload [i0] {s} p mem)
+       y1:(MOVDnop x1:(MOVBUload [i4] {s} p mem)))
+       y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem)))
+       y3:(MOVDnop x3:(MOVBUload [i6] {s} p mem)))
+       y4:(MOVDnop x4:(MOVBUload [i7] {s} p mem)))
+       && i4 == i0+4
+       && i5 == i0+5
+       && i6 == i0+6
+       && i7 == i0+7
        && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
        && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
        && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
        && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
        && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
        && clobber(o0) && clobber(o1) && clobber(o2)
-       -> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [i] p) mem)
+       -> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
 
 // b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 32-bit
 (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
-       y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem)))
-       y1:(MOVDnop x1:(MOVBUload [i-1] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i-2] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [i-3] {s} p mem)))
+       y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem)))
+       y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem)))
+       y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem)))
+       y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)))
+       && i1 == i0+1
+       && i2 == i0+2
+       && i3 == i0+3
        && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
        && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
        && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
        && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
        && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
        && clobber(o0) && clobber(o1) && clobber(s0)
-       -> @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i-3] p) mem)
+       -> @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
 
 // b[7]<<56 | b[6]<<48 | b[5]<<40 | b[4]<<32 | b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 64-bit, reverse
 (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
-       y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem)))
-       y1:(MOVDnop x1:(MOVBUload [i-1] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i-2] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [i-3] {s} p mem)))
-       y4:(MOVDnop x4:(MOVBUload [i-4] {s} p mem)))
-       y5:(MOVDnop x5:(MOVBUload [i-5] {s} p mem)))
-       y6:(MOVDnop x6:(MOVBUload [i-6] {s} p mem)))
-       y7:(MOVDnop x7:(MOVBUload [i-7] {s} p mem)))
+       y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem)))
+       y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem)))
+       y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem)))
+       y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem)))
+       y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem)))
+       y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem)))
+       y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem)))
+       y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)))
+       && i1 == i0+1
+       && i2 == i0+2
+       && i3 == i0+3
+       && i4 == i0+4
+       && i5 == i0+5
+       && i6 == i0+6
+       && i7 == i0+7
        && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
        && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
        && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
        && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
        && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
        && clobber(o4) && clobber(o5) && clobber(s0)
-       -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i-7] p) mem))
+       -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
 
 // big endian loads
 // b[1] | b[0]<<8 -> load 16-bit, reverse
 (ORshiftLL <t> [8]
-       y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem))
-       y1:(MOVDnop x1:(MOVBUload [i-1] {s} p mem)))
-       && ((i-1)%2 == 0 || i-1<256 && i-1>-256 && !isArg(s) && !isAuto(s))
+       y0:(MOVDnop x0:(MOVBUload [i1] {s} p mem))
+       y1:(MOVDnop x1:(MOVBUload [i0] {s} p mem)))
+       && i1 == i0+1
+       && (i0%2 == 0 || i0<256 && i0>-256 && !isArg(s) && !isAuto(s))
        && x0.Uses == 1 && x1.Uses == 1
        && y0.Uses == 1 && y1.Uses == 1
        && mergePoint(b,x0,x1) != nil
        && clobber(x0) && clobber(x1)
        && clobber(y0) && clobber(y1)
-       -> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i-1] {s} p mem))
+       -> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i0] {s} p mem))
 
 // b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 -> load 32-bit, reverse
 (ORshiftLL <t> [24] o0:(ORshiftLL [16]
-       y0:(REV16W  x0:(MOVHUload [i]   {s} p mem))
-       y1:(MOVDnop x1:(MOVBUload [i-1] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i-2] {s} p mem)))
+       y0:(REV16W  x0:(MOVHUload [i2] {s} p mem))
+       y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
+       y2:(MOVDnop x2:(MOVBUload [i0] {s} p mem)))
+       && i1 == i0+1
+       && i2 == i0+2
        && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
        && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
        && o0.Uses == 1
        && clobber(x0) && clobber(x1) && clobber(x2)
        && clobber(y0) && clobber(y1) && clobber(y2)
        && clobber(o0)
-       -> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i-2] p) mem))
+       -> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
 
 // b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 -> load 64-bit, reverse
 (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
-       y0:(REVW    x0:(MOVWUload [i]   {s} p mem))
-       y1:(MOVDnop x1:(MOVBUload [i-1] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i-2] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [i-3] {s} p mem)))
-       y4:(MOVDnop x4:(MOVBUload [i-4] {s} p mem)))
+       y0:(REVW    x0:(MOVWUload [i4] {s} p mem))
+       y1:(MOVDnop x1:(MOVBUload [i3] {s} p mem)))
+       y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
+       y3:(MOVDnop x3:(MOVBUload [i1] {s} p mem)))
+       y4:(MOVDnop x4:(MOVBUload [i0] {s} p mem)))
+       && i1 == i0+1
+       && i2 == i0+2
+       && i3 == i0+3
+       && i4 == i0+4
        && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
        && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
        && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
        && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
        && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
        && clobber(o0) && clobber(o1) && clobber(o2)
-       -> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i-4] p) mem))
+       -> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
 
 // b[0]<<24 | b[1]<<16 | b[2]<<8 | b[3] -> load 32-bit, reverse
 (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
-       y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem)))
-       y1:(MOVDnop x1:(MOVBUload [i+1] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i+2] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [i+3] {s} p mem)))
+       y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)))
+       y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
+       y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
+       y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
+       && i1 == i0+1
+       && i2 == i0+2
+       && i3 == i0+3
        && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
        && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
        && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
        && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
        && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
        && clobber(o0) && clobber(o1) && clobber(s0)
-       -> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i] p) mem))
+       -> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
 
 // b[0]<<56 | b[1]<<48 | b[2]<<40 | b[3]<<32 | b[4]<<24 | b[5]<<16 | b[6]<<8 | b[7] -> load 64-bit, reverse
 (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
-       y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem)))
-       y1:(MOVDnop x1:(MOVBUload [i+1] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i+2] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [i+3] {s} p mem)))
-       y4:(MOVDnop x4:(MOVBUload [i+4] {s} p mem)))
-       y5:(MOVDnop x5:(MOVBUload [i+5] {s} p mem)))
-       y6:(MOVDnop x6:(MOVBUload [i+6] {s} p mem)))
-       y7:(MOVDnop x7:(MOVBUload [i+7] {s} p mem)))
+       y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)))
+       y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
+       y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
+       y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
+       y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem)))
+       y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem)))
+       y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem)))
+       y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)))
+       && i1 == i0+1
+       && i2 == i0+2
+       && i3 == i0+3
+       && i4 == i0+4
+       && i5 == i0+5
+       && i6 == i0+6
+       && i7 == i0+7
        && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
        && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
        && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
        && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
        && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
        && clobber(o4) && clobber(o5) && clobber(s0)
-       -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i] p) mem))
+       -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
index 3f40951d3bf78456e6549cc3400df19b465ab74b..ec875ddea3341a174f132e10dfad698d94551ae7 100644 (file)
 (MOVWreg x) && x.Uses == 1 -> (MOVWnop x)
 
 // fold constant into arithmetic ops
-(ADD (MOVWconst [c]) x) -> (ADDconst [c] x)
 (ADD x (MOVWconst [c])) -> (ADDconst [c] x)
 (SUB x (MOVWconst [c])) -> (SUBconst [c] x)
-(AND (MOVWconst [c]) x) -> (ANDconst [c] x)
 (AND x (MOVWconst [c])) -> (ANDconst [c] x)
-(OR  (MOVWconst [c]) x) -> (ORconst  [c] x)
 (OR  x (MOVWconst [c])) -> (ORconst  [c] x)
-(XOR (MOVWconst [c]) x) -> (XORconst [c] x)
 (XOR x (MOVWconst [c])) -> (XORconst [c] x)
-(NOR (MOVWconst [c]) x) -> (NORconst [c] x)
 (NOR x (MOVWconst [c])) -> (NORconst [c] x)
 
 (SLL _ (MOVWconst [c])) && uint32(c)>=32 -> (MOVWconst [0])
 (SGTU x (MOVWconst [0])) -> (SGTUzero x)
 
 // mul with constant
-(Select1 (MULTU x (MOVWconst [c]))) && x.Op != OpMIPSMOVWconst-> (Select1 (MULTU (MOVWconst [c]) x ))
-(Select0 (MULTU x (MOVWconst [c]))) && x.Op != OpMIPSMOVWconst-> (Select0 (MULTU (MOVWconst [c]) x ))
+(Select1 (MULTU x (MOVWconst [c]))) && x.Op != OpMIPSMOVWconst -> (Select1 (MULTU (MOVWconst [c]) x ))
+(Select0 (MULTU x (MOVWconst [c]))) && x.Op != OpMIPSMOVWconst -> (Select0 (MULTU (MOVWconst [c]) x ))
 
 (Select1 (MULTU (MOVWconst [0]) _ )) -> (MOVWconst [0])
 (Select0 (MULTU (MOVWconst [0]) _ )) -> (MOVWconst [0])
 
 // generic simplifications
 (ADD x (NEG y)) -> (SUB x y)
-(ADD (NEG y) x) -> (SUB x y)
 (SUB x x) -> (MOVWconst [0])
 (SUB (MOVWconst [0]) x) -> (NEG x)
 (AND x x) -> x
 
 // conditional move
 (CMOVZ _ b (MOVWconst [0])) -> b
-(CMOVZ a _ (MOVWconst [c])) && c!=0-> a
+(CMOVZ a _ (MOVWconst [c])) && c!=0 -> a
 (CMOVZzero _ (MOVWconst [0])) -> (MOVWconst [0])
-(CMOVZzero a (MOVWconst [c])) && c!=0-> a
+(CMOVZzero a (MOVWconst [c])) && c!=0 -> a
 (CMOVZ a (MOVWconst [0]) c) -> (CMOVZzero a c)
 
 // atomic
 (LoweredAtomicStore ptr (MOVWconst [0]) mem) -> (LoweredAtomicStorezero ptr mem)
-(LoweredAtomicAdd ptr (MOVWconst [c]) mem) && is16Bit(c)-> (LoweredAtomicAddconst [c] ptr mem)
+(LoweredAtomicAdd ptr (MOVWconst [c]) mem) && is16Bit(c) -> (LoweredAtomicAddconst [c] ptr mem)
 
index 42b0dc51bb3e743fa16fc4e7f2b445d78277132b..6dd5461f1fed1ddbd59bd48889ff843b1e0302eb 100644 (file)
 (MOVVreg x) && x.Uses == 1 -> (MOVVnop x)
 
 // fold constant into arithmetic ops
-(ADDV (MOVVconst [c]) x) && is32Bit(c) -> (ADDVconst [c] x)
 (ADDV x (MOVVconst [c])) && is32Bit(c) -> (ADDVconst [c] x)
 (SUBV x (MOVVconst [c])) && is32Bit(c) -> (SUBVconst [c] x)
-(AND (MOVVconst [c]) x) && is32Bit(c) -> (ANDconst [c] x)
 (AND x (MOVVconst [c])) && is32Bit(c) -> (ANDconst [c] x)
-(OR  (MOVVconst [c]) x) && is32Bit(c) -> (ORconst  [c] x)
 (OR  x (MOVVconst [c])) && is32Bit(c) -> (ORconst  [c] x)
-(XOR (MOVVconst [c]) x) && is32Bit(c) -> (XORconst [c] x)
 (XOR x (MOVVconst [c])) && is32Bit(c) -> (XORconst [c] x)
-(NOR (MOVVconst [c]) x) && is32Bit(c) -> (NORconst [c] x)
 (NOR x (MOVVconst [c])) && is32Bit(c) -> (NORconst [c] x)
 
 (SLLV _ (MOVVconst [c])) && uint64(c)>=64 -> (MOVVconst [0])
 
 // generic simplifications
 (ADDV x (NEGV y)) -> (SUBV x y)
-(ADDV (NEGV y) x) -> (SUBV x y)
 (SUBV x x) -> (MOVVconst [0])
 (SUBV (MOVVconst [0]) x) -> (NEGV x)
 (AND x x) -> x
index 4b96d9fc521796df8b8fdbcb4cb2a90b9153d152..5c4fe536374fff4bf88caa35eae453f43a4365da 100644 (file)
 (Move [1] dst src mem) -> (MOVBstore dst (MOVBZload src mem) mem)
 (Move [2] dst src mem) ->
         (MOVHstore dst (MOVHZload src mem) mem)
+(Move [4] {t} dst src mem) && t.(Type).Alignment()%4 == 0 ->
+       (MOVWstore dst (MOVWload src mem) mem)
+(Move [4] {t} dst src mem) && t.(Type).Alignment()%2 == 0 ->
+       (MOVHstore [2] dst (MOVHZload [2] src mem)
+               (MOVHstore dst (MOVHZload src mem) mem))
+(Move [4] dst src mem) ->
+       (MOVBstore [3] dst (MOVBZload [3] src mem)
+               (MOVBstore [2] dst (MOVBZload [2] src mem)
+                       (MOVBstore [1] dst (MOVBZload [1] src mem)
+                               (MOVBstore dst (MOVBZload src mem) mem))))
+
+(Move [8] {t} dst src mem) && t.(Type).Alignment()%8 == 0 ->
+       (MOVDstore dst (MOVDload src mem) mem)
+(Move [8] {t} dst src mem) && t.(Type).Alignment()%4 == 0 ->
+       (MOVWstore [4] dst (MOVWZload [4] src mem)
+               (MOVWstore dst (MOVWZload src mem) mem))
+(Move [8] {t} dst src mem) && t.(Type).Alignment()%2 == 0 ->
+       (MOVHstore [6] dst (MOVHZload [6] src mem)
+               (MOVHstore [4] dst (MOVHZload [4] src mem)
+                       (MOVHstore [2] dst (MOVHZload [2] src mem)
+                               (MOVHstore dst (MOVHZload src mem) mem))))
+
 (Move [3] dst src mem) ->
         (MOVBstore [2] dst (MOVBZload [2] src mem)
                 (MOVHstore dst (MOVHload src mem) mem))
 (AND x (MOVDconst [c])) && isU16Bit(c) -> (ANDconst [c] x)
 (XOR x (MOVDconst [c])) && isU32Bit(c) -> (XORconst [c] x)
 (OR x (MOVDconst [c])) && isU32Bit(c) -> (ORconst [c] x)
-(AND (MOVDconst [c]) x) && isU16Bit(c) -> (ANDconst [c] x)
-(XOR (MOVDconst [c]) x) && isU32Bit(c) -> (XORconst [c] x)
-(OR (MOVDconst [c]) x) && isU32Bit(c) -> (ORconst [c] x)
 
 // Simplify consts
 (ANDconst [c] (ANDconst [d] x)) -> (ANDconst [c&d] x)
 
 // Arithmetic constant ops
 
-(ADD (MOVDconst [c]) x) && is32Bit(c) -> (ADDconst [c] x)
 (ADD x (MOVDconst [c])) && is32Bit(c) -> (ADDconst [c] x)
 (ADDconst [c] (ADDconst [d] x)) && is32Bit(c+d) -> (ADDconst [c+d] x)
 (ADDconst [0] x) -> x
 (AND x:(MOVBZload _ _) (MOVDconst [c])) -> (ANDconst [c&0xFF] x)
 
 // floating-point fused multiply-add/sub
-(FADD z (FMUL x y)) -> (FMADD x y z)
 (FADD (FMUL x y) z) -> (FMADD x y z)
 (FSUB (FMUL x y) z) -> (FMSUB x y z)
-(FADDS z (FMULS x y)) -> (FMADDS x y z)
 (FADDS (FMULS x y) z) -> (FMADDS x y z)
 (FSUBS (FMULS x y) z) -> (FMSUBS x y z)
index 04810e2c7d9157d454b3b609fd256d1728d64ca2..78de5c076451238cf9eb31731f0a818038508604 100644 (file)
@@ -224,7 +224,7 @@ func init() {
                {name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"},                                // arg0&^arg1
                {name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true},                 // arg0|arg1
                {name: "ORN", argLength: 2, reg: gp21, asm: "ORN"},                                  // arg0|^arg1
-               {name: "NOR", argLength: 2, reg: gp21, asm: "NOR"},                                  // ^(arg0|arg1)
+               {name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true},               // ^(arg0|arg1)
                {name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true}, // arg0^arg1
                {name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true}, // arg0^^arg1
                {name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},                                  // -arg0 (integer)
index ef96cc0d8210f492fc87886365fc649bca90ec84..7857c0e18060c23bce6a0df60559de21e649b6ca 100644 (file)
 
 // Fold constants into instructions.
 (ADD x (MOVDconst [c])) && is32Bit(c) -> (ADDconst [c] x)
-(ADD (MOVDconst [c]) x) && is32Bit(c) -> (ADDconst [c] x)
 (ADDW x (MOVDconst [c])) -> (ADDWconst [c] x)
-(ADDW (MOVDconst [c]) x) -> (ADDWconst [c] x)
 
 (SUB x (MOVDconst [c])) && is32Bit(c) -> (SUBconst x [c])
 (SUB (MOVDconst [c]) x) && is32Bit(c) -> (NEG (SUBconst <v.Type> x [c]))
 (SUBW (MOVDconst [c]) x) -> (NEGW (SUBWconst <v.Type> x [c]))
 
 (MULLD x (MOVDconst [c])) && is32Bit(c) -> (MULLDconst [c] x)
-(MULLD (MOVDconst [c]) x) && is32Bit(c) -> (MULLDconst [c] x)
 (MULLW x (MOVDconst [c])) -> (MULLWconst [c] x)
-(MULLW (MOVDconst [c]) x) -> (MULLWconst [c] x)
 
 // NILF instructions leave the high 32 bits unchanged which is
 // equivalent to the leftmost 32 bits being set.
 // TODO(mundaym): modify the assembler to accept 64-bit values
 // and use isU32Bit(^c).
 (AND x (MOVDconst [c])) && is32Bit(c) && c < 0 -> (ANDconst [c] x)
-(AND (MOVDconst [c]) x) && is32Bit(c) && c < 0 -> (ANDconst [c] x)
 (ANDW x (MOVDconst [c])) -> (ANDWconst [c] x)
-(ANDW (MOVDconst [c]) x) -> (ANDWconst [c] x)
 
 (ANDWconst [c] (ANDWconst [d] x)) -> (ANDWconst [c & d] x)
 (ANDconst [c] (ANDconst [d] x)) -> (ANDconst [c & d] x)
 
 (OR x (MOVDconst [c])) && isU32Bit(c) -> (ORconst [c] x)
-(OR (MOVDconst [c]) x) && isU32Bit(c) -> (ORconst [c] x)
 (ORW x (MOVDconst [c])) -> (ORWconst [c] x)
-(ORW (MOVDconst [c]) x) -> (ORWconst [c] x)
 
 (XOR x (MOVDconst [c])) && isU32Bit(c) -> (XORconst [c] x)
-(XOR (MOVDconst [c]) x) && isU32Bit(c) -> (XORconst [c] x)
 (XORW x (MOVDconst [c])) -> (XORWconst [c] x)
-(XORW (MOVDconst [c]) x) -> (XORWconst [c] x)
 
 (SLD x (MOVDconst [c])) -> (SLDconst [c&63] x)
 (SLW x (MOVDconst [c])) -> (SLWconst [c&63] x)
 (SRD x (ANDconst [63] y)) -> (SRD x y)
 
 // Rotate generation
-(ADD (SLDconst x [c]) (SRDconst x [64-c])) -> (RLLGconst [   c] x)
-( OR (SLDconst x [c]) (SRDconst x [64-c])) -> (RLLGconst [   c] x)
-(XOR (SLDconst x [c]) (SRDconst x [64-c])) -> (RLLGconst [   c] x)
-(ADD (SRDconst x [c]) (SLDconst x [64-c])) -> (RLLGconst [64-c] x)
-( OR (SRDconst x [c]) (SLDconst x [64-c])) -> (RLLGconst [64-c] x)
-(XOR (SRDconst x [c]) (SLDconst x [64-c])) -> (RLLGconst [64-c] x)
-
-(ADDW (SLWconst x [c]) (SRWconst x [32-c])) -> (RLLconst [   c] x)
-( ORW (SLWconst x [c]) (SRWconst x [32-c])) -> (RLLconst [   c] x)
-(XORW (SLWconst x [c]) (SRWconst x [32-c])) -> (RLLconst [   c] x)
-(ADDW (SRWconst x [c]) (SLWconst x [32-c])) -> (RLLconst [32-c] x)
-( ORW (SRWconst x [c]) (SLWconst x [32-c])) -> (RLLconst [32-c] x)
-(XORW (SRWconst x [c]) (SLWconst x [32-c])) -> (RLLconst [32-c] x)
+(ADD (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (RLLGconst [c] x)
+( OR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (RLLGconst [c] x)
+(XOR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (RLLGconst [c] x)
+
+(ADDW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (RLLconst [c] x)
+( ORW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (RLLconst [c] x)
+(XORW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (RLLconst [c] x)
 
 (CMP x (MOVDconst [c])) && is32Bit(c) -> (CMPconst x [c])
 (CMP (MOVDconst [c]) x) && is32Bit(c) -> (InvertFlags (CMPconst x [c]))
 (CMPWU (MOVDconst [c]) x) -> (InvertFlags (CMPWUconst x [int64(uint32(c))]))
 
 // Using MOV{W,H,B}Zreg instead of AND is cheaper.
-(AND (MOVDconst [0xFF]) x) -> (MOVBZreg x)
 (AND x (MOVDconst [0xFF])) -> (MOVBZreg x)
-(AND (MOVDconst [0xFFFF]) x) -> (MOVHZreg x)
 (AND x (MOVDconst [0xFFFF])) -> (MOVHZreg x)
-(AND (MOVDconst [0xFFFFFFFF]) x) -> (MOVWZreg x)
 (AND x (MOVDconst [0xFFFFFFFF])) -> (MOVWZreg x)
 (ANDWconst [0xFF] x) -> (MOVBZreg x)
 (ANDWconst [0xFFFF] x) -> (MOVHZreg x)
 // Fold ADD into MOVDaddr. Odd offsets from SB shouldn't be folded (LARL can't handle them).
 (ADDconst [c] (MOVDaddr [d] {s} x:(SB))) && ((c+d)&1 == 0) && is32Bit(c+d) -> (MOVDaddr [c+d] {s} x)
 (ADDconst [c] (MOVDaddr [d] {s} x)) && x.Op != OpSB && is20Bit(c+d) -> (MOVDaddr [c+d] {s} x)
-(ADD x (MOVDaddr [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (MOVDaddridx [c] {s} x y)
-(ADD (MOVDaddr [c] {s} x) y) && x.Op != OpSB && y.Op != OpSB -> (MOVDaddridx [c] {s} x y)
+(ADD idx (MOVDaddr [c] {s} ptr)) && ptr.Op != OpSB && idx.Op != OpSB -> (MOVDaddridx [c] {s} ptr idx)
 
 // fold ADDconst into MOVDaddridx
 (ADDconst [c] (MOVDaddridx [d] {s} x y)) && is20Bit(c+d) -> (MOVDaddridx [c+d] {s} x y)
 (NEG (ADDconst [c] (NEG x))) && c != -(1<<31) -> (ADDconst [-c] x)
 
 // fused multiply-add
-(FADD x (FMUL y z)) -> (FMADD x y z)
-(FADDS x (FMULS y z)) -> (FMADDS x y z)
 (FADD (FMUL y z) x) -> (FMADD x y z)
 (FADDS (FMULS y z) x) -> (FMADDS x y z)
 (FSUB (FMUL y z) x) -> (FMSUB x y z)
   && clobber(x)
   -> (MOVDBRstoreidx [i-4] {s} p idx w0 mem)
 
-// Move shifts to second argument of ORs.  Helps load combining rules below.
-(ORW x:(SLWconst _) y) && y.Op != OpS390XSLWconst -> (ORW y x)
-(OR  x:(SLDconst _) y) && y.Op != OpS390XSLDconst -> (OR  y x)
-
 // Combining byte loads into larger (unaligned) loads.
 
-// Little endian loads.
+// Big-endian loads
 
-// b[0] | b[1]<<8 -> load 16-bit, reverse bytes
-(ORW                 x0:(MOVBZload [i]   {s} p mem)
-    s0:(SLWconst [8] x1:(MOVBZload [i+1] {s} p mem)))
+(ORW                 x1:(MOVBZload [i1] {s} p mem)
+    sh:(SLWconst [8] x0:(MOVBZload [i0] {s} p mem)))
+  && i1 == i0+1
   && p.Op != OpSB
   && x0.Uses == 1
   && x1.Uses == 1
-  && s0.Uses == 1
+  && sh.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(s0)
-  -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i] {s} p mem))
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
+
+(OR                  x1:(MOVBZload [i1] {s} p mem)
+    sh:(SLDconst [8] x0:(MOVBZload [i0] {s} p mem)))
+  && i1 == i0+1
+  && p.Op != OpSB
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
+
+(ORW                  x1:(MOVHZload [i1] {s} p mem)
+    sh:(SLWconst [16] x0:(MOVHZload [i0] {s} p mem)))
+  && i1 == i0+2
+  && p.Op != OpSB
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
+
+(OR                   x1:(MOVHZload [i1] {s} p mem)
+    sh:(SLDconst [16] x0:(MOVHZload [i0] {s} p mem)))
+  && i1 == i0+2
+  && p.Op != OpSB
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
 
-// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 -> load 32-bit, reverse bytes
-(ORW o0:(ORW z0:(MOVHZreg x0:(MOVHBRload [i] {s} p mem))
-    s0:(SLWconst [16] x1:(MOVBZload [i+2] {s} p mem)))
-    s1:(SLWconst [24] x2:(MOVBZload [i+3] {s} p mem)))
+(OR                   x1:(MOVWZload [i1] {s} p mem)
+    sh:(SLDconst [32] x0:(MOVWZload [i0] {s} p mem)))
+  && i1 == i0+4
   && p.Op != OpSB
-  && z0.Uses == 1
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVDload [i0] {s} p mem)
+
+(ORW
+    s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))
+    or:(ORW
+        s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0-8
+  && j1 % 16 == 0
+  && x0.Uses == 1
+  && x1.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && o0.Uses == 1
-  && mergePoint(b,x0,x1,x2) != nil
-  && clobber(z0)
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
   && clobber(s0)
   && clobber(s1)
-  && clobber(o0)
-  -> @mergePoint(b,x0,x1,x2) (MOVWBRload [i] {s} p mem)
-
-// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 -> load 64-bit, reverse bytes
-(OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR
-                      x0:(MOVBZload [i]   {s} p mem)
-    s0:(SLDconst [8]  x1:(MOVBZload [i+1] {s} p mem)))
-    s1:(SLDconst [16] x2:(MOVBZload [i+2] {s} p mem)))
-    s2:(SLDconst [24] x3:(MOVBZload [i+3] {s} p mem)))
-    s3:(SLDconst [32] x4:(MOVBZload [i+4] {s} p mem)))
-    s4:(SLDconst [40] x5:(MOVBZload [i+5] {s} p mem)))
-    s5:(SLDconst [48] x6:(MOVBZload [i+6] {s} p mem)))
-    s6:(SLDconst [56] x7:(MOVBZload [i+7] {s} p mem)))
-  && p.Op != OpSB
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
+
+(OR
+    s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))
+    or:(OR
+        s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0-8
+  && j1 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && x4.Uses == 1
-  && x5.Uses == 1
-  && x6.Uses == 1
-  && x7.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && s2.Uses == 1
-  && s3.Uses == 1
-  && s4.Uses == 1
-  && s5.Uses == 1
-  && s6.Uses == 1
-  && o0.Uses == 1
-  && o1.Uses == 1
-  && o2.Uses == 1
-  && o3.Uses == 1
-  && o4.Uses == 1
-  && o5.Uses == 1
-  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
-  && clobber(x3)
-  && clobber(x4)
-  && clobber(x5)
-  && clobber(x6)
-  && clobber(x7)
   && clobber(s0)
   && clobber(s1)
-  && clobber(s2)
-  && clobber(s3)
-  && clobber(s4)
-  && clobber(s5)
-  && clobber(s6)
-  && clobber(o0)
-  && clobber(o1)
-  && clobber(o2)
-  && clobber(o3)
-  && clobber(o4)
-  && clobber(o5)
-  -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDBRload [i] {s} p mem)
-
-// b[0] | b[1]<<8 -> load 16-bit, reverse bytes
-(ORW                 x0:(MOVBZloadidx [i]   {s} p idx mem)
-    s0:(SLWconst [8] x1:(MOVBZloadidx [i+1] {s} p idx mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
+
+(OR
+    s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem))
+    or:(OR
+        s1:(SLDconst [j1] x1:(MOVHZload [i1] {s} p mem))
+       y))
+  && i1 == i0+2
+  && j1 == j0-16
+  && j1 % 32 == 0
   && x0.Uses == 1
   && x1.Uses == 1
   && s0.Uses == 1
+  && s1.Uses == 1
+  && or.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
   && clobber(s0)
-  -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx <v.Type> [i] {s} p idx mem))
+  && clobber(s1)
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)
+
+// Big-endian indexed loads
+
+(ORW                 x1:(MOVBZloadidx [i1] {s} p idx mem)
+    sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+  && i1 == i0+1
+  && p.Op != OpSB
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
+
+(OR                  x1:(MOVBZloadidx [i1] {s} p idx mem)
+    sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+  && i1 == i0+1
+  && p.Op != OpSB
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
+
+(ORW                  x1:(MOVHZloadidx [i1] {s} p idx mem)
+    sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
+  && i1 == i0+2
+  && p.Op != OpSB
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
 
-// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 -> load 32-bit, reverse bytes
-(ORW o0:(ORW z0:(MOVHZreg x0:(MOVHBRloadidx [i] {s} p idx mem))
-    s0:(SLWconst [16] x1:(MOVBZloadidx [i+2] {s} p idx mem)))
-    s1:(SLWconst [24] x2:(MOVBZloadidx [i+3] {s} p idx mem)))
-  && z0.Uses == 1
+(OR                   x1:(MOVHZloadidx [i1] {s} p idx mem)
+    sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
+  && i1 == i0+2
+  && p.Op != OpSB
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
+
+(OR                   x1:(MOVWZloadidx [i1] {s} p idx mem)
+    sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} p idx mem)))
+  && i1 == i0+4
+  && p.Op != OpSB
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
+
+(ORW
+    s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
+    or:(ORW
+        s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0-8
+  && j1 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && o0.Uses == 1
-  && mergePoint(b,x0,x1,x2) != nil
-  && clobber(z0)
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
   && clobber(s0)
   && clobber(s1)
-  && clobber(o0)
-  -> @mergePoint(b,x0,x1,x2) (MOVWZreg (MOVWBRloadidx <v.Type> [i] {s} p idx mem))
-
-// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 -> load 64-bit, reverse bytes
-(OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR
-                      x0:(MOVBZloadidx [i]   {s} p idx mem)
-    s0:(SLDconst [8]  x1:(MOVBZloadidx [i+1] {s} p idx mem)))
-    s1:(SLDconst [16] x2:(MOVBZloadidx [i+2] {s} p idx mem)))
-    s2:(SLDconst [24] x3:(MOVBZloadidx [i+3] {s} p idx mem)))
-    s3:(SLDconst [32] x4:(MOVBZloadidx [i+4] {s} p idx mem)))
-    s4:(SLDconst [40] x5:(MOVBZloadidx [i+5] {s} p idx mem)))
-    s5:(SLDconst [48] x6:(MOVBZloadidx [i+6] {s} p idx mem)))
-    s6:(SLDconst [56] x7:(MOVBZloadidx [i+7] {s} p idx mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+
+(OR
+    s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
+    or:(OR
+        s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
+       y))
+  && i1 == i0+1
+  && j1 == j0-8
+  && j1 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && x4.Uses == 1
-  && x5.Uses == 1
-  && x6.Uses == 1
-  && x7.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && s2.Uses == 1
-  && s3.Uses == 1
-  && s4.Uses == 1
-  && s5.Uses == 1
-  && s6.Uses == 1
-  && o0.Uses == 1
-  && o1.Uses == 1
-  && o2.Uses == 1
-  && o3.Uses == 1
-  && o4.Uses == 1
-  && o5.Uses == 1
-  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
-  && clobber(x3)
-  && clobber(x4)
-  && clobber(x5)
-  && clobber(x6)
-  && clobber(x7)
   && clobber(s0)
   && clobber(s1)
-  && clobber(s2)
-  && clobber(s3)
-  && clobber(s4)
-  && clobber(s5)
-  && clobber(s6)
-  && clobber(o0)
-  && clobber(o1)
-  && clobber(o2)
-  && clobber(o3)
-  && clobber(o4)
-  && clobber(o5)
-  -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDBRloadidx <v.Type> [i] {s} p idx mem)
-
-// Big endian loads.
-
-// b[1] | b[0]<<8 -> load 16-bit
-(ORW                  x0:(MOVBZload [i]   {s} p mem)
-    s0:(SLWconst [8] x1:(MOVBZload [i-1] {s} p mem)))
-  && p.Op != OpSB
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+
+(OR
+    s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem))
+    or:(OR
+        s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem))
+       y))
+  && i1 == i0+2
+  && j1 == j0-16
+  && j1 % 32 == 0
   && x0.Uses == 1
   && x1.Uses == 1
   && s0.Uses == 1
+  && s1.Uses == 1
+  && or.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
   && clobber(s0)
-  -> @mergePoint(b,x0,x1) (MOVHZload [i-1] {s} p mem)
+  && clobber(s1)
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+
+// Little-endian loads
 
-// b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 -> load 32-bit
-(ORW o0:(ORW x0:(MOVHZload [i] {s} p mem)
-    s0:(SLWconst [16] x1:(MOVBZload [i-1] {s} p mem)))
-    s1:(SLWconst [24] x2:(MOVBZload [i-2] {s} p mem)))
+(ORW                 x0:(MOVBZload [i0] {s} p mem)
+    sh:(SLWconst [8] x1:(MOVBZload [i1] {s} p mem)))
   && p.Op != OpSB
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
+
+(OR                  x0:(MOVBZload [i0] {s} p mem)
+    sh:(SLDconst [8] x1:(MOVBZload [i1] {s} p mem)))
+  && p.Op != OpSB
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
+
+(ORW                  r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))
+    sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWBRload [i0] {s} p mem)
+
+(OR                   r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))
+    sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRload [i0] {s} p mem))
+
+(OR                   r0:(MOVWZreg x0:(MOVWBRload [i0] {s} p mem))
+    sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRload [i1] {s} p mem))))
+  && i1 == i0+4
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVDBRload [i0] {s} p mem)
+
+(ORW
+    s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))
+    or:(ORW
+        s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))
+       y))
+  && p.Op != OpSB
+  && i1 == i0+1
+  && j1 == j0+8
+  && j0 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && o0.Uses == 1
-  && mergePoint(b,x0,x1,x2) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
   && clobber(s0)
   && clobber(s1)
-  && clobber(o0)
-  -> @mergePoint(b,x0,x1,x2) (MOVWZload [i-2] {s} p mem)
-
-// b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 -> load 64-bit
-(OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR
-                      x0:(MOVBZload [i]   {s} p mem)
-    s0:(SLDconst [8]  x1:(MOVBZload [i-1] {s} p mem)))
-    s1:(SLDconst [16] x2:(MOVBZload [i-2] {s} p mem)))
-    s2:(SLDconst [24] x3:(MOVBZload [i-3] {s} p mem)))
-    s3:(SLDconst [32] x4:(MOVBZload [i-4] {s} p mem)))
-    s4:(SLDconst [40] x5:(MOVBZload [i-5] {s} p mem)))
-    s5:(SLDconst [48] x6:(MOVBZload [i-6] {s} p mem)))
-    s6:(SLDconst [56] x7:(MOVBZload [i-7] {s} p mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+
+(OR
+    s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))
+    or:(OR
+        s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))
+       y))
   && p.Op != OpSB
+  && i1 == i0+1
+  && j1 == j0+8
+  && j0 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && x4.Uses == 1
-  && x5.Uses == 1
-  && x6.Uses == 1
-  && x7.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && s2.Uses == 1
-  && s3.Uses == 1
-  && s4.Uses == 1
-  && s5.Uses == 1
-  && s6.Uses == 1
-  && o0.Uses == 1
-  && o1.Uses == 1
-  && o2.Uses == 1
-  && o3.Uses == 1
-  && o4.Uses == 1
-  && o5.Uses == 1
-  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
-  && clobber(x3)
-  && clobber(x4)
-  && clobber(x5)
-  && clobber(x6)
-  && clobber(x7)
   && clobber(s0)
   && clobber(s1)
-  && clobber(s2)
-  && clobber(s3)
-  && clobber(s4)
-  && clobber(s5)
-  && clobber(s6)
-  && clobber(o0)
-  && clobber(o1)
-  && clobber(o2)
-  && clobber(o3)
-  && clobber(o4)
-  && clobber(o5)
-  -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload [i-7] {s} p mem)
-
-// b[1] | b[0]<<8 -> load 16-bit
-(ORW                 x0:(MOVBZloadidx [i]   {s} p idx mem)
-    s0:(SLWconst [8] x1:(MOVBZloadidx [i-1] {s} p idx mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+
+(OR
+    s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem)))
+    or:(OR
+        s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)))
+       y))
+  && i1 == i0+2
+  && j1 == j0+16
+  && j0 % 32 == 0
   && x0.Uses == 1
   && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
   && s0.Uses == 1
+  && s1.Uses == 1
+  && or.Uses == 1
   && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
   && clobber(s0)
-  -> @mergePoint(b,x0,x1) (MOVHZloadidx <v.Type> [i-1] {s} p idx mem)
+  && clobber(s1)
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)
+
+// Little-endian indexed loads
+
+(ORW                 x0:(MOVBZloadidx [i0] {s} p idx mem)
+    sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+  && p.Op != OpSB
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+
+(OR                  x0:(MOVBZloadidx [i0] {s} p idx mem)
+    sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+  && p.Op != OpSB
+  && i1 == i0+1
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
 
-// b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 -> load 32-bit
-(ORW o0:(ORW x0:(MOVHZloadidx [i] {s} p idx mem)
-    s0:(SLWconst [16] x1:(MOVBZloadidx [i-1] {s} p idx mem)))
-    s1:(SLWconst [24] x2:(MOVBZloadidx [i-2] {s} p idx mem)))
+(ORW                  r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))
+    sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
+
+(OR                   r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))
+    sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
+  && i1 == i0+2
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
+
+(OR                   r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} p idx mem))
+    sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} p idx mem))))
+  && i1 == i0+4
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && sh.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(r0)
+  && clobber(r1)
+  && clobber(sh)
+  -> @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
+
+(ORW
+    s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
+    or:(ORW
+        s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
+       y))
+  && p.Op != OpSB
+  && i1 == i0+1
+  && j1 == j0+8
+  && j0 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && o0.Uses == 1
-  && mergePoint(b,x0,x1,x2) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
   && clobber(s0)
   && clobber(s1)
-  && clobber(o0)
-  -> @mergePoint(b,x0,x1,x2) (MOVWZloadidx <v.Type> [i-2] {s} p idx mem)
-
-// b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 -> load 64-bit
-(OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR
-                      x0:(MOVBZloadidx [i]   {s} p idx mem)
-    s0:(SLDconst [8]  x1:(MOVBZloadidx [i-1] {s} p idx mem)))
-    s1:(SLDconst [16] x2:(MOVBZloadidx [i-2] {s} p idx mem)))
-    s2:(SLDconst [24] x3:(MOVBZloadidx [i-3] {s} p idx mem)))
-    s3:(SLDconst [32] x4:(MOVBZloadidx [i-4] {s} p idx mem)))
-    s4:(SLDconst [40] x5:(MOVBZloadidx [i-5] {s} p idx mem)))
-    s5:(SLDconst [48] x6:(MOVBZloadidx [i-6] {s} p idx mem)))
-    s6:(SLDconst [56] x7:(MOVBZloadidx [i-7] {s} p idx mem)))
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
+
+(OR
+    s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
+    or:(OR
+        s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
+       y))
+  && p.Op != OpSB
+  && i1 == i0+1
+  && j1 == j0+8
+  && j0 % 16 == 0
   && x0.Uses == 1
   && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && x4.Uses == 1
-  && x5.Uses == 1
-  && x6.Uses == 1
-  && x7.Uses == 1
   && s0.Uses == 1
   && s1.Uses == 1
-  && s2.Uses == 1
-  && s3.Uses == 1
-  && s4.Uses == 1
-  && s5.Uses == 1
-  && s6.Uses == 1
-  && o0.Uses == 1
-  && o1.Uses == 1
-  && o2.Uses == 1
-  && o3.Uses == 1
-  && o4.Uses == 1
-  && o5.Uses == 1
-  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
+  && clobber(x0)
+  && clobber(x1)
+  && clobber(s0)
+  && clobber(s1)
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
+
+(OR
+    s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem)))
+    or:(OR
+        s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))
+       y))
+  && i1 == i0+2
+  && j1 == j0+16
+  && j0 % 32 == 0
+  && x0.Uses == 1
+  && x1.Uses == 1
+  && r0.Uses == 1
+  && r1.Uses == 1
+  && s0.Uses == 1
+  && s1.Uses == 1
+  && or.Uses == 1
+  && mergePoint(b,x0,x1) != nil
   && clobber(x0)
   && clobber(x1)
-  && clobber(x2)
-  && clobber(x3)
-  && clobber(x4)
-  && clobber(x5)
-  && clobber(x6)
-  && clobber(x7)
+  && clobber(r0)
+  && clobber(r1)
   && clobber(s0)
   && clobber(s1)
-  && clobber(s2)
-  && clobber(s3)
-  && clobber(s4)
-  && clobber(s5)
-  && clobber(s6)
-  && clobber(o0)
-  && clobber(o1)
-  && clobber(o2)
-  && clobber(o3)
-  && clobber(o4)
-  && clobber(o5)
-  -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <v.Type> [i-7] {s} p idx mem)
+  && clobber(or)
+  -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
 
 // Combine stores into store multiples.
 // 32-bit
index 558d1b63158e807af47919739d5cd2c5a6214967..a207743e7cbe0aba7bf3c379566f8339ffd12669 100644 (file)
@@ -240,8 +240,8 @@ func init() {
                {name: "MULLDload", argLength: 3, reg: gpopload, asm: "MULLD", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 * *arg1. arg2=mem
                {name: "MULLWload", argLength: 3, reg: gpopload, asm: "MULLW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 * *arg1. arg2=mem
 
-               {name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", typ: "Int64", resultInArg0: true, clobberFlags: true},   // (arg0 * arg1) >> width
-               {name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", typ: "Int64", resultInArg0: true, clobberFlags: true}, // (arg0 * arg1) >> width
+               {name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", typ: "Int64", commutative: true, resultInArg0: true, clobberFlags: true},   // (arg0 * arg1) >> width
+               {name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", typ: "Int64", commutative: true, resultInArg0: true, clobberFlags: true}, // (arg0 * arg1) >> width
 
                {name: "DIVD", argLength: 2, reg: gp21, asm: "DIVD", resultInArg0: true, clobberFlags: true},   // arg0 / arg1
                {name: "DIVW", argLength: 2, reg: gp21, asm: "DIVW", resultInArg0: true, clobberFlags: true},   // arg0 / arg1
@@ -289,24 +289,24 @@ func init() {
                {name: "FCMPS", argLength: 2, reg: fp2flags, asm: "CEBR", typ: "Flags"}, // arg0 compare to arg1, f32
                {name: "FCMP", argLength: 2, reg: fp2flags, asm: "FCMPU", typ: "Flags"}, // arg0 compare to arg1, f64
 
-               {name: "SLD", argLength: 2, reg: sh21, asm: "SLD"},                    // arg0 << arg1, shift amount is mod 64
-               {name: "SLW", argLength: 2, reg: sh21, asm: "SLW"},                    // arg0 << arg1, shift amount is mod 32
-               {name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int64"}, // arg0 << auxint, shift amount 0-63
-               {name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int32"}, // arg0 << auxint, shift amount 0-31
+               {name: "SLD", argLength: 2, reg: sh21, asm: "SLD"},                   // arg0 << arg1, shift amount is mod 64
+               {name: "SLW", argLength: 2, reg: sh21, asm: "SLW"},                   // arg0 << arg1, shift amount is mod 32
+               {name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int8"}, // arg0 << auxint, shift amount 0-63
+               {name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int8"}, // arg0 << auxint, shift amount 0-31
 
-               {name: "SRD", argLength: 2, reg: sh21, asm: "SRD"},                    // unsigned arg0 >> arg1, shift amount is mod 64
-               {name: "SRW", argLength: 2, reg: sh21, asm: "SRW"},                    // unsigned arg0 >> arg1, shift amount is mod 32
-               {name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int64"}, // unsigned arg0 >> auxint, shift amount 0-63
-               {name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int32"}, // unsigned arg0 >> auxint, shift amount 0-31
+               {name: "SRD", argLength: 2, reg: sh21, asm: "SRD"},                   // unsigned arg0 >> arg1, shift amount is mod 64
+               {name: "SRW", argLength: 2, reg: sh21, asm: "SRW"},                   // unsigned arg0 >> arg1, shift amount is mod 32
+               {name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int8"}, // unsigned arg0 >> auxint, shift amount 0-63
+               {name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int8"}, // unsigned arg0 >> auxint, shift amount 0-31
 
                // Arithmetic shifts clobber flags.
-               {name: "SRAD", argLength: 2, reg: sh21, asm: "SRAD", clobberFlags: true},                    // signed arg0 >> arg1, shift amount is mod 64
-               {name: "SRAW", argLength: 2, reg: sh21, asm: "SRAW", clobberFlags: true},                    // signed arg0 >> arg1, shift amount is mod 32
-               {name: "SRADconst", argLength: 1, reg: gp11, asm: "SRAD", aux: "Int64", clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-63
-               {name: "SRAWconst", argLength: 1, reg: gp11, asm: "SRAW", aux: "Int32", clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-31
+               {name: "SRAD", argLength: 2, reg: sh21, asm: "SRAD", clobberFlags: true},                   // signed arg0 >> arg1, shift amount is mod 64
+               {name: "SRAW", argLength: 2, reg: sh21, asm: "SRAW", clobberFlags: true},                   // signed arg0 >> arg1, shift amount is mod 32
+               {name: "SRADconst", argLength: 1, reg: gp11, asm: "SRAD", aux: "Int8", clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-63
+               {name: "SRAWconst", argLength: 1, reg: gp11, asm: "SRAW", aux: "Int8", clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-31
 
-               {name: "RLLGconst", argLength: 1, reg: gp11, asm: "RLLG", aux: "Int64"}, // arg0 rotate left auxint, rotate amount 0-63
-               {name: "RLLconst", argLength: 1, reg: gp11, asm: "RLL", aux: "Int32"},   // arg0 rotate left auxint, rotate amount 0-31
+               {name: "RLLGconst", argLength: 1, reg: gp11, asm: "RLLG", aux: "Int8"}, // arg0 rotate left auxint, rotate amount 0-63
+               {name: "RLLconst", argLength: 1, reg: gp11, asm: "RLL", aux: "Int8"},   // arg0 rotate left auxint, rotate amount 0-31
 
                // unary ops
                {name: "NEG", argLength: 1, reg: gp11, asm: "NEG", clobberFlags: true},   // -arg0
@@ -388,20 +388,20 @@ func init() {
 
                // indexed loads/stores
                // TODO(mundaym): add sign-extended indexed loads
-               {name: "MOVBZloadidx", argLength: 3, reg: gploadidx, asm: "MOVBZ", aux: "SymOff", clobberFlags: true, symEffect: "Read"},      // load a byte from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVHZloadidx", argLength: 3, reg: gploadidx, asm: "MOVHZ", aux: "SymOff", clobberFlags: true, symEffect: "Read"},      // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVWZloadidx", argLength: 3, reg: gploadidx, asm: "MOVWZ", aux: "SymOff", clobberFlags: true, symEffect: "Read"},      // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVDloadidx", argLength: 3, reg: gploadidx, asm: "MOVD", aux: "SymOff", clobberFlags: true, symEffect: "Read"},        // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem
-               {name: "MOVHBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVHBR", aux: "SymOff", clobberFlags: true, symEffect: "Read"},    // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
-               {name: "MOVWBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVWBR", aux: "SymOff", clobberFlags: true, symEffect: "Read"},    // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
-               {name: "MOVDBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVDBR", aux: "SymOff", clobberFlags: true, symEffect: "Read"},    // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
-               {name: "MOVBstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVB", aux: "SymOff", clobberFlags: true, symEffect: "Write"},     // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVHstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVH", aux: "SymOff", clobberFlags: true, symEffect: "Write"},     // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVWstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff", clobberFlags: true, symEffect: "Write"},     // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVDstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVD", aux: "SymOff", clobberFlags: true, symEffect: "Write"},     // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVHBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVHBR", aux: "SymOff", clobberFlags: true, symEffect: "Write"}, // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
-               {name: "MOVWBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVWBR", aux: "SymOff", clobberFlags: true, symEffect: "Write"}, // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
-               {name: "MOVDBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVDBR", aux: "SymOff", clobberFlags: true, symEffect: "Write"}, // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
+               {name: "MOVBZloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", clobberFlags: true, symEffect: "Read"},   // load a byte from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVHZloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", clobberFlags: true, symEffect: "Read"},  // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVWZloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", clobberFlags: true, symEffect: "Read"},  // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVDloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVD", aux: "SymOff", typ: "UInt64", clobberFlags: true, symEffect: "Read"},    // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVHBRloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVHBR", aux: "SymOff", typ: "Int16", clobberFlags: true, symEffect: "Read"}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
+               {name: "MOVWBRloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVWBR", aux: "SymOff", typ: "Int32", clobberFlags: true, symEffect: "Read"}, // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
+               {name: "MOVDBRloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVDBR", aux: "SymOff", typ: "Int64", clobberFlags: true, symEffect: "Read"}, // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
+               {name: "MOVBstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVB", aux: "SymOff", clobberFlags: true, symEffect: "Write"},                // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVHstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVH", aux: "SymOff", clobberFlags: true, symEffect: "Write"},                // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVWstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVW", aux: "SymOff", clobberFlags: true, symEffect: "Write"},                // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVDstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVD", aux: "SymOff", clobberFlags: true, symEffect: "Write"},                // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVHBRstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVHBR", aux: "SymOff", clobberFlags: true, symEffect: "Write"},            // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
+               {name: "MOVWBRstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVWBR", aux: "SymOff", clobberFlags: true, symEffect: "Write"},            // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
+               {name: "MOVDBRstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVDBR", aux: "SymOff", clobberFlags: true, symEffect: "Write"},            // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
 
                // For storeconst ops, the AuxInt field encodes both
                // the value to store and an address offset of the store.
index 6163c093d29a98e73efc423a9d8de7c31ddedd96..c50b91b0cbbd43f6d032be06ff3082acf9c2efd8 100644 (file)
 (Neq16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x)) -> (Neq16 (Const16 <t> [int64(int16(c-d))]) x)
 (Neq8  (Const8  <t> [c]) (Add8  (Const8  <t> [d]) x)) -> (Neq8 (Const8 <t> [int64(int8(c-d))]) x)
 
-// canonicalize: swap arguments for commutative operations when one argument is a constant.
-(Eq64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Eq64 (Const64 <t> [c]) x)
-(Eq32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Eq32 (Const32 <t> [c]) x)
-(Eq16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Eq16 (Const16 <t> [c]) x)
-(Eq8  x (Const8  <t> [c])) && x.Op != OpConst8  -> (Eq8  (Const8  <t> [c]) x)
-
-(Neq64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Neq64 (Const64 <t> [c]) x)
-(Neq32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Neq32 (Const32 <t> [c]) x)
-(Neq16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Neq16 (Const16 <t> [c]) x)
-(Neq8  x (Const8 <t>  [c])) && x.Op != OpConst8  -> (Neq8  (Const8  <t> [c]) x)
-
-// AddPtr is not canonicalized because nilcheck ptr checks the first argument to be non-nil.
-(Add64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Add64 (Const64 <t> [c]) x)
-(Add32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Add32 (Const32 <t> [c]) x)
-(Add16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Add16 (Const16 <t> [c]) x)
-(Add8  x (Const8  <t> [c])) && x.Op != OpConst8  -> (Add8  (Const8  <t> [c]) x)
-
-(Mul64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Mul64 (Const64 <t> [c]) x)
-(Mul32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Mul32 (Const32 <t> [c]) x)
-(Mul16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Mul16 (Const16 <t> [c]) x)
-(Mul8  x (Const8  <t> [c])) && x.Op != OpConst8  -> (Mul8  (Const8  <t> [c]) x)
-
+// Canonicalize x-const to x+(-const)
 (Sub64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Add64 (Const64 <t> [-c]) x)
 (Sub32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Add32 (Const32 <t> [int64(int32(-c))]) x)
 (Sub16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Add16 (Const16 <t> [int64(int16(-c))]) x)
 (Sub8  x (Const8  <t> [c])) && x.Op != OpConst8  -> (Add8  (Const8  <t> [int64(int8(-c))]) x)
 
-(And64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (And64 (Const64 <t> [c]) x)
-(And32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (And32 (Const32 <t> [c]) x)
-(And16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (And16 (Const16 <t> [c]) x)
-(And8  x (Const8  <t> [c])) && x.Op != OpConst8  -> (And8  (Const8  <t> [c]) x)
-
-(Or64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Or64 (Const64 <t> [c]) x)
-(Or32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Or32 (Const32 <t> [c]) x)
-(Or16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Or16 (Const16 <t> [c]) x)
-(Or8  x (Const8  <t> [c])) && x.Op != OpConst8  -> (Or8  (Const8  <t> [c]) x)
-
-(Xor64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Xor64 (Const64 <t> [c]) x)
-(Xor32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Xor32 (Const32 <t> [c]) x)
-(Xor16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Xor16 (Const16 <t> [c]) x)
-(Xor8  x (Const8  <t> [c])) && x.Op != OpConst8  -> (Xor8  (Const8  <t> [c]) x)
-
 // fold negation into comparison operators
 (Not (Eq64 x y)) -> (Neq64 x y)
 (Not (Eq32 x y)) -> (Neq32 x y)
 (And32 x (And32 x y)) -> (And32 x y)
 (And16 x (And16 x y)) -> (And16 x y)
 (And8  x (And8  x y)) -> (And8  x y)
-(And64 x (And64 y x)) -> (And64 x y)
-(And32 x (And32 y x)) -> (And32 x y)
-(And16 x (And16 y x)) -> (And16 x y)
-(And8  x (And8  y x)) -> (And8  x y)
-(And64 (And64 x y) x) -> (And64 x y)
-(And32 (And32 x y) x) -> (And32 x y)
-(And16 (And16 x y) x) -> (And16 x y)
-(And8  (And8  x y) x) -> (And8  x y)
-(And64 (And64 x y) y) -> (And64 x y)
-(And32 (And32 x y) y) -> (And32 x y)
-(And16 (And16 x y) y) -> (And16 x y)
-(And8  (And8  x y) y) -> (And8  x y)
 (Or64 x (Or64 x y)) -> (Or64 x y)
 (Or32 x (Or32 x y)) -> (Or32 x y)
 (Or16 x (Or16 x y)) -> (Or16 x y)
 (Or8  x (Or8  x y)) -> (Or8  x y)
-(Or64 x (Or64 y x)) -> (Or64 x y)
-(Or32 x (Or32 y x)) -> (Or32 x y)
-(Or16 x (Or16 y x)) -> (Or16 x y)
-(Or8  x (Or8  y x)) -> (Or8  x y)
-(Or64 (Or64 x y) x) -> (Or64 x y)
-(Or32 (Or32 x y) x) -> (Or32 x y)
-(Or16 (Or16 x y) x) -> (Or16 x y)
-(Or8  (Or8  x y) x) -> (Or8  x y)
-(Or64 (Or64 x y) y) -> (Or64 x y)
-(Or32 (Or32 x y) y) -> (Or32 x y)
-(Or16 (Or16 x y) y) -> (Or16 x y)
-(Or8  (Or8  x y) y) -> (Or8  x y)
 (Xor64 x (Xor64 x y)) -> y
 (Xor32 x (Xor32 x y)) -> y
 (Xor16 x (Xor16 x y)) -> y
 (Xor8  x (Xor8  x y)) -> y
-(Xor64 x (Xor64 y x)) -> y
-(Xor32 x (Xor32 y x)) -> y
-(Xor16 x (Xor16 y x)) -> y
-(Xor8  x (Xor8  y x)) -> y
-(Xor64 (Xor64 x y) x) -> y
-(Xor32 (Xor32 x y) x) -> y
-(Xor16 (Xor16 x y) x) -> y
-(Xor8  (Xor8  x y) x) -> y
-(Xor64 (Xor64 x y) y) -> x
-(Xor32 (Xor32 x y) y) -> x
-(Xor16 (Xor16 x y) y) -> x
-(Xor8  (Xor8  x y) y) -> x
 
 (Trunc64to8  (And64 (Const64 [y]) x)) && y&0xFF == 0xFF -> (Trunc64to8 x)
 (Trunc64to16 (And64 (Const64 [y]) x)) && y&0xFFFF == 0xFFFF -> (Trunc64to16 x)
 
 // user nil checks
 (NeqPtr p (ConstNil)) -> (IsNonNil p)
-(NeqPtr (ConstNil) p) -> (IsNonNil p)
 (EqPtr p (ConstNil)) -> (Not (IsNonNil p))
-(EqPtr (ConstNil) p) -> (Not (IsNonNil p))
 (IsNonNil (ConstNil)) -> (ConstBool [0])
 
 // slice and interface comparisons
 
 // Get rid of Convert ops for pointer arithmetic on unsafe.Pointer.
 (Convert (Add64 (Convert ptr mem) off) mem) -> (Add64 ptr off)
-(Convert (Add64 off (Convert ptr mem)) mem) -> (Add64 ptr off)
 (Convert (Convert ptr mem) mem) -> ptr
 
 // Decompose compound argument values
 // Reassociate expressions involving
 // constants such that constants come first,
 // exposing obvious constant-folding opportunities.
-// First, re-write (op x (op y z)) to (op (op y z) x) if
-// the op is commutative, to reduce the number of subsequent
-// matching rules for folding. Then, reassociate
-// (op (op y C) x) to (op C (op x y)) or similar, where C
+// Reassociate (op (op y C) x) to (op C (op x y)) or similar, where C
 // is constant, which pushes constants to the outside
 // of the expression. At that point, any constant-folding
 // opportunities should be obvious.
 
-(Add64 x l:(Add64 _ _)) && (x.Op != OpAdd64 && x.Op != OpConst64) -> (Add64 l x)
-(Add32 x l:(Add32 _ _)) && (x.Op != OpAdd32 && x.Op != OpConst32) -> (Add32 l x)
-(Add16 x l:(Add16 _ _)) && (x.Op != OpAdd16 && x.Op != OpConst16) -> (Add16 l x)
-(Add8  x l:(Add8  _ _)) && (x.Op != OpAdd8  && x.Op != OpConst8)  -> (Add8  l x)
-(And64 x l:(And64 _ _)) && (x.Op != OpAnd64 && x.Op != OpConst64) -> (And64 l x)
-(And32 x l:(And32 _ _)) && (x.Op != OpAnd32 && x.Op != OpConst32) -> (And32 l x)
-(And16 x l:(And16 _ _)) && (x.Op != OpAnd16 && x.Op != OpConst16) -> (And16 l x)
-(And8  x l:(And8  _ _)) && (x.Op != OpAnd8  && x.Op != OpConst8)  -> (And8  l x)
-(Or64 x l:(Or64 _ _)) && (x.Op != OpOr64 && x.Op != OpConst64) -> (Or64 l x)
-(Or32 x l:(Or32 _ _)) && (x.Op != OpOr32 && x.Op != OpConst32) -> (Or32 l x)
-(Or16 x l:(Or16 _ _)) && (x.Op != OpOr16 && x.Op != OpConst16) -> (Or16 l x)
-(Or8  x l:(Or8  _ _)) && (x.Op != OpOr8  && x.Op != OpConst8)  -> (Or8  l x)
-(Xor64 x l:(Xor64 _ _)) && (x.Op != OpXor64 && x.Op != OpConst64) -> (Xor64 l x)
-(Xor32 x l:(Xor32 _ _)) && (x.Op != OpXor32 && x.Op != OpConst32) -> (Xor32 l x)
-(Xor16 x l:(Xor16 _ _)) && (x.Op != OpXor16 && x.Op != OpConst16) -> (Xor16 l x)
-(Xor8  x l:(Xor8  _ _)) && (x.Op != OpXor8  && x.Op != OpConst8)  -> (Xor8  l x)
-(Mul64 x l:(Mul64 _ _)) && (x.Op != OpMul64 && x.Op != OpConst64) -> (Mul64 l x)
-(Mul32 x l:(Mul32 _ _)) && (x.Op != OpMul32 && x.Op != OpConst32) -> (Mul32 l x)
-(Mul16 x l:(Mul16 _ _)) && (x.Op != OpMul16 && x.Op != OpConst16) -> (Mul16 l x)
-(Mul8  x l:(Mul8  _ _)) && (x.Op != OpMul8  && x.Op != OpConst8)  -> (Mul8  l x)
-
 // x + (C + z) -> C + (x + z)
 (Add64 (Add64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) -> (Add64 i (Add64 <t> z x))
 (Add32 (Add32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) -> (Add32 i (Add32 <t> z x))
 
 // floating point optimizations
 (Add32F x (Const32F [0])) -> x
-(Add32F (Const32F [0]) x) -> x
 (Add64F x (Const64F [0])) -> x
-(Add64F (Const64F [0]) x) -> x
 (Sub32F x (Const32F [0])) -> x
 (Sub64F x (Const64F [0])) -> x
 (Mul32F x (Const32F [f2i(1)])) -> x
-(Mul32F (Const32F [f2i(1)]) x) -> x
 (Mul64F x (Const64F [f2i(1)])) -> x
-(Mul64F (Const64F [f2i(1)]) x) -> x
 (Mul32F x (Const32F [f2i(-1)])) -> (Neg32F x)
-(Mul32F (Const32F [f2i(-1)]) x) -> (Neg32F x)
 (Mul64F x (Const64F [f2i(-1)])) -> (Neg64F x)
-(Mul64F (Const64F [f2i(-1)]) x) -> (Neg64F x)
 (Mul32F x (Const32F [f2i(2)])) -> (Add32F x x)
 (Mul64F x (Const64F [f2i(2)])) -> (Add64F x x)
 (Div32F x (Const32F <t> [c])) && reciprocalExact32(float32(i2f(c))) -> (Mul32F x (Const32F <t> [f2i(1/i2f(c))]))
index a5ac62829ee09e5fb28ebecdf0959858ec758391..7991f32679c35512b42d1c9763a387c57672b7a9 100644 (file)
@@ -28,8 +28,8 @@ var genericOps = []opData{
        {name: "Add32", argLength: 2, commutative: true},
        {name: "Add64", argLength: 2, commutative: true},
        {name: "AddPtr", argLength: 2}, // For address calculations.  arg0 is a pointer and arg1 is an int.
-       {name: "Add32F", argLength: 2},
-       {name: "Add64F", argLength: 2},
+       {name: "Add32F", argLength: 2, commutative: true},
+       {name: "Add64F", argLength: 2, commutative: true},
 
        {name: "Sub8", argLength: 2}, // arg0 - arg1
        {name: "Sub16", argLength: 2},
@@ -43,24 +43,25 @@ var genericOps = []opData{
        {name: "Mul16", argLength: 2, commutative: true},
        {name: "Mul32", argLength: 2, commutative: true},
        {name: "Mul64", argLength: 2, commutative: true},
-       {name: "Mul32F", argLength: 2},
-       {name: "Mul64F", argLength: 2},
+       {name: "Mul32F", argLength: 2, commutative: true},
+       {name: "Mul64F", argLength: 2, commutative: true},
 
        {name: "Div32F", argLength: 2}, // arg0 / arg1
        {name: "Div64F", argLength: 2},
 
-       {name: "Hmul32", argLength: 2},
-       {name: "Hmul32u", argLength: 2},
-       {name: "Hmul64", argLength: 2},
-       {name: "Hmul64u", argLength: 2},
+       {name: "Hmul32", argLength: 2, commutative: true},
+       {name: "Hmul32u", argLength: 2, commutative: true},
+       {name: "Hmul64", argLength: 2, commutative: true},
+       {name: "Hmul64u", argLength: 2, commutative: true},
 
-       {name: "Mul32uhilo", argLength: 2, typ: "(UInt32,UInt32)"}, // arg0 * arg1, returns (hi, lo)
-       {name: "Mul64uhilo", argLength: 2, typ: "(UInt64,UInt64)"}, // arg0 * arg1, returns (hi, lo)
+       {name: "Mul32uhilo", argLength: 2, typ: "(UInt32,UInt32)", commutative: true}, // arg0 * arg1, returns (hi, lo)
+       {name: "Mul64uhilo", argLength: 2, typ: "(UInt64,UInt64)", commutative: true}, // arg0 * arg1, returns (hi, lo)
 
        // Weird special instructions for use in the strength reduction of divides.
        // These ops compute unsigned (arg0 + arg1) / 2, correct to all
        // 32/64 bits, even when the intermediate result of the add has 33/65 bits.
        // These ops can assume arg0 >= arg1.
+       // Note: these ops aren't commutative!
        {name: "Avg32u", argLength: 2, typ: "UInt32"}, // 32-bit platforms only
        {name: "Avg64u", argLength: 2, typ: "UInt64"}, // 64-bit platforms only
 
@@ -159,8 +160,8 @@ var genericOps = []opData{
        {name: "EqPtr", argLength: 2, commutative: true, typ: "Bool"},
        {name: "EqInter", argLength: 2, typ: "Bool"}, // arg0 or arg1 is nil; other cases handled by frontend
        {name: "EqSlice", argLength: 2, typ: "Bool"}, // arg0 or arg1 is nil; other cases handled by frontend
-       {name: "Eq32F", argLength: 2, typ: "Bool"},
-       {name: "Eq64F", argLength: 2, typ: "Bool"},
+       {name: "Eq32F", argLength: 2, commutative: true, typ: "Bool"},
+       {name: "Eq64F", argLength: 2, commutative: true, typ: "Bool"},
 
        {name: "Neq8", argLength: 2, commutative: true, typ: "Bool"}, // arg0 != arg1
        {name: "Neq16", argLength: 2, commutative: true, typ: "Bool"},
@@ -169,8 +170,8 @@ var genericOps = []opData{
        {name: "NeqPtr", argLength: 2, commutative: true, typ: "Bool"},
        {name: "NeqInter", argLength: 2, typ: "Bool"}, // arg0 or arg1 is nil; other cases handled by frontend
        {name: "NeqSlice", argLength: 2, typ: "Bool"}, // arg0 or arg1 is nil; other cases handled by frontend
-       {name: "Neq32F", argLength: 2, typ: "Bool"},
-       {name: "Neq64F", argLength: 2},
+       {name: "Neq32F", argLength: 2, commutative: true, typ: "Bool"},
+       {name: "Neq64F", argLength: 2, commutative: true, typ: "Bool"},
 
        {name: "Less8", argLength: 2, typ: "Bool"},  // arg0 < arg1, signed
        {name: "Less8U", argLength: 2, typ: "Bool"}, // arg0 < arg1, unsigned
index d51c22ce11dc834924e505f88b11d4d619e37687..35c5313a664735d9acca6770f08f8f1db7547696 100644 (file)
@@ -30,7 +30,7 @@ import (
 //  sexpr [&& extra conditions] -> [@block] sexpr
 //
 // sexpr are s-expressions (lisp-like parenthesized groupings)
-// sexpr ::= (opcode sexpr*)
+// sexpr ::= [variable:](opcode sexpr*)
 //         | variable
 //         | <type>
 //         | [auxint]
@@ -39,7 +39,7 @@ import (
 // aux      ::= variable | {code}
 // type     ::= variable | {code}
 // variable ::= some token
-// opcode   ::= one of the opcodes from ../op.go (without the Op prefix)
+// opcode   ::= one of the opcodes from the *Ops.go files
 
 // extra conditions is just a chunk of Go that evaluates to a boolean. It may use
 // variables declared in the matching sexpr. The variable "v" is predefined to be
@@ -119,15 +119,17 @@ func genRules(arch arch) {
                }
 
                loc := fmt.Sprintf("%s.rules:%d", arch.name, ruleLineno)
-               r := Rule{rule: rule, loc: loc}
-               if rawop := strings.Split(rule, " ")[0][1:]; isBlock(rawop, arch) {
-                       blockrules[rawop] = append(blockrules[rawop], r)
-               } else {
-                       // Do fancier value op matching.
-                       match, _, _ := r.parse()
-                       op, oparch, _, _, _, _ := parseValue(match, arch, loc)
-                       opname := fmt.Sprintf("Op%s%s", oparch, op.name)
-                       oprules[opname] = append(oprules[opname], r)
+               for _, crule := range commute(rule, arch) {
+                       r := Rule{rule: crule, loc: loc}
+                       if rawop := strings.Split(crule, " ")[0][1:]; isBlock(rawop, arch) {
+                               blockrules[rawop] = append(blockrules[rawop], r)
+                       } else {
+                               // Do fancier value op matching.
+                               match, _, _ := r.parse()
+                               op, oparch, _, _, _, _ := parseValue(match, arch, loc)
+                               opname := fmt.Sprintf("Op%s%s", oparch, op.name)
+                               oprules[opname] = append(oprules[opname], r)
+                       }
                }
                rule = ""
                ruleLineno = 0
@@ -754,3 +756,169 @@ func isVariable(s string) bool {
        }
        return b
 }
+
+// commute returns all rules equivalent to r after applying all possible
+// argument swaps to the commutable ops in r.
+// Potentially exponential, be careful.
+func commute(r string, arch arch) []string {
+       match, cond, result := Rule{rule: r}.parse()
+       a := commute1(match, varCount(match), arch)
+       for i, m := range a {
+               if cond != "" {
+                       m += " && " + cond
+               }
+               m += " -> " + result
+               a[i] = m
+       }
+       if len(a) == 1 && normalizeWhitespace(r) != normalizeWhitespace(a[0]) {
+               fmt.Println(normalizeWhitespace(r))
+               fmt.Println(normalizeWhitespace(a[0]))
+               panic("commute() is not the identity for noncommuting rule")
+       }
+       if false && len(a) > 1 {
+               fmt.Println(r)
+               for _, x := range a {
+                       fmt.Println("  " + x)
+               }
+       }
+       return a
+}
+
+func commute1(m string, cnt map[string]int, arch arch) []string {
+       if m[0] == '<' || m[0] == '[' || m[0] == '{' || isVariable(m) {
+               return []string{m}
+       }
+       // Split up input.
+       var prefix string
+       colon := strings.Index(m, ":")
+       if colon >= 0 && isVariable(m[:colon]) {
+               prefix = m[:colon+1]
+               m = m[colon+1:]
+       }
+       if m[0] != '(' || m[len(m)-1] != ')' {
+               panic("non-compound expr in commute1: " + m)
+       }
+       s := split(m[1 : len(m)-1])
+       op := s[0]
+
+       // Figure out if the op is commutative or not.
+       commutative := false
+       for _, x := range genericOps {
+               if op == x.name {
+                       if x.commutative {
+                               commutative = true
+                       }
+                       break
+               }
+       }
+       if arch.name != "generic" {
+               for _, x := range arch.ops {
+                       if op == x.name {
+                               if x.commutative {
+                                       commutative = true
+                               }
+                               break
+                       }
+               }
+       }
+       var idx0, idx1 int
+       if commutative {
+               // Find indexes of two args we can swap.
+               for i, arg := range s {
+                       if i == 0 || arg[0] == '<' || arg[0] == '[' || arg[0] == '{' {
+                               continue
+                       }
+                       if idx0 == 0 {
+                               idx0 = i
+                               continue
+                       }
+                       if idx1 == 0 {
+                               idx1 = i
+                               break
+                       }
+               }
+               if idx1 == 0 {
+                       panic("couldn't find first two args of commutative op " + s[0])
+               }
+               if cnt[s[idx0]] == 1 && cnt[s[idx1]] == 1 || s[idx0] == s[idx1] && cnt[s[idx0]] == 2 {
+                       // When we have (Add x y) with no other uses of x and y in the matching rule,
+                       // then we can skip the commutative match (Add y x).
+                       commutative = false
+               }
+       }
+
+       // Recursively commute arguments.
+       a := make([][]string, len(s))
+       for i, arg := range s {
+               a[i] = commute1(arg, cnt, arch)
+       }
+
+       // Choose all possibilities from all args.
+       r := crossProduct(a)
+
+       // If commutative, do that again with its two args reversed.
+       if commutative {
+               a[idx0], a[idx1] = a[idx1], a[idx0]
+               r = append(r, crossProduct(a)...)
+       }
+
+       // Construct result.
+       for i, x := range r {
+               r[i] = prefix + "(" + x + ")"
+       }
+       return r
+}
+
+// varCount returns a map which counts the number of occurrences of
+// Value variables in m.
+func varCount(m string) map[string]int {
+       cnt := map[string]int{}
+       varCount1(m, cnt)
+       return cnt
+}
+func varCount1(m string, cnt map[string]int) {
+       if m[0] == '<' || m[0] == '[' || m[0] == '{' {
+               return
+       }
+       if isVariable(m) {
+               cnt[m]++
+               return
+       }
+       // Split up input.
+       colon := strings.Index(m, ":")
+       if colon >= 0 && isVariable(m[:colon]) {
+               cnt[m[:colon]]++
+               m = m[colon+1:]
+       }
+       if m[0] != '(' || m[len(m)-1] != ')' {
+               panic("non-compound expr in commute1: " + m)
+       }
+       s := split(m[1 : len(m)-1])
+       for _, arg := range s[1:] {
+               varCount1(arg, cnt)
+       }
+}
+
+// crossProduct returns all possible values
+// x[0][i] + " " + x[1][j] + " " + ... + " " + x[len(x)-1][k]
+// for all valid values of i, j, ..., k.
+func crossProduct(x [][]string) []string {
+       if len(x) == 1 {
+               return x[0]
+       }
+       var r []string
+       for _, tail := range crossProduct(x[1:]) {
+               for _, first := range x[0] {
+                       r = append(r, first+" "+tail)
+               }
+       }
+       return r
+}
+
+// normalizeWhitespace collapses each run of whitespace into a single space and removes spaces just inside parentheses.
+func normalizeWhitespace(x string) string {
+       x = strings.Join(strings.Fields(x), " ")
+       x = strings.Replace(x, "( ", "(", -1)
+       x = strings.Replace(x, " )", ")", -1)
+       return x
+}
index 1b0225668417cef79a706788b8a67a07ff50e364..daeaf64c621a03515b46a3b7e1e57297ff59605a 100644 (file)
@@ -2476,6 +2476,7 @@ var opcodeTable = [...]opInfo{
        {
                name:         "HMULL",
                argLen:       2,
+               commutative:  true,
                clobberFlags: true,
                asm:          x86.AIMULL,
                reg: regInfo{
@@ -2492,6 +2493,7 @@ var opcodeTable = [...]opInfo{
        {
                name:         "HMULLU",
                argLen:       2,
+               commutative:  true,
                clobberFlags: true,
                asm:          x86.AMULL,
                reg: regInfo{
@@ -2508,6 +2510,7 @@ var opcodeTable = [...]opInfo{
        {
                name:         "MULLQU",
                argLen:       2,
+               commutative:  true,
                clobberFlags: true,
                asm:          x86.AMULL,
                reg: regInfo{
@@ -2855,9 +2858,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "TESTL",
-               argLen: 2,
-               asm:    x86.ATESTL,
+               name:        "TESTL",
+               argLen:      2,
+               commutative: true,
+               asm:         x86.ATESTL,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 255}, // AX CX DX BX SP BP SI DI
@@ -2866,9 +2870,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "TESTW",
-               argLen: 2,
-               asm:    x86.ATESTW,
+               name:        "TESTW",
+               argLen:      2,
+               commutative: true,
+               asm:         x86.ATESTW,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 255}, // AX CX DX BX SP BP SI DI
@@ -2877,9 +2882,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "TESTB",
-               argLen: 2,
-               asm:    x86.ATESTB,
+               name:        "TESTB",
+               argLen:      2,
+               commutative: true,
+               asm:         x86.ATESTB,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 255}, // AX CX DX BX SP BP SI DI
@@ -3659,10 +3665,11 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "LEAL1",
-               auxType:   auxSymOff,
-               argLen:    2,
-               symEffect: SymAddr,
+               name:        "LEAL1",
+               auxType:     auxSymOff,
+               argLen:      2,
+               commutative: true,
+               symEffect:   SymAddr,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 255},   // AX CX DX BX SP BP SI DI
@@ -3841,11 +3848,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVBloadidx1",
-               auxType:   auxSymOff,
-               argLen:    3,
-               symEffect: SymRead,
-               asm:       x86.AMOVBLZX,
+               name:        "MOVBloadidx1",
+               auxType:     auxSymOff,
+               argLen:      3,
+               commutative: true,
+               symEffect:   SymRead,
+               asm:         x86.AMOVBLZX,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 255},   // AX CX DX BX SP BP SI DI
@@ -3857,11 +3865,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVWloadidx1",
-               auxType:   auxSymOff,
-               argLen:    3,
-               symEffect: SymRead,
-               asm:       x86.AMOVWLZX,
+               name:        "MOVWloadidx1",
+               auxType:     auxSymOff,
+               argLen:      3,
+               commutative: true,
+               symEffect:   SymRead,
+               asm:         x86.AMOVWLZX,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 255},   // AX CX DX BX SP BP SI DI
@@ -3889,11 +3898,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVLloadidx1",
-               auxType:   auxSymOff,
-               argLen:    3,
-               symEffect: SymRead,
-               asm:       x86.AMOVL,
+               name:        "MOVLloadidx1",
+               auxType:     auxSymOff,
+               argLen:      3,
+               commutative: true,
+               symEffect:   SymRead,
+               asm:         x86.AMOVL,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 255},   // AX CX DX BX SP BP SI DI
@@ -3921,11 +3931,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVBstoreidx1",
-               auxType:   auxSymOff,
-               argLen:    4,
-               symEffect: SymWrite,
-               asm:       x86.AMOVB,
+               name:        "MOVBstoreidx1",
+               auxType:     auxSymOff,
+               argLen:      4,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVB,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 255},   // AX CX DX BX SP BP SI DI
@@ -3935,11 +3946,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVWstoreidx1",
-               auxType:   auxSymOff,
-               argLen:    4,
-               symEffect: SymWrite,
-               asm:       x86.AMOVW,
+               name:        "MOVWstoreidx1",
+               auxType:     auxSymOff,
+               argLen:      4,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVW,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 255},   // AX CX DX BX SP BP SI DI
@@ -3963,11 +3975,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVLstoreidx1",
-               auxType:   auxSymOff,
-               argLen:    4,
-               symEffect: SymWrite,
-               asm:       x86.AMOVL,
+               name:        "MOVLstoreidx1",
+               auxType:     auxSymOff,
+               argLen:      4,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVL,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 255},   // AX CX DX BX SP BP SI DI
@@ -4950,6 +4963,7 @@ var opcodeTable = [...]opInfo{
        {
                name:         "HMULQ",
                argLen:       2,
+               commutative:  true,
                clobberFlags: true,
                asm:          x86.AIMULQ,
                reg: regInfo{
@@ -4966,6 +4980,7 @@ var opcodeTable = [...]opInfo{
        {
                name:         "HMULL",
                argLen:       2,
+               commutative:  true,
                clobberFlags: true,
                asm:          x86.AIMULL,
                reg: regInfo{
@@ -4982,6 +4997,7 @@ var opcodeTable = [...]opInfo{
        {
                name:         "HMULQU",
                argLen:       2,
+               commutative:  true,
                clobberFlags: true,
                asm:          x86.AMULQ,
                reg: regInfo{
@@ -4998,6 +5014,7 @@ var opcodeTable = [...]opInfo{
        {
                name:         "HMULLU",
                argLen:       2,
+               commutative:  true,
                clobberFlags: true,
                asm:          x86.AMULL,
                reg: regInfo{
@@ -5126,6 +5143,7 @@ var opcodeTable = [...]opInfo{
        {
                name:         "MULQU2",
                argLen:       2,
+               commutative:  true,
                clobberFlags: true,
                asm:          x86.AMULQ,
                reg: regInfo{
@@ -5509,9 +5527,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "TESTQ",
-               argLen: 2,
-               asm:    x86.ATESTQ,
+               name:        "TESTQ",
+               argLen:      2,
+               commutative: true,
+               asm:         x86.ATESTQ,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -5520,9 +5539,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "TESTL",
-               argLen: 2,
-               asm:    x86.ATESTL,
+               name:        "TESTL",
+               argLen:      2,
+               commutative: true,
+               asm:         x86.ATESTL,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -5531,9 +5551,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "TESTW",
-               argLen: 2,
-               asm:    x86.ATESTW,
+               name:        "TESTW",
+               argLen:      2,
+               commutative: true,
+               asm:         x86.ATESTW,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -5542,9 +5563,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "TESTB",
-               argLen: 2,
-               asm:    x86.ATESTB,
+               name:        "TESTB",
+               argLen:      2,
+               commutative: true,
+               asm:         x86.ATESTB,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -5630,7 +5652,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "SHLQconst",
-               auxType:      auxInt64,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -5646,7 +5668,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "SHLLconst",
-               auxType:      auxInt32,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -5726,7 +5748,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "SHRQconst",
-               auxType:      auxInt64,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -5742,7 +5764,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "SHRLconst",
-               auxType:      auxInt32,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -5758,7 +5780,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "SHRWconst",
-               auxType:      auxInt16,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -5854,7 +5876,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "SARQconst",
-               auxType:      auxInt64,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -5870,7 +5892,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "SARLconst",
-               auxType:      auxInt32,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -5886,7 +5908,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "SARWconst",
-               auxType:      auxInt16,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -5918,7 +5940,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "ROLQconst",
-               auxType:      auxInt64,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -5934,7 +5956,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "ROLLconst",
-               auxType:      auxInt32,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -5950,7 +5972,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "ROLWconst",
-               auxType:      auxInt16,
+               auxType:      auxInt8,
                argLen:       1,
                resultInArg0: true,
                clobberFlags: true,
@@ -6808,10 +6830,11 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "LEAQ1",
-               auxType:   auxSymOff,
-               argLen:    2,
-               symEffect: SymAddr,
+               name:        "LEAQ1",
+               auxType:     auxSymOff,
+               argLen:      2,
+               commutative: true,
+               symEffect:   SymAddr,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -7082,11 +7105,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVBloadidx1",
-               auxType:   auxSymOff,
-               argLen:    3,
-               symEffect: SymRead,
-               asm:       x86.AMOVBLZX,
+               name:        "MOVBloadidx1",
+               auxType:     auxSymOff,
+               argLen:      3,
+               commutative: true,
+               symEffect:   SymRead,
+               asm:         x86.AMOVBLZX,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -7098,11 +7122,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVWloadidx1",
-               auxType:   auxSymOff,
-               argLen:    3,
-               symEffect: SymRead,
-               asm:       x86.AMOVWLZX,
+               name:        "MOVWloadidx1",
+               auxType:     auxSymOff,
+               argLen:      3,
+               commutative: true,
+               symEffect:   SymRead,
+               asm:         x86.AMOVWLZX,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -7130,11 +7155,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVLloadidx1",
-               auxType:   auxSymOff,
-               argLen:    3,
-               symEffect: SymRead,
-               asm:       x86.AMOVL,
+               name:        "MOVLloadidx1",
+               auxType:     auxSymOff,
+               argLen:      3,
+               commutative: true,
+               symEffect:   SymRead,
+               asm:         x86.AMOVL,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -7162,11 +7188,12 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVQloadidx1",
-               auxType:   auxSymOff,
-               argLen:    3,
-               symEffect: SymRead,
-               asm:       x86.AMOVQ,
+               name:        "MOVQloadidx1",
+               auxType:     auxSymOff,
+               argLen:      3,
+               commutative: true,
+               symEffect:   SymRead,
+               asm:         x86.AMOVQ,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -16561,9 +16588,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "NOR",
-               argLen: 2,
-               asm:    ppc64.ANOR,
+               name:        "NOR",
+               argLen:      2,
+               commutative: true,
+               asm:         ppc64.ANOR,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
@@ -18308,6 +18336,7 @@ var opcodeTable = [...]opInfo{
        {
                name:         "MULHD",
                argLen:       2,
+               commutative:  true,
                resultInArg0: true,
                clobberFlags: true,
                asm:          s390x.AMULHD,
@@ -18324,6 +18353,7 @@ var opcodeTable = [...]opInfo{
        {
                name:         "MULHDU",
                argLen:       2,
+               commutative:  true,
                resultInArg0: true,
                clobberFlags: true,
                asm:          s390x.AMULHDU,
@@ -18911,7 +18941,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:    "SLDconst",
-               auxType: auxInt64,
+               auxType: auxInt8,
                argLen:  1,
                asm:     s390x.ASLD,
                reg: regInfo{
@@ -18925,7 +18955,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:    "SLWconst",
-               auxType: auxInt32,
+               auxType: auxInt8,
                argLen:  1,
                asm:     s390x.ASLW,
                reg: regInfo{
@@ -18967,7 +18997,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:    "SRDconst",
-               auxType: auxInt64,
+               auxType: auxInt8,
                argLen:  1,
                asm:     s390x.ASRD,
                reg: regInfo{
@@ -18981,7 +19011,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:    "SRWconst",
-               auxType: auxInt32,
+               auxType: auxInt8,
                argLen:  1,
                asm:     s390x.ASRW,
                reg: regInfo{
@@ -19025,7 +19055,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "SRADconst",
-               auxType:      auxInt64,
+               auxType:      auxInt8,
                argLen:       1,
                clobberFlags: true,
                asm:          s390x.ASRAD,
@@ -19040,7 +19070,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:         "SRAWconst",
-               auxType:      auxInt32,
+               auxType:      auxInt8,
                argLen:       1,
                clobberFlags: true,
                asm:          s390x.ASRAW,
@@ -19055,7 +19085,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:    "RLLGconst",
-               auxType: auxInt64,
+               auxType: auxInt8,
                argLen:  1,
                asm:     s390x.ARLLG,
                reg: regInfo{
@@ -19069,7 +19099,7 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:    "RLLconst",
-               auxType: auxInt32,
+               auxType: auxInt8,
                argLen:  1,
                asm:     s390x.ARLL,
                reg: regInfo{
@@ -19889,6 +19919,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVBZloadidx",
                auxType:      auxSymOff,
                argLen:       3,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymRead,
                asm:          s390x.AMOVBZ,
@@ -19906,6 +19937,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVHZloadidx",
                auxType:      auxSymOff,
                argLen:       3,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymRead,
                asm:          s390x.AMOVHZ,
@@ -19923,6 +19955,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVWZloadidx",
                auxType:      auxSymOff,
                argLen:       3,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymRead,
                asm:          s390x.AMOVWZ,
@@ -19940,6 +19973,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVDloadidx",
                auxType:      auxSymOff,
                argLen:       3,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymRead,
                asm:          s390x.AMOVD,
@@ -19957,6 +19991,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVHBRloadidx",
                auxType:      auxSymOff,
                argLen:       3,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymRead,
                asm:          s390x.AMOVHBR,
@@ -19974,6 +20009,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVWBRloadidx",
                auxType:      auxSymOff,
                argLen:       3,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymRead,
                asm:          s390x.AMOVWBR,
@@ -19991,6 +20027,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVDBRloadidx",
                auxType:      auxSymOff,
                argLen:       3,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymRead,
                asm:          s390x.AMOVDBR,
@@ -20008,6 +20045,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVBstoreidx",
                auxType:      auxSymOff,
                argLen:       4,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymWrite,
                asm:          s390x.AMOVB,
@@ -20023,6 +20061,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVHstoreidx",
                auxType:      auxSymOff,
                argLen:       4,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymWrite,
                asm:          s390x.AMOVH,
@@ -20038,6 +20077,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVWstoreidx",
                auxType:      auxSymOff,
                argLen:       4,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymWrite,
                asm:          s390x.AMOVW,
@@ -20053,6 +20093,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVDstoreidx",
                auxType:      auxSymOff,
                argLen:       4,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymWrite,
                asm:          s390x.AMOVD,
@@ -20068,6 +20109,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVHBRstoreidx",
                auxType:      auxSymOff,
                argLen:       4,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymWrite,
                asm:          s390x.AMOVHBR,
@@ -20083,6 +20125,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVWBRstoreidx",
                auxType:      auxSymOff,
                argLen:       4,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymWrite,
                asm:          s390x.AMOVWBR,
@@ -20098,6 +20141,7 @@ var opcodeTable = [...]opInfo{
                name:         "MOVDBRstoreidx",
                auxType:      auxSymOff,
                argLen:       4,
+               commutative:  true,
                clobberFlags: true,
                symEffect:    SymWrite,
                asm:          s390x.AMOVDBR,
@@ -20672,14 +20716,16 @@ var opcodeTable = [...]opInfo{
                generic: true,
        },
        {
-               name:    "Add32F",
-               argLen:  2,
-               generic: true,
+               name:        "Add32F",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
-               name:    "Add64F",
-               argLen:  2,
-               generic: true,
+               name:        "Add64F",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
                name:    "Sub8",
@@ -20741,14 +20787,16 @@ var opcodeTable = [...]opInfo{
                generic:     true,
        },
        {
-               name:    "Mul32F",
-               argLen:  2,
-               generic: true,
+               name:        "Mul32F",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
-               name:    "Mul64F",
-               argLen:  2,
-               generic: true,
+               name:        "Mul64F",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
                name:    "Div32F",
@@ -20761,34 +20809,40 @@ var opcodeTable = [...]opInfo{
                generic: true,
        },
        {
-               name:    "Hmul32",
-               argLen:  2,
-               generic: true,
+               name:        "Hmul32",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
-               name:    "Hmul32u",
-               argLen:  2,
-               generic: true,
+               name:        "Hmul32u",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
-               name:    "Hmul64",
-               argLen:  2,
-               generic: true,
+               name:        "Hmul64",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
-               name:    "Hmul64u",
-               argLen:  2,
-               generic: true,
+               name:        "Hmul64u",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
-               name:    "Mul32uhilo",
-               argLen:  2,
-               generic: true,
+               name:        "Mul32uhilo",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
-               name:    "Mul64uhilo",
-               argLen:  2,
-               generic: true,
+               name:        "Mul64uhilo",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
                name:    "Avg32u",
@@ -21238,14 +21292,16 @@ var opcodeTable = [...]opInfo{
                generic: true,
        },
        {
-               name:    "Eq32F",
-               argLen:  2,
-               generic: true,
+               name:        "Eq32F",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
-               name:    "Eq64F",
-               argLen:  2,
-               generic: true,
+               name:        "Eq64F",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
                name:        "Neq8",
@@ -21288,14 +21344,16 @@ var opcodeTable = [...]opInfo{
                generic: true,
        },
        {
-               name:    "Neq32F",
-               argLen:  2,
-               generic: true,
+               name:        "Neq32F",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
-               name:    "Neq64F",
-               argLen:  2,
-               generic: true,
+               name:        "Neq64F",
+               argLen:      2,
+               commutative: true,
+               generic:     true,
        },
        {
                name:    "Less8",
index 2aed1dcb2231e861e2dda65fabd08c0f4f4b1657..ee77124a6eb5756b4f439b50cab7e6393b4fe4e6 100644 (file)
@@ -622,6 +622,40 @@ func rewriteValue386_Op386ADCL(v *Value) bool {
                v.AddArg(f)
                return true
        }
+       // match: (ADCL (MOVLconst [c]) x f)
+       // cond:
+       // result: (ADCLconst [c] x f)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               f := v.Args[2]
+               v.reset(Op386ADCLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(f)
+               return true
+       }
+       // match: (ADCL x (MOVLconst [c]) f)
+       // cond:
+       // result: (ADCLconst [c] x f)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               f := v.Args[2]
+               v.reset(Op386ADCLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(f)
+               return true
+       }
        return false
 }
 func rewriteValue386_Op386ADDL(v *Value) bool {
@@ -655,9 +689,9 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDL (SHLLconst [c] x) (SHRLconst [32-c] x))
-       // cond:
-       // result: (ROLLconst [c   ] x)
+       // match: (ADDL (SHLLconst [c] x) (SHRLconst [d] x))
+       // cond: d == 32-c
+       // result: (ROLLconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != Op386SHLLconst {
@@ -669,10 +703,11 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                if v_1.Op != Op386SHRLconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(Op386ROLLconst)
@@ -680,34 +715,35 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDL (SHRLconst [c] x) (SHLLconst [32-c] x))
-       // cond:
-       // result: (ROLLconst [32-c] x)
+       // match: (ADDL (SHRLconst [d] x) (SHLLconst [c] x))
+       // cond: d == 32-c
+       // result: (ROLLconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != Op386SHRLconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386SHLLconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(Op386ROLLconst)
-               v.AuxInt = 32 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (ADDL <t> (SHLLconst x [c]) (SHRWconst x [16-c]))
-       // cond: c < 16 && t.Size() == 2
-       // result: (ROLWconst x [   c])
+       // match: (ADDL <t> (SHLLconst x [c]) (SHRWconst x [d]))
+       // cond: c < 16 && d == 16-c && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
@@ -720,13 +756,11 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                if v_1.Op != Op386SHRWconst {
                        break
                }
-               if v_1.AuxInt != 16-c {
-                       break
-               }
+               d := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c < 16 && t.Size() == 2) {
+               if !(c < 16 && d == 16-c && t.Size() == 2) {
                        break
                }
                v.reset(Op386ROLWconst)
@@ -734,38 +768,36 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDL <t> (SHRWconst x [c]) (SHLLconst x [16-c]))
-       // cond: c > 0  && t.Size() == 2
-       // result: (ROLWconst x [16-c])
+       // match: (ADDL <t> (SHRWconst x [d]) (SHLLconst x [c]))
+       // cond: c < 16 && d == 16-c && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
                if v_0.Op != Op386SHRWconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386SHLLconst {
                        break
                }
-               if v_1.AuxInt != 16-c {
-                       break
-               }
+               c := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c > 0 && t.Size() == 2) {
+               if !(c < 16 && d == 16-c && t.Size() == 2) {
                        break
                }
                v.reset(Op386ROLWconst)
-               v.AuxInt = 16 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (ADDL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c]))
-       // cond: c < 8 && t.Size() == 1
-       // result: (ROLBconst x [   c])
+       // match: (ADDL <t> (SHLLconst x [c]) (SHRBconst x [d]))
+       // cond: c < 8 && d == 8-c && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
@@ -778,13 +810,11 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                if v_1.Op != Op386SHRBconst {
                        break
                }
-               if v_1.AuxInt != 8-c {
-                       break
-               }
+               d := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c < 8 && t.Size() == 1) {
+               if !(c < 8 && d == 8-c && t.Size() == 1) {
                        break
                }
                v.reset(Op386ROLBconst)
@@ -792,32 +822,30 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c]))
-       // cond: c > 0 && t.Size() == 1
-       // result: (ROLBconst x [ 8-c])
+       // match: (ADDL <t> (SHRBconst x [d]) (SHLLconst x [c]))
+       // cond: c < 8 && d == 8-c && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
                if v_0.Op != Op386SHRBconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386SHLLconst {
                        break
                }
-               if v_1.AuxInt != 8-c {
-                       break
-               }
+               c := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c > 0 && t.Size() == 1) {
+               if !(c < 8 && d == 8-c && t.Size() == 1) {
                        break
                }
                v.reset(Op386ROLBconst)
-               v.AuxInt = 8 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
@@ -839,6 +867,24 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDL (SHLLconst [3] y) x)
+       // cond:
+       // result: (LEAL8 x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHLLconst {
+                       break
+               }
+               if v_0.AuxInt != 3 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(Op386LEAL8)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDL x (SHLLconst [2] y))
        // cond:
        // result: (LEAL4 x y)
@@ -857,6 +903,24 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDL (SHLLconst [2] y) x)
+       // cond:
+       // result: (LEAL4 x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHLLconst {
+                       break
+               }
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(Op386LEAL4)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDL x (SHLLconst [1] y))
        // cond:
        // result: (LEAL2 x y)
@@ -875,6 +939,24 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDL (SHLLconst [1] y) x)
+       // cond:
+       // result: (LEAL2 x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHLLconst {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(Op386LEAL2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDL x (ADDL y y))
        // cond:
        // result: (LEAL2 x y)
@@ -893,6 +975,24 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDL (ADDL y y) x)
+       // cond:
+       // result: (LEAL2 x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDL {
+                       break
+               }
+               y := v_0.Args[0]
+               if y != v_0.Args[1] {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(Op386LEAL2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDL x (ADDL x y))
        // cond:
        // result: (LEAL2 y x)
@@ -929,6 +1029,42 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (ADDL (ADDL x y) x)
+       // cond:
+       // result: (LEAL2 y x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDL {
+                       break
+               }
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(Op386LEAL2)
+               v.AddArg(y)
+               v.AddArg(x)
+               return true
+       }
+       // match: (ADDL (ADDL y x) x)
+       // cond:
+       // result: (LEAL2 y x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDL {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(Op386LEAL2)
+               v.AddArg(y)
+               v.AddArg(x)
+               return true
+       }
        // match: (ADDL (ADDLconst [c] x) y)
        // cond:
        // result: (LEAL1 [c] x y)
@@ -946,17 +1082,17 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (ADDL x (ADDLconst [c] y))
+       // match: (ADDL y (ADDLconst [c] x))
        // cond:
        // result: (LEAL1 [c] x y)
        for {
-               x := v.Args[0]
+               y := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386ADDLconst {
                        break
                }
                c := v_1.AuxInt
-               y := v_1.Args[0]
+               x := v_1.Args[0]
                v.reset(Op386LEAL1)
                v.AuxInt = c
                v.AddArg(x)
@@ -985,7 +1121,7 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (ADDL (LEAL [c] {s} x) y)
+       // match: (ADDL (LEAL [c] {s} y) x)
        // cond: x.Op != OpSB && y.Op != OpSB
        // result: (LEAL1 [c] {s} x y)
        for {
@@ -995,8 +1131,8 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                }
                c := v_0.AuxInt
                s := v_0.Aux
-               x := v_0.Args[0]
-               y := v.Args[1]
+               y := v_0.Args[0]
+               x := v.Args[1]
                if !(x.Op != OpSB && y.Op != OpSB) {
                        break
                }
@@ -1022,6 +1158,21 @@ func rewriteValue386_Op386ADDL(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDL (NEGL y) x)
+       // cond:
+       // result: (SUBL x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386NEGL {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(Op386SUBL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        return false
 }
 func rewriteValue386_Op386ADDLcarry(v *Value) bool {
@@ -2098,20 +2249,20 @@ func rewriteValue386_Op386LEAL1(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (LEAL1 [c] {s} x (ADDLconst [d] y))
-       // cond: is32Bit(c+d)   && y.Op != OpSB
+       // match: (LEAL1 [c] {s} y (ADDLconst [d] x))
+       // cond: is32Bit(c+d)   && x.Op != OpSB
        // result: (LEAL1 [c+d] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
-               x := v.Args[0]
+               y := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386ADDLconst {
                        break
                }
                d := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(is32Bit(c+d) && y.Op != OpSB) {
+               x := v_1.Args[0]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
                v.reset(Op386LEAL1)
@@ -2143,9 +2294,9 @@ func rewriteValue386_Op386LEAL1(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (LEAL1 [c] {s} (SHLLconst [1] x) y)
+       // match: (LEAL1 [c] {s} (SHLLconst [1] y) x)
        // cond:
-       // result: (LEAL2 [c] {s} y x)
+       // result: (LEAL2 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
@@ -2156,13 +2307,13 @@ func rewriteValue386_Op386LEAL1(v *Value) bool {
                if v_0.AuxInt != 1 {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
+               y := v_0.Args[0]
+               x := v.Args[1]
                v.reset(Op386LEAL2)
                v.AuxInt = c
                v.Aux = s
-               v.AddArg(y)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        // match: (LEAL1 [c] {s} x (SHLLconst [2] y))
@@ -2187,9 +2338,9 @@ func rewriteValue386_Op386LEAL1(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (LEAL1 [c] {s} (SHLLconst [2] x) y)
+       // match: (LEAL1 [c] {s} (SHLLconst [2] y) x)
        // cond:
-       // result: (LEAL4 [c] {s} y x)
+       // result: (LEAL4 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
@@ -2200,13 +2351,13 @@ func rewriteValue386_Op386LEAL1(v *Value) bool {
                if v_0.AuxInt != 2 {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
+               y := v_0.Args[0]
+               x := v.Args[1]
                v.reset(Op386LEAL4)
                v.AuxInt = c
                v.Aux = s
-               v.AddArg(y)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        // match: (LEAL1 [c] {s} x (SHLLconst [3] y))
@@ -2231,9 +2382,9 @@ func rewriteValue386_Op386LEAL1(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (LEAL1 [c] {s} (SHLLconst [3] x) y)
+       // match: (LEAL1 [c] {s} (SHLLconst [3] y) x)
        // cond:
-       // result: (LEAL8 [c] {s} y x)
+       // result: (LEAL8 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
@@ -2244,13 +2395,13 @@ func rewriteValue386_Op386LEAL1(v *Value) bool {
                if v_0.AuxInt != 3 {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
+               y := v_0.Args[0]
+               x := v.Args[1]
                v.reset(Op386LEAL8)
                v.AuxInt = c
                v.Aux = s
-               v.AddArg(y)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        // match: (LEAL1 [off1] {sym1} (LEAL [off2] {sym2} x) y)
@@ -2277,21 +2428,21 @@ func rewriteValue386_Op386LEAL1(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (LEAL1 [off1] {sym1} x (LEAL [off2] {sym2} y))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB
+       // match: (LEAL1 [off1] {sym1} y (LEAL [off2] {sym2} x))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
        // result: (LEAL1 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
-               x := v.Args[0]
+               y := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386LEAL {
                        break
                }
                off2 := v_1.AuxInt
                sym2 := v_1.Aux
-               y := v_1.Args[0]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB) {
+               x := v_1.Args[0]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
                        break
                }
                v.reset(Op386LEAL1)
@@ -2767,7 +2918,7 @@ func rewriteValue386_Op386MOVBload(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVBload  [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // match: (MOVBload [off1] {sym} (ADDLconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVBload  [off1+off2] {sym} ptr mem)
        for {
@@ -2790,7 +2941,7 @@ func rewriteValue386_Op386MOVBload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBload  [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // match: (MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
        // result: (MOVBload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
@@ -2889,6 +3040,28 @@ func rewriteValue386_Op386MOVBloadidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBloadidx1 [c] {sym} idx (ADDLconst [d] ptr) mem)
+       // cond:
+       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVBloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVBloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
        // cond:
        // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
@@ -2911,6 +3084,28 @@ func rewriteValue386_Op386MOVBloadidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBloadidx1 [c] {sym} (ADDLconst [d] idx) ptr mem)
+       // cond:
+       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(Op386MOVBloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValue386_Op386MOVBstore(v *Value) bool {
@@ -2960,7 +3155,7 @@ func rewriteValue386_Op386MOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore  [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // match: (MOVBstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVBstore  [off1+off2] {sym} ptr val mem)
        for {
@@ -3008,7 +3203,7 @@ func rewriteValue386_Op386MOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // match: (MOVBstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
        // result: (MOVBstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
@@ -3417,19 +3612,19 @@ func rewriteValue386_Op386MOVBstoreidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // match: (MOVBstoreidx1 [c] {sym} idx (ADDLconst [d] ptr) val mem)
        // cond:
        // result: (MOVBstoreidx1 [c+d] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
-               ptr := v.Args[0]
+               idx := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386ADDLconst {
                        break
                }
                d := v_1.AuxInt
-               idx := v_1.Args[0]
+               ptr := v_1.Args[0]
                val := v.Args[2]
                mem := v.Args[3]
                v.reset(Op386MOVBstoreidx1)
@@ -3441,13 +3636,61 @@ func rewriteValue386_Op386MOVBstoreidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
+       // match: (MOVBstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // cond:
+       // result: (MOVBstoreidx1 [c+d] {sym} ptr idx val mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVBstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [c] {sym} (ADDLconst [d] idx) ptr val mem)
+       // cond:
+       // result: (MOVBstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVBstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
                idx := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != Op386SHRLconst {
@@ -3489,9 +3732,9 @@ func rewriteValue386_Op386MOVBstoreidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} idx p w mem))
        // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
        for {
                i := v.AuxInt
                s := v.Aux
@@ -3501,7 +3744,9 @@ func rewriteValue386_Op386MOVBstoreidx1(v *Value) bool {
                if v_2.Op != Op386SHRLconst {
                        break
                }
-               j := v_2.AuxInt
+               if v_2.AuxInt != 8 {
+                       break
+               }
                w := v_2.Args[0]
                x := v.Args[3]
                if x.Op != Op386MOVBstoreidx1 {
@@ -3513,20 +3758,13 @@ func rewriteValue386_Op386MOVBstoreidx1(v *Value) bool {
                if x.Aux != s {
                        break
                }
-               if p != x.Args[0] {
-                       break
-               }
-               if idx != x.Args[1] {
-                       break
-               }
-               w0 := x.Args[2]
-               if w0.Op != Op386SHRLconst {
+               if idx != x.Args[0] {
                        break
                }
-               if w0.AuxInt != j-8 {
+               if p != x.Args[1] {
                        break
                }
-               if w != w0.Args[0] {
+               if w != x.Args[2] {
                        break
                }
                mem := x.Args[3]
@@ -3538,555 +3776,452 @@ func rewriteValue386_Op386MOVBstoreidx1(v *Value) bool {
                v.Aux = s
                v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(w0)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValue386_Op386MOVLload(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: x
+       // match: (MOVBstoreidx1 [i] {s} idx p (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386MOVLstore {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               if v_2.AuxInt != 8 {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLload  [off1] {sym} (ADDLconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVLload  [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               if x.AuxInt != i-1 {
                        break
                }
-               v.reset(Op386MOVLload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLload  [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVLload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL {
+               if x.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if p != x.Args[0] {
                        break
                }
-               v.reset(Op386MOVLload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL1 {
+               if idx != x.Args[1] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if w != x.Args[2] {
                        break
                }
-               v.reset(Op386MOVLloadidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLload [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (MOVBstoreidx1 [i] {s} idx p (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL4 {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if v_2.AuxInt != 8 {
                        break
                }
-               v.reset(Op386MOVLloadidx4)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLload [off] {sym} (ADDL ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVLloadidx1 [off] {sym} ptr idx mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDL {
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(ptr.Op != OpSB) {
+               if x.AuxInt != i-1 {
                        break
                }
-               v.reset(Op386MOVLloadidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValue386_Op386MOVLloadidx1(v *Value) bool {
-       // match: (MOVLloadidx1 [c] {sym} ptr (SHLLconst [2] idx) mem)
-       // cond:
-       // result: (MOVLloadidx4 [c] {sym} ptr idx mem)
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHLLconst {
-                       break
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
                }
-               if v_1.AuxInt != 2 {
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVLloadidx4)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               if x.AuxInt != i-1 {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(Op386MOVLloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
-       // cond:
-       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
+               if x.Aux != s {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVLloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVLloadidx4(v *Value) bool {
-       // match: (MOVLloadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVLloadidx4 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               if p != x.Args[0] {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(Op386MOVLloadidx4)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLloadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem)
-       // cond:
-       // result: (MOVLloadidx4 [c+4*d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
+               if idx != x.Args[1] {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVLloadidx4)
-               v.AuxInt = c + 4*d
-               v.Aux = sym
-               v.AddArg(ptr)
+               w0 := x.Args[2]
+               if w0.Op != Op386SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValue386_Op386MOVLstore(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVLstore  [off1] {sym} (ADDLconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVLstore  [off1+off2] {sym} ptr val mem)
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} idx p w0:(SHRLconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
                        break
                }
-               v.reset(Op386MOVLstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem)
-       // cond: validOff(off)
-       // result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386MOVLconst {
+               if x.AuxInt != i-1 {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validOff(off)) {
+               if x.Aux != s {
                        break
                }
-               v.reset(Op386MOVLstoreconst)
-               v.AuxInt = makeValAndOff(int64(int32(c)), off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVLstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL {
+               if idx != x.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if p != x.Args[1] {
                        break
                }
-               v.reset(Op386MOVLstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL1 {
+               w0 := x.Args[2]
+               if w0.Op != Op386SHRLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if w0.AuxInt != j-8 {
                        break
                }
-               v.reset(Op386MOVLstoreidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(val)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstore [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // match: (MOVBstoreidx1 [i] {s} idx p (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL4 {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
                        break
                }
-               v.reset(Op386MOVLstoreidx4)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != Op386SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(val)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstore [off] {sym} (ADDL ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVLstoreidx1 [off] {sym} ptr idx val mem)
+       // match: (MOVBstoreidx1 [i] {s} idx p (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} idx p w0:(SHRLconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDL {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
                        break
                }
-               v.reset(Op386MOVLstoreidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != Op386SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(val)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValue386_Op386MOVLstoreconst(v *Value) bool {
+func rewriteValue386_Op386MOVLload(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVLstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       // match: (MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: x
        for {
-               sc := v.AuxInt
-               s := v.Aux
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVLstore {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVLload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVLload  [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386ADDLconst {
                        break
                }
-               off := v_0.AuxInt
+               off2 := v_0.AuxInt
                ptr := v_0.Args[0]
                mem := v.Args[1]
-               if !(ValAndOff(sc).canAdd(off)) {
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(Op386MOVLstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
+               v.reset(Op386MOVLload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)   && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // match: (MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVLload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
-               sc := v.AuxInt
+               off1 := v.AuxInt
                sym1 := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386LEAL {
                        break
                }
-               off := v_0.AuxInt
+               off2 := v_0.AuxInt
                sym2 := v_0.Aux
-               ptr := v_0.Args[0]
+               base := v_0.Args[0]
                mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(Op386MOVLstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
+               v.reset(Op386MOVLload)
+               v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
+               v.AddArg(base)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (MOVLload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVLloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
-               x := v.AuxInt
+               off1 := v.AuxInt
                sym1 := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386LEAL1 {
                        break
                }
-               off := v_0.AuxInt
+               off2 := v_0.AuxInt
                sym2 := v_0.Aux
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
                mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(Op386MOVLstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(off)
+               v.reset(Op386MOVLloadidx1)
+               v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconst [x] {sym1} (LEAL4 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (MOVLload [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
-               x := v.AuxInt
+               off1 := v.AuxInt
                sym1 := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386LEAL4 {
                        break
                }
-               off := v_0.AuxInt
+               off2 := v_0.AuxInt
                sym2 := v_0.Aux
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
                mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(Op386MOVLstoreconstidx4)
-               v.AuxInt = ValAndOff(x).add(off)
+               v.reset(Op386MOVLloadidx4)
+               v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconst [x] {sym} (ADDL ptr idx) mem)
-       // cond:
-       // result: (MOVLstoreconstidx1 [x] {sym} ptr idx mem)
+       // match: (MOVLload [off] {sym} (ADDL ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVLloadidx1 [off] {sym} ptr idx mem)
        for {
-               x := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386ADDL {
@@ -4095,8 +4230,11 @@ func rewriteValue386_Op386MOVLstoreconst(v *Value) bool {
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
                mem := v.Args[1]
-               v.reset(Op386MOVLstoreconstidx1)
-               v.AuxInt = x
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(Op386MOVLloadidx1)
+               v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
@@ -4105,10 +4243,10 @@ func rewriteValue386_Op386MOVLstoreconst(v *Value) bool {
        }
        return false
 }
-func rewriteValue386_Op386MOVLstoreconstidx1(v *Value) bool {
-       // match: (MOVLstoreconstidx1 [c] {sym} ptr (SHLLconst [2] idx) mem)
+func rewriteValue386_Op386MOVLloadidx1(v *Value) bool {
+       // match: (MOVLloadidx1 [c] {sym} ptr (SHLLconst [2] idx) mem)
        // cond:
-       // result: (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
+       // result: (MOVLloadidx4 [c] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -4122,7 +4260,7 @@ func rewriteValue386_Op386MOVLstoreconstidx1(v *Value) bool {
                }
                idx := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(Op386MOVLstoreconstidx4)
+               v.reset(Op386MOVLloadidx4)
                v.AuxInt = c
                v.Aux = sym
                v.AddArg(ptr)
@@ -4130,129 +4268,99 @@ func rewriteValue386_Op386MOVLstoreconstidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem)
+       // match: (MOVLloadidx1 [c] {sym} (SHLLconst [2] idx) ptr mem)
        // cond:
-       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       // result: (MOVLloadidx4 [c] {sym} ptr idx mem)
        for {
-               x := v.AuxInt
+               c := v.AuxInt
                sym := v.Aux
                v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               if v_0.Op != Op386SHLLconst {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
                mem := v.Args[2]
-               v.reset(Op386MOVLstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
+               v.reset(Op386MOVLloadidx4)
+               v.AuxInt = c
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem)
+       // match: (MOVLloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
        // cond:
-       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
        for {
-               x := v.AuxInt
+               c := v.AuxInt
                sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
                        break
                }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
                mem := v.Args[2]
-               v.reset(Op386MOVLstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
+               v.reset(Op386MOVLloadidx1)
+               v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValue386_Op386MOVLstoreconstidx4(v *Value) bool {
-       // match: (MOVLstoreconstidx4 [x] {sym} (ADDLconst [c] ptr) idx mem)
-       // cond:
-       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
-                       break
-               }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(Op386MOVLstoreconstidx4)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreconstidx4 [x] {sym} ptr (ADDLconst [c] idx) mem)
+       // match: (MOVLloadidx1 [c] {sym} idx (ADDLconst [d] ptr) mem)
        // cond:
-       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
+       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
        for {
-               x := v.AuxInt
+               c := v.AuxInt
                sym := v.Aux
-               ptr := v.Args[0]
+               idx := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386ADDLconst {
                        break
                }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(Op386MOVLstoreconstidx4)
-               v.AuxInt = ValAndOff(x).add(4 * c)
+               v.reset(Op386MOVLloadidx1)
+               v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValue386_Op386MOVLstoreidx1(v *Value) bool {
-       // match: (MOVLstoreidx1 [c] {sym} ptr (SHLLconst [2] idx) val mem)
+       // match: (MOVLloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
        // cond:
-       // result: (MOVLstoreidx4 [c] {sym} ptr idx val mem)
+       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != Op386SHLLconst {
-                       break
-               }
-               if v_1.AuxInt != 2 {
+               if v_1.Op != Op386ADDLconst {
                        break
                }
+               d := v_1.AuxInt
                idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVLstoreidx4)
-               v.AuxInt = c
+               mem := v.Args[2]
+               v.reset(Op386MOVLloadidx1)
+               v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+       // match: (MOVLloadidx1 [c] {sym} (ADDLconst [d] idx) ptr mem)
        // cond:
-       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
+       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -4261,49 +4369,23 @@ func rewriteValue386_Op386MOVLstoreidx1(v *Value) bool {
                        break
                }
                d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVLstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-       // cond:
-       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVLstoreidx1)
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(Op386MOVLloadidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValue386_Op386MOVLstoreidx4(v *Value) bool {
-       // match: (MOVLstoreidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+func rewriteValue386_Op386MOVLloadidx4(v *Value) bool {
+       // match: (MOVLloadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem)
        // cond:
-       // result: (MOVLstoreidx4 [c+d] {sym} ptr idx val mem)
+       // result: (MOVLloadidx4 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -4314,20 +4396,18 @@ func rewriteValue386_Op386MOVLstoreidx4(v *Value) bool {
                d := v_0.AuxInt
                ptr := v_0.Args[0]
                idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVLstoreidx4)
+               mem := v.Args[2]
+               v.reset(Op386MOVLloadidx4)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // match: (MOVLloadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem)
        // cond:
-       // result: (MOVLstoreidx4 [c+4*d] {sym} ptr idx val mem)
+       // result: (MOVLloadidx4 [c+4*d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -4338,50 +4418,25 @@ func rewriteValue386_Op386MOVLstoreidx4(v *Value) bool {
                }
                d := v_1.AuxInt
                idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVLstoreidx4)
+               mem := v.Args[2]
+               v.reset(Op386MOVLloadidx4)
                v.AuxInt = c + 4*d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValue386_Op386MOVSDconst(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (MOVSDconst [c])
-       // cond: config.ctxt.Flag_shared
-       // result: (MOVSDconst2 (MOVSDconst1 [c]))
-       for {
-               c := v.AuxInt
-               if !(config.ctxt.Flag_shared) {
-                       break
-               }
-               v.reset(Op386MOVSDconst2)
-               v0 := b.NewValue0(v.Pos, Op386MOVSDconst1, types.UInt32)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVSDload(v *Value) bool {
+func rewriteValue386_Op386MOVLstore(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVSDload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // match: (MOVLstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVSDload [off1+off2] {sym} ptr mem)
+       // result: (MOVLstore  [off1+off2] {sym} ptr val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -4391,20 +4446,45 @@ func rewriteValue386_Op386MOVSDload(v *Value) bool {
                }
                off2 := v_0.AuxInt
                ptr := v_0.Args[0]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(Op386MOVSDload)
+               v.reset(Op386MOVLstore)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // match: (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem)
+       // cond: validOff(off)
+       // result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validOff(off)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreconst)
+               v.AuxInt = makeValAndOff(int64(int32(c)), off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // result: (MOVLstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -4415,20 +4495,22 @@ func rewriteValue386_Op386MOVSDload(v *Value) bool {
                off2 := v_0.AuxInt
                sym2 := v_0.Aux
                base := v_0.Args[0]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(Op386MOVSDload)
+               v.reset(Op386MOVLstore)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
+       // match: (MOVLstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // result: (MOVLstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -4440,47 +4522,51 @@ func rewriteValue386_Op386MOVSDload(v *Value) bool {
                sym2 := v_0.Aux
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(Op386MOVSDloadidx1)
+               v.reset(Op386MOVLstoreidx1)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDload [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) mem)
+       // match: (MOVLstore [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // result: (MOVLstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
                v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL8 {
+               if v_0.Op != Op386LEAL4 {
                        break
                }
                off2 := v_0.AuxInt
                sym2 := v_0.Aux
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(Op386MOVSDloadidx8)
+               v.reset(Op386MOVLstoreidx4)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDload [off] {sym} (ADDL ptr idx) mem)
+       // match: (MOVLstore [off] {sym} (ADDL ptr idx) val mem)
        // cond: ptr.Op != OpSB
-       // result: (MOVSDloadidx1 [off] {sym} ptr idx mem)
+       // result: (MOVLstoreidx1 [off] {sym} ptr idx val mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -4490,231 +4576,131 @@ func rewriteValue386_Op386MOVSDload(v *Value) bool {
                }
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(ptr.Op != OpSB) {
                        break
                }
-               v.reset(Op386MOVSDloadidx1)
+               v.reset(Op386MOVLstoreidx1)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValue386_Op386MOVSDloadidx1(v *Value) bool {
-       // match: (MOVSDloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVSDloadidx1 [c+d] {sym} ptr idx mem)
+func rewriteValue386_Op386MOVLstoreconst(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVLstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
+               sc := v.AuxInt
+               s := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386ADDLconst {
                        break
                }
-               d := v_0.AuxInt
+               off := v_0.AuxInt
                ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(Op386MOVSDloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
+               mem := v.Args[1]
+               if !(ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
                v.AddArg(ptr)
-               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
-       // cond:
-       // result: (MOVSDloadidx1 [c+d] {sym} ptr idx mem)
+       // match: (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)   && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVSDloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVSDloadidx8(v *Value) bool {
-       // match: (MOVSDloadidx8 [c] {sym} (ADDLconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVSDloadidx8 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
-                       break
-               }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(Op386MOVSDloadidx8)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDloadidx8 [c] {sym} ptr (ADDLconst [d] idx) mem)
-       // cond:
-       // result: (MOVSDloadidx8 [c+8*d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVSDloadidx8)
-               v.AuxInt = c + 8*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVSDstore(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVSDstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVSDstore [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
-                       break
-               }
-               v.reset(Op386MOVSDstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
+               sc := v.AuxInt
                sym1 := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386LEAL {
                        break
                }
-               off2 := v_0.AuxInt
+               off := v_0.AuxInt
                sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(Op386MOVSDstore)
-               v.AuxInt = off1 + off2
+               v.reset(Op386MOVLstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
                v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // match: (MOVLstoreconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
-               off1 := v.AuxInt
+               x := v.AuxInt
                sym1 := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386LEAL1 {
                        break
                }
-               off2 := v_0.AuxInt
+               off := v_0.AuxInt
                sym2 := v_0.Aux
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(Op386MOVSDstoreidx1)
-               v.AuxInt = off1 + off2
+               v.reset(Op386MOVLstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(off)
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDstore [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // match: (MOVLstoreconst [x] {sym1} (LEAL4 [off] {sym2} ptr idx) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
-               off1 := v.AuxInt
+               x := v.AuxInt
                sym1 := v.Aux
                v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL8 {
+               if v_0.Op != Op386LEAL4 {
                        break
                }
-               off2 := v_0.AuxInt
+               off := v_0.AuxInt
                sym2 := v_0.Aux
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(Op386MOVSDstoreidx8)
-               v.AuxInt = off1 + off2
+               v.reset(Op386MOVLstoreconstidx4)
+               v.AuxInt = ValAndOff(x).add(off)
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDstore [off] {sym} (ADDL ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVSDstoreidx1 [off] {sym} ptr idx val mem)
+       // match: (MOVLstoreconst [x] {sym} (ADDL ptr idx) mem)
+       // cond:
+       // result: (MOVLstoreconstidx1 [x] {sym} ptr idx mem)
        for {
-               off := v.AuxInt
+               x := v.AuxInt
                sym := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386ADDL {
@@ -4722,103 +4708,241 @@ func rewriteValue386_Op386MOVSDstore(v *Value) bool {
                }
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
-                       break
-               }
-               v.reset(Op386MOVSDstoreidx1)
-               v.AuxInt = off
+               mem := v.Args[1]
+               v.reset(Op386MOVLstoreconstidx1)
+               v.AuxInt = x
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValue386_Op386MOVSDstoreidx1(v *Value) bool {
-       // match: (MOVSDstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+func rewriteValue386_Op386MOVLstoreconstidx1(v *Value) bool {
+       // match: (MOVLstoreconstidx1 [c] {sym} ptr (SHLLconst [2] idx) mem)
        // cond:
-       // result: (MOVSDstoreidx1 [c+d] {sym} ptr idx val mem)
+       // result: (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHLLconst {
+                       break
+               }
+               if v_1.AuxInt != 2 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVLstoreconstidx4)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem)
+       // cond:
+       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386ADDLconst {
                        break
                }
-               d := v_0.AuxInt
+               c := v_0.AuxInt
                ptr := v_0.Args[0]
                idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVSDstoreidx1)
-               v.AuxInt = c + d
+               mem := v.Args[2]
+               v.reset(Op386MOVLstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(c)
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // match: (MOVLstoreconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem)
        // cond:
-       // result: (MOVSDstoreidx1 [c+d] {sym} ptr idx val mem)
+       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
        for {
-               c := v.AuxInt
+               x := v.AuxInt
                sym := v.Aux
                ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386ADDLconst {
                        break
                }
-               d := v_1.AuxInt
+               c := v_1.AuxInt
                idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVSDstoreidx1)
-               v.AuxInt = c + d
+               mem := v.Args[2]
+               v.reset(Op386MOVLstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(c)
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValue386_Op386MOVSDstoreidx8(v *Value) bool {
-       // match: (MOVSDstoreidx8 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+func rewriteValue386_Op386MOVLstoreconstidx4(v *Value) bool {
+       // match: (MOVLstoreconstidx4 [x] {sym} (ADDLconst [c] ptr) idx mem)
        // cond:
-       // result: (MOVSDstoreidx8 [c+d] {sym} ptr idx val mem)
+       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
        for {
-               c := v.AuxInt
+               x := v.AuxInt
                sym := v.Aux
                v_0 := v.Args[0]
                if v_0.Op != Op386ADDLconst {
                        break
                }
-               d := v_0.AuxInt
+               c := v_0.AuxInt
                ptr := v_0.Args[0]
                idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVSDstoreidx8)
-               v.AuxInt = c + d
+               mem := v.Args[2]
+               v.reset(Op386MOVLstoreconstidx4)
+               v.AuxInt = ValAndOff(x).add(c)
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDstoreidx8 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // match: (MOVLstoreconstidx4 [x] {sym} ptr (ADDLconst [c] idx) mem)
        // cond:
-       // result: (MOVSDstoreidx8 [c+8*d] {sym} ptr idx val mem)
+       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
        for {
-               c := v.AuxInt
+               x := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVLstoreconstidx4)
+               v.AuxInt = ValAndOff(x).add(4 * c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVLstoreidx1(v *Value) bool {
+       // match: (MOVLstoreidx1 [c] {sym} ptr (SHLLconst [2] idx) val mem)
+       // cond:
+       // result: (MOVLstoreidx4 [c] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHLLconst {
+                       break
+               }
+               if v_1.AuxInt != 2 {
+                       break
+               }
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVLstoreidx4)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [c] {sym} (SHLLconst [2] idx) ptr val mem)
+       // cond:
+       // result: (MOVLstoreidx4 [c] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHLLconst {
+                       break
+               }
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVLstoreidx4)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [c] {sym} idx (ADDLconst [d] ptr) val mem)
+       // cond:
+       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // cond:
+       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
                sym := v.Aux
                ptr := v.Args[0]
                v_1 := v.Args[1]
@@ -4829,8 +4953,32 @@ func rewriteValue386_Op386MOVSDstoreidx8(v *Value) bool {
                idx := v_1.Args[0]
                val := v.Args[2]
                mem := v.Args[3]
-               v.reset(Op386MOVSDstoreidx8)
-               v.AuxInt = c + 8*d
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [c] {sym} (ADDLconst [d] idx) ptr val mem)
+       // cond:
+       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
@@ -4840,37 +4988,88 @@ func rewriteValue386_Op386MOVSDstoreidx8(v *Value) bool {
        }
        return false
 }
-func rewriteValue386_Op386MOVSSconst(v *Value) bool {
+func rewriteValue386_Op386MOVLstoreidx4(v *Value) bool {
+       // match: (MOVLstoreidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVLstoreidx4 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVLstoreidx4)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // cond:
+       // result: (MOVLstoreidx4 [c+4*d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVLstoreidx4)
+               v.AuxInt = c + 4*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVSDconst(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
        types := &b.Func.Config.Types
        _ = types
-       // match: (MOVSSconst [c])
+       // match: (MOVSDconst [c])
        // cond: config.ctxt.Flag_shared
-       // result: (MOVSSconst2 (MOVSSconst1 [c]))
+       // result: (MOVSDconst2 (MOVSDconst1 [c]))
        for {
                c := v.AuxInt
                if !(config.ctxt.Flag_shared) {
                        break
                }
-               v.reset(Op386MOVSSconst2)
-               v0 := b.NewValue0(v.Pos, Op386MOVSSconst1, types.UInt32)
+               v.reset(Op386MOVSDconst2)
+               v0 := b.NewValue0(v.Pos, Op386MOVSDconst1, types.UInt32)
                v0.AuxInt = c
                v.AddArg(v0)
                return true
        }
        return false
 }
-func rewriteValue386_Op386MOVSSload(v *Value) bool {
+func rewriteValue386_Op386MOVSDload(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVSSload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // match: (MOVSDload [off1] {sym} (ADDLconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVSSload [off1+off2] {sym} ptr mem)
+       // result: (MOVSDload [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -4884,16 +5083,16 @@ func rewriteValue386_Op386MOVSSload(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(Op386MOVSSload)
+               v.reset(Op386MOVSDload)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // match: (MOVSDload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -4908,16 +5107,16 @@ func rewriteValue386_Op386MOVSSload(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(Op386MOVSSload)
+               v.reset(Op386MOVSDload)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
+       // match: (MOVSDload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // result: (MOVSDloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -4933,7 +5132,7 @@ func rewriteValue386_Op386MOVSSload(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(Op386MOVSSloadidx1)
+               v.reset(Op386MOVSDloadidx1)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -4941,14 +5140,14 @@ func rewriteValue386_Op386MOVSSload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSload [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem)
+       // match: (MOVSDload [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // result: (MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
                v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL4 {
+               if v_0.Op != Op386LEAL8 {
                        break
                }
                off2 := v_0.AuxInt
@@ -4959,7 +5158,7 @@ func rewriteValue386_Op386MOVSSload(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(Op386MOVSSloadidx4)
+               v.reset(Op386MOVSDloadidx8)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -4967,9 +5166,9 @@ func rewriteValue386_Op386MOVSSload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSload [off] {sym} (ADDL ptr idx) mem)
+       // match: (MOVSDload [off] {sym} (ADDL ptr idx) mem)
        // cond: ptr.Op != OpSB
-       // result: (MOVSSloadidx1 [off] {sym} ptr idx mem)
+       // result: (MOVSDloadidx1 [off] {sym} ptr idx mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -4983,7 +5182,7 @@ func rewriteValue386_Op386MOVSSload(v *Value) bool {
                if !(ptr.Op != OpSB) {
                        break
                }
-               v.reset(Op386MOVSSloadidx1)
+               v.reset(Op386MOVSDloadidx1)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -4993,10 +5192,10 @@ func rewriteValue386_Op386MOVSSload(v *Value) bool {
        }
        return false
 }
-func rewriteValue386_Op386MOVSSloadidx1(v *Value) bool {
-       // match: (MOVSSloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
+func rewriteValue386_Op386MOVSDloadidx1(v *Value) bool {
+       // match: (MOVSDloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
        // cond:
-       // result: (MOVSSloadidx1 [c+d] {sym} ptr idx mem)
+       // result: (MOVSDloadidx1 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -5008,7 +5207,7 @@ func rewriteValue386_Op386MOVSSloadidx1(v *Value) bool {
                ptr := v_0.Args[0]
                idx := v.Args[1]
                mem := v.Args[2]
-               v.reset(Op386MOVSSloadidx1)
+               v.reset(Op386MOVSDloadidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -5016,9 +5215,9 @@ func rewriteValue386_Op386MOVSSloadidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
+       // match: (MOVSDloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
        // cond:
-       // result: (MOVSSloadidx1 [c+d] {sym} ptr idx mem)
+       // result: (MOVSDloadidx1 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -5030,7 +5229,7 @@ func rewriteValue386_Op386MOVSSloadidx1(v *Value) bool {
                d := v_1.AuxInt
                idx := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(Op386MOVSSloadidx1)
+               v.reset(Op386MOVSDloadidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -5040,10 +5239,10 @@ func rewriteValue386_Op386MOVSSloadidx1(v *Value) bool {
        }
        return false
 }
-func rewriteValue386_Op386MOVSSloadidx4(v *Value) bool {
-       // match: (MOVSSloadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem)
+func rewriteValue386_Op386MOVSDloadidx8(v *Value) bool {
+       // match: (MOVSDloadidx8 [c] {sym} (ADDLconst [d] ptr) idx mem)
        // cond:
-       // result: (MOVSSloadidx4 [c+d] {sym} ptr idx mem)
+       // result: (MOVSDloadidx8 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -5055,7 +5254,7 @@ func rewriteValue386_Op386MOVSSloadidx4(v *Value) bool {
                ptr := v_0.Args[0]
                idx := v.Args[1]
                mem := v.Args[2]
-               v.reset(Op386MOVSSloadidx4)
+               v.reset(Op386MOVSDloadidx8)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -5063,9 +5262,9 @@ func rewriteValue386_Op386MOVSSloadidx4(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSloadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem)
+       // match: (MOVSDloadidx8 [c] {sym} ptr (ADDLconst [d] idx) mem)
        // cond:
-       // result: (MOVSSloadidx4 [c+4*d] {sym} ptr idx mem)
+       // result: (MOVSDloadidx8 [c+8*d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -5077,8 +5276,8 @@ func rewriteValue386_Op386MOVSSloadidx4(v *Value) bool {
                d := v_1.AuxInt
                idx := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(Op386MOVSSloadidx4)
-               v.AuxInt = c + 4*d
+               v.reset(Op386MOVSDloadidx8)
+               v.AuxInt = c + 8*d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
@@ -5087,14 +5286,14 @@ func rewriteValue386_Op386MOVSSloadidx4(v *Value) bool {
        }
        return false
 }
-func rewriteValue386_Op386MOVSSstore(v *Value) bool {
+func rewriteValue386_Op386MOVSDstore(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVSSstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // match: (MOVSDstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVSSstore [off1+off2] {sym} ptr val mem)
+       // result: (MOVSDstore [off1+off2] {sym} ptr val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -5109,7 +5308,7 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(Op386MOVSSstore)
+               v.reset(Op386MOVSDstore)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
@@ -5117,9 +5316,9 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // match: (MOVSDstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -5135,7 +5334,7 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(Op386MOVSSstore)
+               v.reset(Op386MOVSDstore)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -5143,9 +5342,9 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
+       // match: (MOVSDstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // result: (MOVSDstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -5162,7 +5361,7 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(Op386MOVSSstoreidx1)
+               v.reset(Op386MOVSDstoreidx1)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -5171,14 +5370,14 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSstore [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem)
+       // match: (MOVSDstore [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // result: (MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
                v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL4 {
+               if v_0.Op != Op386LEAL8 {
                        break
                }
                off2 := v_0.AuxInt
@@ -5190,7 +5389,7 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(Op386MOVSSstoreidx4)
+               v.reset(Op386MOVSDstoreidx8)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -5199,9 +5398,9 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSstore [off] {sym} (ADDL ptr idx) val mem)
+       // match: (MOVSDstore [off] {sym} (ADDL ptr idx) val mem)
        // cond: ptr.Op != OpSB
-       // result: (MOVSSstoreidx1 [off] {sym} ptr idx val mem)
+       // result: (MOVSDstoreidx1 [off] {sym} ptr idx val mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -5216,7 +5415,7 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool {
                if !(ptr.Op != OpSB) {
                        break
                }
-               v.reset(Op386MOVSSstoreidx1)
+               v.reset(Op386MOVSDstoreidx1)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -5227,10 +5426,10 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool {
        }
        return false
 }
-func rewriteValue386_Op386MOVSSstoreidx1(v *Value) bool {
-       // match: (MOVSSstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+func rewriteValue386_Op386MOVSDstoreidx1(v *Value) bool {
+       // match: (MOVSDstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
        // cond:
-       // result: (MOVSSstoreidx1 [c+d] {sym} ptr idx val mem)
+       // result: (MOVSDstoreidx1 [c+d] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -5243,7 +5442,7 @@ func rewriteValue386_Op386MOVSSstoreidx1(v *Value) bool {
                idx := v.Args[1]
                val := v.Args[2]
                mem := v.Args[3]
-               v.reset(Op386MOVSSstoreidx1)
+               v.reset(Op386MOVSDstoreidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -5252,9 +5451,9 @@ func rewriteValue386_Op386MOVSSstoreidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // match: (MOVSDstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
        // cond:
-       // result: (MOVSSstoreidx1 [c+d] {sym} ptr idx val mem)
+       // result: (MOVSDstoreidx1 [c+d] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -5267,7 +5466,7 @@ func rewriteValue386_Op386MOVSSstoreidx1(v *Value) bool {
                idx := v_1.Args[0]
                val := v.Args[2]
                mem := v.Args[3]
-               v.reset(Op386MOVSSstoreidx1)
+               v.reset(Op386MOVSDstoreidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -5278,10 +5477,10 @@ func rewriteValue386_Op386MOVSSstoreidx1(v *Value) bool {
        }
        return false
 }
-func rewriteValue386_Op386MOVSSstoreidx4(v *Value) bool {
-       // match: (MOVSSstoreidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+func rewriteValue386_Op386MOVSDstoreidx8(v *Value) bool {
+       // match: (MOVSDstoreidx8 [c] {sym} (ADDLconst [d] ptr) idx val mem)
        // cond:
-       // result: (MOVSSstoreidx4 [c+d] {sym} ptr idx val mem)
+       // result: (MOVSDstoreidx8 [c+d] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -5294,7 +5493,7 @@ func rewriteValue386_Op386MOVSSstoreidx4(v *Value) bool {
                idx := v.Args[1]
                val := v.Args[2]
                mem := v.Args[3]
-               v.reset(Op386MOVSSstoreidx4)
+               v.reset(Op386MOVSDstoreidx8)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -5303,9 +5502,9 @@ func rewriteValue386_Op386MOVSSstoreidx4(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSstoreidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // match: (MOVSDstoreidx8 [c] {sym} ptr (ADDLconst [d] idx) val mem)
        // cond:
-       // result: (MOVSSstoreidx4 [c+4*d] {sym} ptr idx val mem)
+       // result: (MOVSDstoreidx8 [c+8*d] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -5314,2199 +5513,6145 @@ func rewriteValue386_Op386MOVSSstoreidx4(v *Value) bool {
                if v_1.Op != Op386ADDLconst {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVSSstoreidx4)
-               v.AuxInt = c + 4*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWLSX(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWLSX x:(MOVWload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWLSXload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v.Args[0]
-               if x.Op != Op386MOVWload {
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVSDstoreidx8)
+               v.AuxInt = c + 8*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVSSconst(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (MOVSSconst [c])
+       // cond: config.ctxt.Flag_shared
+       // result: (MOVSSconst2 (MOVSSconst1 [c]))
+       for {
+               c := v.AuxInt
+               if !(config.ctxt.Flag_shared) {
+                       break
+               }
+               v.reset(Op386MOVSSconst2)
+               v0 := b.NewValue0(v.Pos, Op386MOVSSconst1, types.UInt32)
+               v0.AuxInt = c
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVSSload(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVSSload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVSSload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(Op386MOVSSload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(Op386MOVSSload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSSloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(Op386MOVSSloadidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSload [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSSloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL4 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(Op386MOVSSloadidx4)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSload [off] {sym} (ADDL ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVSSloadidx1 [off] {sym} ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDL {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(Op386MOVSSloadidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVSSloadidx1(v *Value) bool {
+       // match: (MOVSSloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVSSloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(Op386MOVSSloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
+       // cond:
+       // result: (MOVSSloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVSSloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVSSloadidx4(v *Value) bool {
+       // match: (MOVSSloadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVSSloadidx4 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(Op386MOVSSloadidx4)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSloadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem)
+       // cond:
+       // result: (MOVSSloadidx4 [c+4*d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVSSloadidx4)
+               v.AuxInt = c + 4*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVSSstore(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVSSstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVSSstore [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(Op386MOVSSstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(Op386MOVSSstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSSstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(Op386MOVSSstoreidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstore [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSSstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL4 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(Op386MOVSSstoreidx4)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstore [off] {sym} (ADDL ptr idx) val mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVSSstoreidx1 [off] {sym} ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDL {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(Op386MOVSSstoreidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVSSstoreidx1(v *Value) bool {
+       // match: (MOVSSstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVSSstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVSSstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // cond:
+       // result: (MOVSSstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVSSstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVSSstoreidx4(v *Value) bool {
+       // match: (MOVSSstoreidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVSSstoreidx4 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVSSstoreidx4)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstoreidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // cond:
+       // result: (MOVSSstoreidx4 [c+4*d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVSSstoreidx4)
+               v.AuxInt = c + 4*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVWLSX(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWLSX x:(MOVWload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWLSXload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != Op386MOVWload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, Op386MOVWLSXload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVWLSX (ANDLconst [c] x))
+       // cond: c & 0x8000 == 0
+       // result: (ANDLconst [c & 0x7fff] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ANDLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(c&0x8000 == 0) {
+                       break
+               }
+               v.reset(Op386ANDLconst)
+               v.AuxInt = c & 0x7fff
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVWLSXload(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVWLSXload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWLSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(Op386MOVWLSXload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVWLZX(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWLZX x:(MOVWload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != Op386MOVWload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, Op386MOVWload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVWLZX x:(MOVWloadidx1 [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
+       for {
+               x := v.Args[0]
+               if x.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVWLZX x:(MOVWloadidx2 [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
+       for {
+               x := v.Args[0]
+               if x.Op != Op386MOVWloadidx2 {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx2, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVWLZX (ANDLconst [c] x))
+       // cond:
+       // result: (ANDLconst [c & 0xffff] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ANDLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(Op386ANDLconst)
+               v.AuxInt = c & 0xffff
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVWload(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: x
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVWstore {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVWload  [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(Op386MOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(Op386MOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(Op386MOVWloadidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL2 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(Op386MOVWloadidx2)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off] {sym} (ADDL ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVWloadidx1 [off] {sym} ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDL {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(Op386MOVWloadidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVWloadidx1(v *Value) bool {
+       // match: (MOVWloadidx1 [c] {sym} ptr (SHLLconst [1] idx) mem)
+       // cond:
+       // result: (MOVWloadidx2 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHLLconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVWloadidx2)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx1 [c] {sym} (SHLLconst [1] idx) ptr mem)
+       // cond:
+       // result: (MOVWloadidx2 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHLLconst {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(Op386MOVWloadidx2)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(Op386MOVWloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx1 [c] {sym} idx (ADDLconst [d] ptr) mem)
+       // cond:
+       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVWloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
+       // cond:
+       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVWloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx1 [c] {sym} (ADDLconst [d] idx) ptr mem)
+       // cond:
+       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(Op386MOVWloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVWloadidx2(v *Value) bool { // Tries each MOVWloadidx2 rule in order; returns true as soon as one rewrites v in place, false if none matches.
+       // match: (MOVWloadidx2 [c] {sym} (ADDLconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVWloadidx2 [c+d] {sym} ptr idx mem)
+       for { // Single-pass block: every path either breaks (rule not applicable) or returns true.
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(Op386MOVWloadidx2)
+               v.AuxInt = c + d // A constant added to the pointer operand is folded into the offset as-is.
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx2 [c] {sym} ptr (ADDLconst [d] idx) mem)
+       // cond:
+       // result: (MOVWloadidx2 [c+2*d] {sym} ptr idx mem)
+       for { // Same single-pass shape; only reached when the previous rule did not fire.
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVWloadidx2)
+               v.AuxInt = c + 2*d // A constant added to the index operand is doubled before folding — presumably because idx2 scales the index by 2; confirm against the op definition.
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVWstore(v *Value) bool { // Tries each MOVWstore rule in order; returns true as soon as one rewrites v in place, false if none matches.
+       b := v.Block
+       _ = b // Blank assignment keeps b referenced regardless of which rules below use it.
+       config := b.Func.Config
+       _ = config // Likewise for config, which only the LEAL rule's condition reads.
+       // match: (MOVWstore [off] {sym} ptr (MOVWLSX x) mem)
+       // cond:
+       // result: (MOVWstore [off] {sym} ptr x mem)
+       for { // Single-pass block: every path either breaks (rule not applicable) or returns true.
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVWLSX {
+                       break
+               }
+               x := v_1.Args[0] // Store the extension's input directly, bypassing the MOVWLSX.
+               mem := v.Args[2]
+               v.reset(Op386MOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVWLZX x) mem)
+       // cond:
+       // result: (MOVWstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVWLZX {
+                       break
+               }
+               x := v_1.Args[0] // Same as above, for the MOVWLZX form.
+               mem := v.Args[2]
+               v.reset(Op386MOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVWstore  [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) { // Only fold the added constant when the combined offset passes is32Bit.
+                       break
+               }
+               v.reset(Op386MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem)
+       // cond: validOff(off)
+       // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validOff(off)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreconst)
+               v.AuxInt = makeValAndOff(int64(int16(c)), off) // Constant is truncated to 16 bits (sign-extended back to int64) and packed together with the offset.
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { // The merge is skipped for an OpSB base when Flag_shared is set.
+                       break
+               }
+               v.reset(Op386MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for { // Absorbs a two-operand LEAL1 address into the indexed store form.
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for { // Same as the LEAL1 rule, but LEAL2 maps to the idx2 store form.
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL2 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx2)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} (ADDL ptr idx) val mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVWstoreidx1 [off] {sym} ptr idx val mem)
+       for { // Turns an address computed by ADDL into the two-operand indexed store.
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDL {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstore [i-2] {s} p w mem)
+       for { // Combines two word stores (w at i-2, w>>16 at i) into one MOVLstore of w at i-2.
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHRLconst {
+                       break
+               }
+               if v_1.AuxInt != 16 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2] // x is v's memory argument, which must itself be the earlier MOVWstore.
+               if x.Op != Op386MOVWstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] { // The earlier store must use the same pointer value.
+                       break
+               }
+               if w != x.Args[1] { // ...and must store the unshifted w.
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) { // clobber runs only when x has exactly one use (&& short-circuits); its effect is defined elsewhere — presumably it invalidates x.
+                       break
+               }
+               v.reset(Op386MOVLstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstore [i-2] {s} p w0 mem)
+       for { // Generalization of the previous rule: the two stored halves are w>>(j-16) and w>>j.
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHRLconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != Op386MOVWstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               if w0.Op != Op386SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-16 { // The earlier store's shift amount must be exactly 16 less than this one's.
+                       break
+               }
+               if w != w0.Args[0] { // Both shifts must be applied to the same source value.
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVWstoreconst(v *Value) bool { // Tries each MOVWstoreconst rule in order; returns true as soon as one rewrites v in place, false if none matches.
+       b := v.Block
+       _ = b // Blank assignment keeps b referenced regardless of which rules below use it.
+       config := b.Func.Config
+       _ = config // Likewise for config, which only the LEAL rule's condition reads.
+       // match: (MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       for { // Single-pass block: every path either breaks (rule not applicable) or returns true.
+               sc := v.AuxInt // AuxInt is interpreted as a ValAndOff (stored value plus offset) throughout this function.
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(sc).canAdd(off)) { // Guard the add below with canAdd.
+                       break
+               }
+               v.reset(Op386MOVWstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)   && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       for {
+               sc := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { // The merge is skipped for an OpSB base when Flag_shared is set.
+                       break
+               }
+               v.reset(Op386MOVWstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       for { // Absorbs a two-operand LEAL1 address into the indexed constant store.
+               x := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL1 {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(off) // NOTE(review): unlike the two rules above, this add is not guarded by canAdd — confirm add cannot fail here.
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [x] {sym1} (LEAL2 [off] {sym2} ptr idx) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       for { // Same as the LEAL1 rule, but LEAL2 maps to the idx2 form.
+               x := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386LEAL2 {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreconstidx2)
+               v.AuxInt = ValAndOff(x).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [x] {sym} (ADDL ptr idx) mem)
+       // cond:
+       // result: (MOVWstoreconstidx1 [x] {sym} ptr idx mem)
+       for { // Turns an address computed by ADDL into the two-operand indexed form; AuxInt and Aux carry over unchanged.
+               x := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDL {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               v.reset(Op386MOVWstoreconstidx1)
+               v.AuxInt = x
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
+       // cond: x.Uses == 1   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()   && clobber(x)
+       // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
+       for { // Combines two constant word stores at offsets Off(a) and Off(a)+2 into one MOVLstoreconst.
+               c := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               x := v.Args[1] // x is v's memory argument, which must itself be the earlier MOVWstoreconst.
+               if x.Op != Op386MOVWstoreconst {
+                       break
+               }
+               a := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] { // Both stores must use the same pointer value.
+                       break
+               }
+               mem := x.Args[1]
+               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { // clobber runs last, only once the other checks pass; its effect is defined elsewhere — presumably it invalidates x.
+                       break
+               }
+               v.reset(Op386MOVLstoreconst)
+               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) // Low 16 bits come from a's value, the bits above from c's value; the offset is a's (the lower one).
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVWstoreconstidx1(v *Value) bool { // Tries each MOVWstoreconstidx1 rule in order; returns true as soon as one rewrites v in place, false if none matches.
+       // match: (MOVWstoreconstidx1 [c] {sym} ptr (SHLLconst [1] idx) mem)
+       // cond:
+       // result: (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
+       for { // Single-pass block: every path either breaks (rule not applicable) or returns true.
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHLLconst {
+                       break
+               }
+               if v_1.AuxInt != 1 { // Only a left shift by exactly 1 is absorbed into the idx2 form.
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVWstoreconstidx2)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem)
+       // cond:
+       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       for { // Folds a constant added to the pointer operand into the offset.
+               x := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(Op386MOVWstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(c) // NOTE(review): no canAdd guard precedes this add — confirm add cannot fail here.
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem)
+       // cond:
+       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       for { // Same fold for a constant added to the index operand; the constant is added unscaled in the idx1 form.
+               x := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVWstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
+       // cond: x.Uses == 1   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()   && clobber(x)
+       // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
+       for { // Combines two constant word stores at offsets Off(a) and Off(a)+2 into one MOVLstoreconstidx1.
+               c := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               i := v.Args[1]
+               x := v.Args[2] // x is v's memory argument, which must itself be the earlier MOVWstoreconstidx1.
+               if x.Op != Op386MOVWstoreconstidx1 {
+                       break
+               }
+               a := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] { // Pointer operands must be the same value, in the same position.
+                       break
+               }
+               if i != x.Args[1] { // Index operands likewise.
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { // clobber runs last, only once the other checks pass; its effect is defined elsewhere — presumably it invalidates x.
+                       break
+               }
+               v.reset(Op386MOVLstoreconstidx1)
+               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) // Low 16 bits come from a's value, the bits above from c's value; the offset is a's (the lower one).
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(i)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVWstoreconstidx2(v *Value) bool { // Tries each MOVWstoreconstidx2 rule in order; returns true as soon as one rewrites v in place, false if none matches.
+       b := v.Block // Needed by the last rule, which creates a new value in this block.
+       _ = b
+       // match: (MOVWstoreconstidx2 [x] {sym} (ADDLconst [c] ptr) idx mem)
+       // cond:
+       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       for { // Single-pass block: every path either breaks (rule not applicable) or returns true.
+               x := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(Op386MOVWstoreconstidx2)
+               v.AuxInt = ValAndOff(x).add(c) // A constant added to the pointer operand is folded into the offset as-is. NOTE(review): no canAdd guard — confirm add cannot fail here.
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconstidx2 [x] {sym} ptr (ADDLconst [c] idx) mem)
+       // cond:
+       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(Op386MOVWstoreconstidx2)
+               v.AuxInt = ValAndOff(x).add(2 * c) // A constant added to the index operand is doubled before folding — presumably because idx2 scales the index by 2; confirm against the op definition.
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
+       // cond: x.Uses == 1   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()   && clobber(x)
+       // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLLconst <i.Type> [1] i) mem)
+       for { // Combines two constant word stores at offsets Off(a) and Off(a)+2 into one long store in idx1 form.
+               c := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               i := v.Args[1]
+               x := v.Args[2] // x is v's memory argument, which must itself be the earlier MOVWstoreconstidx2.
+               if x.Op != Op386MOVWstoreconstidx2 {
+                       break
+               }
+               a := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if i != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { // clobber runs last, only once the other checks pass; its effect is defined elsewhere — presumably it invalidates x.
+                       break
+               }
+               v.reset(Op386MOVLstoreconstidx1)
+               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) // Low 16 bits come from a's value, the bits above from c's value; the offset is a's (the lower one).
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, Op386SHLLconst, i.Type) // The result uses the idx1 op, so the index is shifted left by 1 explicitly — presumably to keep the ×2 scaling of the original idx2 form.
+               v0.AuxInt = 1
+               v0.AddArg(i)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVWstoreidx1(v *Value) bool {
+       // match: (MOVWstoreidx1 [c] {sym} ptr (SHLLconst [1] idx) val mem)
+       // cond:
+       // result: (MOVWstoreidx2 [c] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHLLconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVWstoreidx2)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [c] {sym} (SHLLconst [1] idx) ptr val mem)
+       // cond:
+       // result: (MOVWstoreidx2 [c] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHLLconst {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVWstoreidx2)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [c] {sym} idx (ADDLconst [d] ptr) val mem)
+       // cond:
+       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // cond:
+       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [c] {sym} (ADDLconst [d] idx) ptr val mem)
+       // cond:
+       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} idx p (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} idx p (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != Op386SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} idx p w0:(SHRLconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != Op386SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} idx p (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != Op386SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} idx p (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} idx p w0:(SHRLconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVWstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != Op386SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVWstoreidx2(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWstoreidx2 [c] {sym} (ADDLconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVWstoreidx2 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVWstoreidx2)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx2 [c] {sym} ptr (ADDLconst [d] idx) val mem)
+       // cond:
+       // result: (MOVWstoreidx2 [c+2*d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(Op386MOVWstoreidx2)
+               v.AuxInt = c + 2*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVWstoreidx2 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, Op386SHLLconst, idx.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx)
+               v.AddArg(v0)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVWstoreidx2 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != Op386SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, Op386SHLLconst, idx.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx)
+               v.AddArg(v0)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+// rewriteValue386_Op386MULL rewrites a MULL that has a MOVLconst operand on
+// either side into a MULLconst carrying that constant. It returns true when
+// v was rewritten and false when neither operand is a MOVLconst.
+func rewriteValue386_Op386MULL(v *Value) bool {
+       lhs, rhs := v.Args[0], v.Args[1]
+       var konst, other *Value
+       switch {
+       case rhs.Op == Op386MOVLconst:
+               // match: (MULL x (MOVLconst [c]))
+               // cond:
+               // result: (MULLconst [c] x)
+               konst, other = rhs, lhs
+       case lhs.Op == Op386MOVLconst:
+               // match: (MULL (MOVLconst [c]) x)
+               // cond:
+               // result: (MULLconst [c] x)
+               konst, other = lhs, rhs
+       default:
+               return false
+       }
+       // Capture the constant before v is reset.
+       c := konst.AuxInt
+       v.reset(Op386MULLconst)
+       v.AuxInt = c
+       v.AddArg(other)
+       return true
+}
+func rewriteValue386_Op386MULLconst(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MULLconst [c] (MULLconst [d] x))
+       // cond:
+       // result: (MULLconst [int64(int32(c * d))] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != Op386MULLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(Op386MULLconst)
+               v.AuxInt = int64(int32(c * d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [-1] x)
+       // cond:
+       // result: (NEGL x)
+       for {
+               if v.AuxInt != -1 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386NEGL)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [0] _)
+       // cond:
+       // result: (MOVLconst [0])
+       for {
+               if v.AuxInt != 0 {
+                       break
+               }
+               v.reset(Op386MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MULLconst [1] x)
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != 1 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [3] x)
+       // cond:
+       // result: (LEAL2 x x)
+       for {
+               if v.AuxInt != 3 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL2)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [5] x)
+       // cond:
+       // result: (LEAL4 x x)
+       for {
+               if v.AuxInt != 5 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL4)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [7] x)
+       // cond:
+       // result: (LEAL8 (NEGL <v.Type> x) x)
+       for {
+               if v.AuxInt != 7 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL8)
+               v0 := b.NewValue0(v.Pos, Op386NEGL, v.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [9] x)
+       // cond:
+       // result: (LEAL8 x x)
+       for {
+               if v.AuxInt != 9 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL8)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [11] x)
+       // cond:
+       // result: (LEAL2 x (LEAL4 <v.Type> x x))
+       for {
+               if v.AuxInt != 11 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL2)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, Op386LEAL4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [13] x)
+       // cond:
+       // result: (LEAL4 x (LEAL2 <v.Type> x x))
+       for {
+               if v.AuxInt != 13 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL4)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, Op386LEAL2, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [21] x)
+       // cond:
+       // result: (LEAL4 x (LEAL4 <v.Type> x x))
+       for {
+               if v.AuxInt != 21 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL4)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, Op386LEAL4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [25] x)
+       // cond:
+       // result: (LEAL8 x (LEAL2 <v.Type> x x))
+       for {
+               if v.AuxInt != 25 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, Op386LEAL2, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [37] x)
+       // cond:
+       // result: (LEAL4 x (LEAL8 <v.Type> x x))
+       for {
+               if v.AuxInt != 37 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL4)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, Op386LEAL8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [41] x)
+       // cond:
+       // result: (LEAL8 x (LEAL4 <v.Type> x x))
+       for {
+               if v.AuxInt != 41 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, Op386LEAL4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [73] x)
+       // cond:
+       // result: (LEAL8 x (LEAL8 <v.Type> x x))
+       for {
+               if v.AuxInt != 73 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(Op386LEAL8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, Op386LEAL8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo(c)
+       // result: (SHLLconst [log2(c)] x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(Op386SHLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo(c+1) && c >= 15
+       // result: (SUBL (SHLLconst <v.Type> [log2(c+1)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c+1) && c >= 15) {
+                       break
+               }
+               v.reset(Op386SUBL)
+               v0 := b.NewValue0(v.Pos, Op386SHLLconst, v.Type)
+               v0.AuxInt = log2(c + 1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo(c-1) && c >= 17
+       // result: (LEAL1 (SHLLconst <v.Type> [log2(c-1)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-1) && c >= 17) {
+                       break
+               }
+               v.reset(Op386LEAL1)
+               v0 := b.NewValue0(v.Pos, Op386SHLLconst, v.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo(c-2) && c >= 34
+       // result: (LEAL2 (SHLLconst <v.Type> [log2(c-2)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-2) && c >= 34) {
+                       break
+               }
+               v.reset(Op386LEAL2)
+               v0 := b.NewValue0(v.Pos, Op386SHLLconst, v.Type)
+               v0.AuxInt = log2(c - 2)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo(c-4) && c >= 68
+       // result: (LEAL4 (SHLLconst <v.Type> [log2(c-4)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-4) && c >= 68) {
+                       break
+               }
+               v.reset(Op386LEAL4)
+               v0 := b.NewValue0(v.Pos, Op386SHLLconst, v.Type)
+               v0.AuxInt = log2(c - 4)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo(c-8) && c >= 136
+       // result: (LEAL8 (SHLLconst <v.Type> [log2(c-8)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-8) && c >= 136) {
+                       break
+               }
+               v.reset(Op386LEAL8)
+               v0 := b.NewValue0(v.Pos, Op386SHLLconst, v.Type)
+               v0.AuxInt = log2(c - 8)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SHLLconst [log2(c/3)] (LEAL2 <v.Type> x x))
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+                       break
+               }
+               v.reset(Op386SHLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, Op386LEAL2, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (SHLLconst [log2(c/5)] (LEAL4 <v.Type> x x))
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+                       break
+               }
+               v.reset(Op386SHLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, Op386LEAL4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (SHLLconst [log2(c/9)] (LEAL8 <v.Type> x x))
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+                       break
+               }
+               v.reset(Op386SHLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, Op386LEAL8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [c] (MOVLconst [d]))
+       // cond:
+       // result: (MOVLconst [int64(int32(c*d))])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != Op386MOVLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v.reset(Op386MOVLconst)
+               v.AuxInt = int64(int32(c * d))
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386NEGL(v *Value) bool {
+       // match: (NEGL (MOVLconst [c]))
+       // cond:
+       // result: (MOVLconst [int64(int32(-c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(Op386MOVLconst)
+               v.AuxInt = int64(int32(-c))
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386NOTL(v *Value) bool {
+       // match: (NOTL (MOVLconst [c]))
+       // cond:
+       // result: (MOVLconst [^c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(Op386MOVLconst)
+               v.AuxInt = ^c
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386ORL(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (ORL x (MOVLconst [c]))
+       // cond:
+       // result: (ORLconst [c] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(Op386ORLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL (MOVLconst [c]) x)
+       // cond:
+       // result: (ORLconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(Op386ORLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL (SHLLconst [c] x) (SHRLconst [d] x))
+       // cond: d == 32-c
+       // result: (ROLLconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHRLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 32-c) {
+                       break
+               }
+               v.reset(Op386ROLLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL (SHRLconst [d] x) (SHLLconst [c] x))
+       // cond: d == 32-c
+       // result: (ROLLconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHRLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 32-c) {
+                       break
+               }
+               v.reset(Op386ROLLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL <t> (SHLLconst x [c]) (SHRWconst x [d]))
+       // cond: c < 16 && d == 16-c && t.Size() == 2
+       // result: (ROLWconst x [c])
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHRWconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(c < 16 && d == 16-c && t.Size() == 2) {
+                       break
+               }
+               v.reset(Op386ROLWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL <t> (SHRWconst x [d]) (SHLLconst x [c]))
+       // cond: c < 16 && d == 16-c && t.Size() == 2
+       // result: (ROLWconst x [c])
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHRWconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(c < 16 && d == 16-c && t.Size() == 2) {
+                       break
+               }
+               v.reset(Op386ROLWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL <t> (SHLLconst x [c]) (SHRBconst x [d]))
+       // cond: c < 8 && d == 8-c && t.Size() == 1
+       // result: (ROLBconst x [c])
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHRBconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(c < 8 && d == 8-c && t.Size() == 1) {
+                       break
+               }
+               v.reset(Op386ROLBconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL <t> (SHRBconst x [d]) (SHLLconst x [c]))
+       // cond: c < 8 && d == 8-c && t.Size() == 1
+       // result: (ROLBconst x [c])
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != Op386SHRBconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(c < 8 && d == 8-c && t.Size() == 1) {
+                       break
+               }
+               v.reset(Op386ROLBconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL x x)
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL x0:(MOVBload [i0] {s} p mem) s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
+       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != Op386MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 8 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, Op386MOVWload, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)) x0:(MOVBload [i0] {s} p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
+       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 8 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != Op386MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, Op386MOVWload, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL o0:(ORL x0:(MOVWload [i0] {s} p mem) s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem))) s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
+       for {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBload {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBload {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem)) x0:(MOVWload [i0] {s} p mem)) s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
+       for {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBload {
+                       break
+               }
+               i2 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBload {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)) o0:(ORL x0:(MOVWload [i0] {s} p mem) s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem))))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBload {
+                       break
+               }
+               i3 := x2.AuxInt
+               s := x2.Aux
+               p := x2.Args[0]
+               mem := x2.Args[1]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBload {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem)) x0:(MOVWload [i0] {s} p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBload {
+                       break
+               }
+               i3 := x2.AuxInt
+               s := x2.Aux
+               p := x2.Args[0]
+               mem := x2.Args[1]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBload {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1==i0+1   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 8 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVBloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1==i0+1   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 8 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1==i0+1   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 8 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVBloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1==i0+1   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 8 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
+       // cond: i1==i0+1   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 8 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
+       // cond: i1==i0+1   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 8 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)) x0:(MOVBloadidx1 [i0] {s} idx p mem))
+       // cond: i1==i0+1   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for { // Runs at most once: every path either breaks (rule does not apply) or returns. Variant: x1 args are (p, idx), x0 args are (idx, p).
+               s0 := v.Args[0] // first ORL operand: must be SHLLconst with AuxInt 8
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 8 {
+                       break
+               }
+               x1 := s0.Args[0] // operand of the SHLLconst: must be a MOVBloadidx1
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0] // p, idx, mem are bound from x1; x0 must carry the same values (with p/idx swapped)
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1] // second ORL operand: the other MOVBloadidx1
+               if x0.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) { // && short-circuits: the clobber calls run only after every earlier test has passed
+                       break
+               }
+               b = mergePoint(b, x0, x1) // the replacement is created in the block mergePoint returns (checked non-nil above)
+               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new MOVWloadidx1
+               v.AddArg(v0)
+               v0.AuxInt = i0 // the combined load takes x0's AuxInt and the shared Aux
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true // NOTE(review): presumably reports to the caller that v was rewritten — confirm against the enclosing function
+       }
+       // match: (ORL s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} idx p mem))
+       // cond: i1==i0+1   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for { // Runs at most once: every path either breaks (rule does not apply) or returns. Variant: both x1 and x0 have args (idx, p).
+               s0 := v.Args[0] // first ORL operand: must be SHLLconst with AuxInt 8
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 8 {
+                       break
+               }
+               x1 := s0.Args[0] // operand of the SHLLconst: must be a MOVBloadidx1
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0] // idx, p, mem are bound from x1; x0 must carry the same values in the same positions
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1] // second ORL operand: the other MOVBloadidx1
+               if x0.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) { // && short-circuits: the clobber calls run only after every earlier test has passed
+                       break
+               }
+               b = mergePoint(b, x0, x1) // the replacement is created in the block mergePoint returns (checked non-nil above)
+               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new MOVWloadidx1
+               v.AddArg(v0)
+               v0.AuxInt = i0 // the combined load takes x0's AuxInt and the shared Aux
+               v0.Aux = s
+               v0.AddArg(p) // emitted as (p, idx, mem) regardless of the order matched
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true // NOTE(review): presumably reports to the caller that v was rewritten — confirm against the enclosing function
+       }
+       // match: (ORL o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for { // Runs at most once: every path either breaks (rule does not apply) or returns. Variant: o0 args (x0, s0); x0 (p, idx), x1 (p, idx), x2 (p, idx).
+               o0 := v.Args[0] // first operand of the outer ORL: must itself be an ORL
+               if o0.Op != Op386ORL {
+                       break
+               }
+               x0 := o0.Args[0] // inner ORL's first operand: the MOVWloadidx1
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0] // p, idx, mem are bound from x0; x1 and x2 must carry the same values
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := o0.Args[1] // inner ORL's second operand: SHLLconst with AuxInt 16 wrapping x1
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               s1 := v.Args[1] // second operand of the outer ORL: SHLLconst with AuxInt 24 wrapping x2
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) { // && short-circuits: the clobber calls run only after every earlier test has passed
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2) // the replacement is created in the block mergePoint returns (checked non-nil above)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new MOVLloadidx1
+               v.AddArg(v0)
+               v0.AuxInt = i0 // the combined load takes x0's AuxInt and the shared Aux
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true // NOTE(review): presumably reports to the caller that v was rewritten — confirm against the enclosing function
+       }
+       // match: (ORL o0:(ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for { // Runs at most once: every path either breaks (rule does not apply) or returns. Variant: o0 args (x0, s0); x0 (idx, p), x1 (p, idx), x2 (p, idx).
+               o0 := v.Args[0] // first operand of the outer ORL: must itself be an ORL
+               if o0.Op != Op386ORL {
+                       break
+               }
+               x0 := o0.Args[0] // inner ORL's first operand: the MOVWloadidx1
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0] // idx, p, mem are bound from x0 (idx first); x1 and x2 must carry the same values
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := o0.Args[1] // inner ORL's second operand: SHLLconst with AuxInt 16 wrapping x1
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               s1 := v.Args[1] // second operand of the outer ORL: SHLLconst with AuxInt 24 wrapping x2
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) { // && short-circuits: the clobber calls run only after every earlier test has passed
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2) // the replacement is created in the block mergePoint returns (checked non-nil above)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new MOVLloadidx1
+               v.AddArg(v0)
+               v0.AuxInt = i0 // the combined load takes x0's AuxInt and the shared Aux
+               v0.Aux = s
+               v0.AddArg(p) // emitted as (p, idx, mem) regardless of the order matched
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true // NOTE(review): presumably reports to the caller that v was rewritten — confirm against the enclosing function
+       }
+       // match: (ORL o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem))) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for { // Runs at most once: every path either breaks (rule does not apply) or returns. Variant: o0 args (x0, s0); x0 (p, idx), x1 (idx, p), x2 (p, idx).
+               o0 := v.Args[0] // first operand of the outer ORL: must itself be an ORL
+               if o0.Op != Op386ORL {
+                       break
+               }
+               x0 := o0.Args[0] // inner ORL's first operand: the MOVWloadidx1
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0] // p, idx, mem are bound from x0; x1 and x2 must carry the same values
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := o0.Args[1] // inner ORL's second operand: SHLLconst with AuxInt 16 wrapping x1
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] { // x1 is matched with its first two args swapped relative to x0
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               s1 := v.Args[1] // second operand of the outer ORL: SHLLconst with AuxInt 24 wrapping x2
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) { // && short-circuits: the clobber calls run only after every earlier test has passed
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2) // the replacement is created in the block mergePoint returns (checked non-nil above)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new MOVLloadidx1
+               v.AddArg(v0)
+               v0.AuxInt = i0 // the combined load takes x0's AuxInt and the shared Aux
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true // NOTE(review): presumably reports to the caller that v was rewritten — confirm against the enclosing function
+       }
+       // match: (ORL o0:(ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem))) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for { // Runs at most once: every path either breaks (rule does not apply) or returns. Variant: o0 args (x0, s0); x0 (idx, p), x1 (idx, p), x2 (p, idx).
+               o0 := v.Args[0] // first operand of the outer ORL: must itself be an ORL
+               if o0.Op != Op386ORL {
+                       break
+               }
+               x0 := o0.Args[0] // inner ORL's first operand: the MOVWloadidx1
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0] // idx, p, mem are bound from x0 (idx first); x1 and x2 must carry the same values
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := o0.Args[1] // inner ORL's second operand: SHLLconst with AuxInt 16 wrapping x1
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               s1 := v.Args[1] // second operand of the outer ORL: SHLLconst with AuxInt 24 wrapping x2
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] { // x2 is matched with p first, unlike x0 and x1 in this variant
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) { // && short-circuits: the clobber calls run only after every earlier test has passed
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2) // the replacement is created in the block mergePoint returns (checked non-nil above)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new MOVLloadidx1
+               v.AddArg(v0)
+               v0.AuxInt = i0 // the combined load takes x0's AuxInt and the shared Aux
+               v0.Aux = s
+               v0.AddArg(p) // emitted as (p, idx, mem) regardless of the order matched
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true // NOTE(review): presumably reports to the caller that v was rewritten — confirm against the enclosing function
+       }
+       // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for { // Runs at most once: every path either breaks (rule does not apply) or returns. Variant: o0 args (s0, x0); x1 (p, idx), x0 (p, idx), x2 (p, idx).
+               o0 := v.Args[0] // first operand of the outer ORL: must itself be an ORL
+               if o0.Op != Op386ORL {
+                       break
+               }
+               s0 := o0.Args[0] // inner ORL's first operand: SHLLconst with AuxInt 16 wrapping x1
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0] // p, idx, mem are bound from x1 here; x0 and x2 must carry the same values
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := o0.Args[1] // inner ORL's second operand: the MOVWloadidx1
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               s1 := v.Args[1] // second operand of the outer ORL: SHLLconst with AuxInt 24 wrapping x2
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) { // && short-circuits: the clobber calls run only after every earlier test has passed
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2) // the replacement is created in the block mergePoint returns (checked non-nil above)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new MOVLloadidx1
+               v.AddArg(v0)
+               v0.AuxInt = i0 // the combined load takes x0's AuxInt and the shared Aux
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true // NOTE(review): presumably reports to the caller that v was rewritten — confirm against the enclosing function
+       }
+       // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for { // Runs at most once: every path either breaks (rule does not apply) or returns. Variant: o0 args (s0, x0); x1 (idx, p), x0 (p, idx), x2 (p, idx).
+               o0 := v.Args[0] // first operand of the outer ORL: must itself be an ORL
+               if o0.Op != Op386ORL {
+                       break
+               }
+               s0 := o0.Args[0] // inner ORL's first operand: SHLLconst with AuxInt 16 wrapping x1
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0] // idx, p, mem are bound from x1 (idx first); x0 and x2 must carry the same values
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := o0.Args[1] // inner ORL's second operand: the MOVWloadidx1
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               s1 := v.Args[1] // second operand of the outer ORL: SHLLconst with AuxInt 24 wrapping x2
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) { // && short-circuits: the clobber calls run only after every earlier test has passed
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2) // the replacement is created in the block mergePoint returns (checked non-nil above)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new MOVLloadidx1
+               v.AddArg(v0)
+               v0.AuxInt = i0 // the combined load takes x0's AuxInt and the shared Aux
+               v0.Aux = s
+               v0.AddArg(p) // emitted as (p, idx, mem) regardless of the order matched
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true // NOTE(review): presumably reports to the caller that v was rewritten — confirm against the enclosing function
+       }
+       // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for { // Runs at most once: every path either breaks (rule does not apply) or returns. Variant: o0 args (s0, x0); x1 (p, idx), x0 (idx, p), x2 (p, idx).
+               o0 := v.Args[0] // first operand of the outer ORL: must itself be an ORL
+               if o0.Op != Op386ORL {
+                       break
+               }
+               s0 := o0.Args[0] // inner ORL's first operand: SHLLconst with AuxInt 16 wrapping x1
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0] // p, idx, mem are bound from x1 here; x0 and x2 must carry the same values
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := o0.Args[1] // inner ORL's second operand: the MOVWloadidx1
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] { // x0 is matched with its first two args swapped relative to x1
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               s1 := v.Args[1] // second operand of the outer ORL: SHLLconst with AuxInt 24 wrapping x2
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) { // && short-circuits: the clobber calls run only after every earlier test has passed
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2) // the replacement is created in the block mergePoint returns (checked non-nil above)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new MOVLloadidx1
+               v.AddArg(v0)
+               v0.AuxInt = i0 // the combined load takes x0's AuxInt and the shared Aux
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true // NOTE(review): presumably reports to the caller that v was rewritten — confirm against the enclosing function
+       }
+       // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for { // Runs at most once: every path either breaks (rule does not apply) or returns. Variant: o0 args (s0, x0); x1 (idx, p), x0 (idx, p), x2 (p, idx).
+               o0 := v.Args[0] // first operand of the outer ORL: must itself be an ORL
+               if o0.Op != Op386ORL {
+                       break
+               }
+               s0 := o0.Args[0] // inner ORL's first operand: SHLLconst with AuxInt 16 wrapping x1
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0] // idx, p, mem are bound from x1 (idx first); x0 and x2 must carry the same values
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := o0.Args[1] // inner ORL's second operand: the MOVWloadidx1
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               s1 := v.Args[1] // second operand of the outer ORL: SHLLconst with AuxInt 24 wrapping x2
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] { // x2 is matched with p first, unlike x1 and x0 in this variant
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) { // && short-circuits: the clobber calls run only after every earlier test has passed
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2) // the replacement is created in the block mergePoint returns (checked non-nil above)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new MOVLloadidx1
+               v.AddArg(v0)
+               v0.AuxInt = i0 // the combined load takes x0's AuxInt and the shared Aux
+               v0.Aux = s
+               v0.AddArg(p) // emitted as (p, idx, mem) regardless of the order matched
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true // NOTE(review): presumably reports to the caller that v was rewritten — confirm against the enclosing function
+       }
+       // match: (ORL o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for { // Runs at most once: every path either breaks (rule does not apply) or returns. Variant: o0 args (x0, s0); x0 (p, idx), x1 (p, idx), x2 (idx, p).
+               o0 := v.Args[0] // first operand of the outer ORL: must itself be an ORL
+               if o0.Op != Op386ORL {
+                       break
+               }
+               x0 := o0.Args[0] // inner ORL's first operand: the MOVWloadidx1
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0] // p, idx, mem are bound from x0; x1 and x2 must carry the same values
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := o0.Args[1] // inner ORL's second operand: SHLLconst with AuxInt 16 wrapping x1
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               s1 := v.Args[1] // second operand of the outer ORL: SHLLconst with AuxInt 24 wrapping x2
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] { // x2 is matched with its first two args swapped relative to x0 and x1
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) { // && short-circuits: the clobber calls run only after every earlier test has passed
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2) // the replacement is created in the block mergePoint returns (checked non-nil above)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new MOVLloadidx1
+               v.AddArg(v0)
+               v0.AuxInt = i0 // the combined load takes x0's AuxInt and the shared Aux
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true // NOTE(review): presumably reports to the caller that v was rewritten — confirm against the enclosing function
+       }
+       // match: (ORL o0:(ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem))) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL o0:(ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem))) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if idx != x2.Args[0] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, Op386MOVWLSXload, v.Type)
+               if p != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (MOVWLSX (ANDLconst [c] x))
-       // cond: c & 0x8000 == 0
-       // result: (ANDLconst [c & 0x7fff] x)
+       // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ANDLconst {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(c&0x8000 == 0) {
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               v.reset(Op386ANDLconst)
-               v.AuxInt = c & 0x7fff
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWLSXload(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVWLSXload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWLSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL {
+               if s0.AuxInt != 16 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               v.reset(Op386MOVWLSXload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWLZX(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWLZX x:(MOVWload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v.Args[0]
-               if x.Op != Op386MOVWload {
+               i2 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, Op386MOVWload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVWLZX x:(MOVWloadidx1 [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
-       for {
-               x := v.Args[0]
-               if x.Op != Op386MOVWloadidx1 {
+               if p != x0.Args[0] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if idx != x0.Args[1] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               if mem != x0.Args[2] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
                v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (MOVWLZX x:(MOVWloadidx2 [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
+       // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != Op386MOVWloadidx2 {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx2, v.Type)
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if idx != x2.Args[0] {
+                       break
+               }
+               if p != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
                v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (MOVWLZX (ANDLconst [c] x))
-       // cond:
-       // result: (ANDLconst [c & 0xffff] x)
+       // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ANDLconst {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(Op386ANDLconst)
-               v.AuxInt = c & 0xffff
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWload(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: x
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386MOVWstore {
+               if mem != x0.Args[2] {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWload  [off1] {sym} (ADDLconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVWload  [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               if s1.AuxInt != 24 {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               v.reset(Op386MOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWload  [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL {
+               i3 := x2.AuxInt
+               if x2.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if idx != x2.Args[0] {
                        break
                }
-               v.reset(Op386MOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL1 {
+               if p != x2.Args[1] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if mem != x2.Args[2] {
                        break
                }
-               v.reset(Op386MOVWloadidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWload [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL2 {
+               o0 := v.Args[0]
+               if o0.Op != Op386ORL {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               v.reset(Op386MOVWloadidx2)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWload [off] {sym} (ADDL ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVWloadidx1 [off] {sym} ptr idx mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDL {
+               if s0.AuxInt != 16 {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(ptr.Op != OpSB) {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               v.reset(Op386MOVWloadidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWloadidx1(v *Value) bool {
-       // match: (MOVWloadidx1 [c] {sym} ptr (SHLLconst [1] idx) mem)
-       // cond:
-       // result: (MOVWloadidx2 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHLLconst {
+               i2 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               if v_1.AuxInt != 1 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVWloadidx2)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               if idx != x0.Args[0] {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(Op386MOVWloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem)
-       // cond:
-       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
+               if p != x0.Args[1] {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVWloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWloadidx2(v *Value) bool {
-       // match: (MOVWloadidx2 [c] {sym} (ADDLconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVWloadidx2 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               if mem != x0.Args[2] {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(Op386MOVWloadidx2)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWloadidx2 [c] {sym} ptr (ADDLconst [d] idx) mem)
-       // cond:
-       // result: (MOVWloadidx2 [c+2*d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
+               s1 := v.Args[1]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVWloadidx2)
-               v.AuxInt = c + 2*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWstore(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVWstore [off] {sym} ptr (MOVWLSX x) mem)
-       // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386MOVWLSX {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} ptr (MOVWLZX x) mem)
-       // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386MOVWLZX {
+               i3 := x2.AuxInt
+               if x2.Aux != s {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore  [off1] {sym} (ADDLconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVWstore  [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               if idx != x2.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               if p != x2.Args[1] {
                        break
                }
-               v.reset(Op386MOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem)
-       // cond: validOff(off)
-       // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386MOVLconst {
+               if mem != x2.Args[2] {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validOff(off)) {
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
                        break
                }
-               v.reset(Op386MOVWstoreconst)
-               v.AuxInt = makeValAndOff(int64(int16(c)), off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)   && (base.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)) o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if s1.AuxInt != 24 {
                        break
                }
-               v.reset(Op386MOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL1 {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               p := x2.Args[0]
+               idx := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               v.reset(Op386MOVWstoreidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL2 {
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(Op386MOVWstoreidx2)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} (ADDL ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVWstoreidx1 [off] {sym} ptr idx val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDL {
+               if p != x0.Args[0] {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
+               if idx != x0.Args[1] {
                        break
                }
-               v.reset(Op386MOVWstoreidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVLstore [i-2] {s} p w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHRLconst {
+               if mem != x0.Args[2] {
                        break
                }
-               if v_1.AuxInt != 16 {
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != Op386MOVWstore {
+               if s0.AuxInt != 16 {
                        break
                }
-               if x.AuxInt != i-2 {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               if x.Aux != s {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               s := x2.Aux
+               idx := x2.Args[0]
+               p := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               if w != x.Args[1] {
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(Op386MOVLstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVLstore [i-2] {s} p w0 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHRLconst {
+               if p != x0.Args[0] {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != Op386MOVWstore {
+               if idx != x0.Args[1] {
                        break
                }
-               if x.AuxInt != i-2 {
+               if mem != x0.Args[2] {
                        break
                }
-               if x.Aux != s {
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               if p != x.Args[0] {
+               if s0.AuxInt != 16 {
                        break
                }
-               w0 := x.Args[1]
-               if w0.Op != Op386SHRLconst {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               if w0.AuxInt != j-16 {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if w != w0.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if idx != x1.Args[1] {
                        break
                }
-               v.reset(Op386MOVLstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWstoreconst(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-       for {
-               sc := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               if mem != x1.Args[2] {
                        break
                }
-               off := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(sc).canAdd(off)) {
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
                        break
                }
-               v.reset(Op386MOVWstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)   && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)) o0:(ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               sc := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if s1.AuxInt != 24 {
                        break
                }
-               v.reset(Op386MOVWstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL1 {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               p := x2.Args[0]
+               idx := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               v.reset(Op386MOVWstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconst [x] {sym1} (LEAL2 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386LEAL2 {
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(Op386MOVWstoreconstidx2)
-               v.AuxInt = ValAndOff(x).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconst [x] {sym} (ADDL ptr idx) mem)
-       // cond:
-       // result: (MOVWstoreconstidx1 [x] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDL {
+               if idx != x0.Args[0] {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               v.reset(Op386MOVWstoreconstidx1)
-               v.AuxInt = x
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
-       // cond: x.Uses == 1   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()   && clobber(x)
-       // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               x := v.Args[1]
-               if x.Op != Op386MOVWstoreconst {
+               if p != x0.Args[1] {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               if mem != x0.Args[2] {
                        break
                }
-               if p != x.Args[0] {
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               mem := x.Args[1]
-               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+               if s0.AuxInt != 16 {
                        break
                }
-               v.reset(Op386MOVLstoreconst)
-               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWstoreconstidx1(v *Value) bool {
-       // match: (MOVWstoreconstidx1 [c] {sym} ptr (SHLLconst [1] idx) mem)
-       // cond:
-       // result: (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHLLconst {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               if v_1.AuxInt != 1 {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVWstoreconstidx2)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem)
-       // cond:
-       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               if p != x1.Args[0] {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(Op386MOVWstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem)
-       // cond:
-       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
                        break
                }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVWstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
-       // cond: x.Uses == 1   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()   && clobber(x)
-       // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               i := v.Args[1]
-               x := v.Args[2]
-               if x.Op != Op386MOVWstoreconstidx1 {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               if s1.AuxInt != 24 {
                        break
                }
-               if p != x.Args[0] {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               if i != x.Args[1] {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               idx := x2.Args[0]
+               p := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               v.reset(Op386MOVLstoreconstidx1)
-               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(i)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWstoreconstidx2(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWstoreconstidx2 [x] {sym} (ADDLconst [c] ptr) idx mem)
-       // cond:
-       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(Op386MOVWstoreconstidx2)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconstidx2 [x] {sym} ptr (ADDLconst [c] idx) mem)
-       // cond:
-       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
+               if idx != x0.Args[0] {
                        break
                }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(Op386MOVWstoreconstidx2)
-               v.AuxInt = ValAndOff(x).add(2 * c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
-       // cond: x.Uses == 1   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()   && clobber(x)
-       // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLLconst <i.Type> [1] i) mem)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               i := v.Args[1]
-               x := v.Args[2]
-               if x.Op != Op386MOVWstoreconstidx2 {
+               if p != x0.Args[1] {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               if mem != x0.Args[2] {
                        break
                }
-               if p != x.Args[0] {
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               if i != x.Args[1] {
+               if s0.AuxInt != 16 {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               v.reset(Op386MOVLstoreconstidx1)
-               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, Op386SHLLconst, i.Type)
-               v0.AuxInt = 1
-               v0.AddArg(i)
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
                v.AddArg(v0)
-               v.AddArg(mem)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValue386_Op386MOVWstoreidx1(v *Value) bool {
-       // match: (MOVWstoreidx1 [c] {sym} ptr (SHLLconst [1] idx) val mem)
-       // cond:
-       // result: (MOVWstoreidx2 [c] {sym} ptr idx val mem)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)) o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem))))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHLLconst {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               if v_1.AuxInt != 1 {
+               if s1.AuxInt != 24 {
                        break
                }
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVWstoreidx2)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-       // cond:
-       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVWstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-       // cond:
-       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               p := x2.Args[0]
+               idx := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVWstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != Op386SHRLconst {
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
                        break
                }
-               if v_2.AuxInt != 16 {
+               if mem != x1.Args[2] {
                        break
                }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != Op386MOVWstoreidx1 {
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
                        break
                }
-               if x.AuxInt != i-2 {
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem))))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               if x.Aux != s {
+               if s1.AuxInt != 24 {
                        break
                }
-               if p != x.Args[0] {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               if idx != x.Args[1] {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               idx := x2.Args[0]
+               p := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               if w != x.Args[2] {
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(Op386MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != Op386SHRLconst {
+               if p != x0.Args[0] {
                        break
                }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != Op386MOVWstoreidx1 {
+               if idx != x0.Args[1] {
                        break
                }
-               if x.AuxInt != i-2 {
+               if mem != x0.Args[2] {
                        break
                }
-               if x.Aux != s {
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               if p != x.Args[0] {
+               if s0.AuxInt != 16 {
                        break
                }
-               if idx != x.Args[1] {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != Op386SHRLconst {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if w0.AuxInt != j-16 {
+               if idx != x1.Args[0] {
                        break
                }
-               if w != w0.Args[0] {
+               if p != x1.Args[1] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(Op386MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MOVWstoreidx2(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWstoreidx2 [c] {sym} (ADDLconst [d] ptr) idx val mem)
-       // cond:
-       // result: (MOVWstoreidx2 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != Op386ADDLconst {
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVWstoreidx2)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreidx2 [c] {sym} ptr (ADDLconst [d] idx) val mem)
-       // cond:
-       // result: (MOVWstoreidx2 [c+2*d] {sym} ptr idx val mem)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)) o0:(ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem))))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386ADDLconst {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(Op386MOVWstoreidx2)
-               v.AuxInt = c + 2*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != Op386SHRLconst {
+               if s1.AuxInt != 24 {
                        break
                }
-               if v_2.AuxInt != 16 {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != Op386MOVWstoreidx2 {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               p := x2.Args[0]
+               idx := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               if x.AuxInt != i-2 {
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               if x.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if idx != x.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
-               if w != x.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               v.reset(Op386MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, Op386SHLLconst, idx.Type)
-               v0.AuxInt = 1
-               v0.AddArg(idx)
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
                v.AddArg(v0)
-               v.AddArg(w)
-               v.AddArg(mem)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w0 mem)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem))))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != Op386SHRLconst {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != Op386MOVWstoreidx2 {
+               if s1.AuxInt != 24 {
                        break
                }
-               if x.AuxInt != i-2 {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               if x.Aux != s {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               idx := x2.Args[0]
+               p := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               if p != x.Args[0] {
+               x0 := o0.Args[0]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               if idx != x.Args[1] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != Op386SHRLconst {
+               if idx != x0.Args[0] {
                        break
                }
-               if w0.AuxInt != j-16 {
+               if p != x0.Args[1] {
                        break
                }
-               if w != w0.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               s0 := o0.Args[1]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               v.reset(Op386MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, Op386SHLLconst, idx.Type)
-               v0.AuxInt = 1
-               v0.AddArg(idx)
-               v.AddArg(v0)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MULL(v *Value) bool {
-       // match: (MULL x (MOVLconst [c]))
-       // cond:
-       // result: (MULLconst [c] x)
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386MOVLconst {
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(Op386MULLconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULL (MOVLconst [c]) x)
-       // cond:
-       // result: (MULLconst [c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != Op386MOVLconst {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(Op386MULLconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386MULLconst(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MULLconst [c] (MULLconst [d] x))
-       // cond:
-       // result: (MULLconst [int64(int32(c * d))] x)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != Op386MULLconst {
+               if idx != x1.Args[0] {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(Op386MULLconst)
-               v.AuxInt = int64(int32(c * d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [-1] x)
-       // cond:
-       // result: (NEGL x)
-       for {
-               if v.AuxInt != -1 {
+               if p != x1.Args[1] {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386NEGL)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [0] _)
-       // cond:
-       // result: (MOVLconst [0])
-       for {
-               if v.AuxInt != 0 {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(Op386MOVLconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (MULLconst [1] x)
-       // cond:
-       // result: x
-       for {
-               if v.AuxInt != 1 {
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
                        break
                }
-               x := v.Args[0]
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MULLconst [3] x)
-       // cond:
-       // result: (LEAL2 x x)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               if v.AuxInt != 3 {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL2)
-               v.AddArg(x)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [5] x)
-       // cond:
-       // result: (LEAL4 x x)
-       for {
-               if v.AuxInt != 5 {
+               if s1.AuxInt != 24 {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL4)
-               v.AddArg(x)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [7] x)
-       // cond:
-       // result: (LEAL8 (NEGL <v.Type> x) x)
-       for {
-               if v.AuxInt != 7 {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL8)
-               v0 := b.NewValue0(v.Pos, Op386NEGL, v.Type)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [9] x)
-       // cond:
-       // result: (LEAL8 x x)
-       for {
-               if v.AuxInt != 9 {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               p := x2.Args[0]
+               idx := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL8)
-               v.AddArg(x)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [11] x)
-       // cond:
-       // result: (LEAL2 x (LEAL4 <v.Type> x x))
-       for {
-               if v.AuxInt != 11 {
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL2)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, Op386LEAL4, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [13] x)
-       // cond:
-       // result: (LEAL4 x (LEAL2 <v.Type> x x))
-       for {
-               if v.AuxInt != 13 {
+               if s0.AuxInt != 16 {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL4)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, Op386LEAL2, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [21] x)
-       // cond:
-       // result: (LEAL4 x (LEAL4 <v.Type> x x))
-       for {
-               if v.AuxInt != 21 {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL4)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, Op386LEAL4, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [25] x)
-       // cond:
-       // result: (LEAL8 x (LEAL2 <v.Type> x x))
-       for {
-               if v.AuxInt != 25 {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL8)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, Op386LEAL2, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [37] x)
-       // cond:
-       // result: (LEAL4 x (LEAL8 <v.Type> x x))
-       for {
-               if v.AuxInt != 37 {
+               if p != x1.Args[0] {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL4)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, Op386LEAL8, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [41] x)
-       // cond:
-       // result: (LEAL8 x (LEAL4 <v.Type> x x))
-       for {
-               if v.AuxInt != 41 {
+               if idx != x1.Args[1] {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL8)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, Op386LEAL4, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [73] x)
-       // cond:
-       // result: (LEAL8 x (LEAL8 <v.Type> x x))
-       for {
-               if v.AuxInt != 73 {
+               if mem != x1.Args[2] {
                        break
                }
-               x := v.Args[0]
-               v.reset(Op386LEAL8)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, Op386LEAL8, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [c] x)
-       // cond: isPowerOfTwo(c)
-       // result: (SHLLconst [log2(c)] x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c)) {
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               v.reset(Op386SHLLconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [c] x)
-       // cond: isPowerOfTwo(c+1) && c >= 15
-       // result: (SUBL (SHLLconst <v.Type> [log2(c+1)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c+1) && c >= 15) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
                        break
                }
-               v.reset(Op386SUBL)
-               v0 := b.NewValue0(v.Pos, Op386SHLLconst, v.Type)
-               v0.AuxInt = log2(c + 1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [c] x)
-       // cond: isPowerOfTwo(c-1) && c >= 17
-       // result: (LEAL1 (SHLLconst <v.Type> [log2(c-1)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c-1) && c >= 17) {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(Op386LEAL1)
-               v0 := b.NewValue0(v.Pos, Op386SHLLconst, v.Type)
-               v0.AuxInt = log2(c - 1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [c] x)
-       // cond: isPowerOfTwo(c-2) && c >= 34
-       // result: (LEAL2 (SHLLconst <v.Type> [log2(c-2)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c-2) && c >= 34) {
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
                        break
                }
-               v.reset(Op386LEAL2)
-               v0 := b.NewValue0(v.Pos, Op386SHLLconst, v.Type)
-               v0.AuxInt = log2(c - 2)
-               v0.AddArg(x)
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
                v.AddArg(v0)
-               v.AddArg(x)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MULLconst [c] x)
-       // cond: isPowerOfTwo(c-4) && c >= 68
-       // result: (LEAL4 (SHLLconst <v.Type> [log2(c-4)] x) x)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c-4) && c >= 68) {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               v.reset(Op386LEAL4)
-               v0 := b.NewValue0(v.Pos, Op386SHLLconst, v.Type)
-               v0.AuxInt = log2(c - 4)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [c] x)
-       // cond: isPowerOfTwo(c-8) && c >= 136
-       // result: (LEAL8 (SHLLconst <v.Type> [log2(c-8)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c-8) && c >= 136) {
+               if s1.AuxInt != 24 {
                        break
                }
-               v.reset(Op386LEAL8)
-               v0 := b.NewValue0(v.Pos, Op386SHLLconst, v.Type)
-               v0.AuxInt = log2(c - 8)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLconst [c] x)
-       // cond: c%3 == 0 && isPowerOfTwo(c/3)
-       // result: (SHLLconst [log2(c/3)] (LEAL2 <v.Type> x x))
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               v.reset(Op386SHLLconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, Op386LEAL2, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [c] x)
-       // cond: c%5 == 0 && isPowerOfTwo(c/5)
-       // result: (SHLLconst [log2(c/5)] (LEAL4 <v.Type> x x))
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               idx := x2.Args[0]
+               p := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               v.reset(Op386SHLLconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, Op386LEAL4, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [c] x)
-       // cond: c%9 == 0 && isPowerOfTwo(c/9)
-       // result: (SHLLconst [log2(c/9)] (LEAL8 <v.Type> x x))
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               v.reset(Op386SHLLconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, Op386LEAL8, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [c] (MOVLconst [d]))
-       // cond:
-       // result: (MOVLconst [int64(int32(c*d))])
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != Op386MOVLconst {
+               if s0.AuxInt != 16 {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(Op386MOVLconst)
-               v.AuxInt = int64(int32(c * d))
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386NEGL(v *Value) bool {
-       // match: (NEGL (MOVLconst [c]))
-       // cond:
-       // result: (MOVLconst [int64(int32(-c))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != Op386MOVLconst {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(Op386MOVLconst)
-               v.AuxInt = int64(int32(-c))
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386NOTL(v *Value) bool {
-       // match: (NOTL (MOVLconst [c]))
-       // cond:
-       // result: (MOVLconst [^c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != Op386MOVLconst {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(Op386MOVLconst)
-               v.AuxInt = ^c
-               return true
-       }
-       return false
-}
-func rewriteValue386_Op386ORL(v *Value) bool {
-       b := v.Block
-       _ = b
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (ORL x (MOVLconst [c]))
-       // cond:
-       // result: (ORLconst [c] x)
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386MOVLconst {
+               if p != x1.Args[0] {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(Op386ORLconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORL (MOVLconst [c]) x)
-       // cond:
-       // result: (ORLconst [c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != Op386MOVLconst {
+               if idx != x1.Args[1] {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(Op386ORLconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: ( ORL (SHLLconst [c] x) (SHRLconst [32-c] x))
-       // cond:
-       // result: (ROLLconst [c   ] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != Op386SHLLconst {
+               if mem != x1.Args[2] {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHRLconst {
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x != v_1.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               v.reset(Op386ROLLconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: ( ORL (SHRLconst [c] x) (SHLLconst [32-c] x))
-       // cond:
-       // result: (ROLLconst [32-c] x)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != Op386SHRLconst {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
+                       break
+               }
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               s := x2.Aux
+               p := x2.Args[0]
+               idx := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
+                       break
+               }
+               if s0.AuxInt != 16 {
+                       break
+               }
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHLLconst {
+               if idx != x1.Args[0] {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               if p != x1.Args[1] {
                        break
                }
-               if x != v_1.Args[0] {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(Op386ROLLconst)
-               v.AuxInt = 32 - c
-               v.AddArg(x)
-               return true
-       }
-       // match: ( ORL <t> (SHLLconst x [c]) (SHRWconst x [16-c]))
-       // cond: c < 16 && t.Size() == 2
-       // result: (ROLWconst x [   c])
-       for {
-               t := v.Type
-               v_0 := v.Args[0]
-               if v_0.Op != Op386SHLLconst {
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHRWconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if v_1.AuxInt != 16-c {
+               if p != x0.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if idx != x0.Args[1] {
                        break
                }
-               if !(c < 16 && t.Size() == 2) {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(Op386ROLWconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: ( ORL <t> (SHRWconst x [c]) (SHLLconst x [16-c]))
-       // cond: c > 0  && t.Size() == 2
-       // result: (ROLWconst x [16-c])
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               t := v.Type
-               v_0 := v.Args[0]
-               if v_0.Op != Op386SHRWconst {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHLLconst {
+               if s1.AuxInt != 24 {
                        break
                }
-               if v_1.AuxInt != 16-c {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               if x != v_1.Args[0] {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               idx := x2.Args[0]
+               p := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               if !(c > 0 && t.Size() == 2) {
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               v.reset(Op386ROLWconst)
-               v.AuxInt = 16 - c
-               v.AddArg(x)
-               return true
-       }
-       // match: ( ORL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c]))
-       // cond: c < 8 && t.Size() == 1
-       // result: (ROLBconst x [   c])
-       for {
-               t := v.Type
-               v_0 := v.Args[0]
-               if v_0.Op != Op386SHLLconst {
+               if s0.AuxInt != 16 {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHRBconst {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               if v_1.AuxInt != 8-c {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x != v_1.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if !(c < 8 && t.Size() == 1) {
+               if p != x1.Args[1] {
                        break
                }
-               v.reset(Op386ROLBconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: ( ORL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c]))
-       // cond: c > 0 && t.Size() == 1
-       // result: (ROLBconst x [ 8-c])
-       for {
-               t := v.Type
-               v_0 := v.Args[0]
-               if v_0.Op != Op386SHRBconst {
+               if mem != x1.Args[2] {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386SHLLconst {
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               if v_1.AuxInt != 8-c {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x != v_1.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if !(c > 0 && t.Size() == 1) {
+               if idx != x0.Args[1] {
                        break
                }
-               v.reset(Op386ROLBconst)
-               v.AuxInt = 8 - c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORL x x)
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x != v.Args[1] {
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
                        break
                }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORL x:(SHLLconst _) y)
-       // cond: y.Op != Op386SHLLconst
-       // result: (ORL y x)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != Op386SHLLconst {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               y := v.Args[1]
-               if !(y.Op != Op386SHLLconst) {
+               if s1.AuxInt != 24 {
                        break
                }
-               v.reset(Op386ORL)
-               v.AddArg(y)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORL                  x0:(MOVBload [i]   {s} p mem)     s0:(SHLLconst [8] x1:(MOVBload [i+1] {s} p mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
-       // result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem)
-       for {
-               x0 := v.Args[0]
-               if x0.Op != Op386MOVBload {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != Op386SHLLconst {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               p := x2.Args[0]
+               idx := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               if s0.AuxInt != 8 {
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != Op386MOVBload {
+               if s0.AuxInt != 16 {
                        break
                }
-               if x1.AuxInt != i+1 {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
+               i2 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
                if p != x1.Args[0] {
                        break
                }
-               if mem != x1.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+               if mem != x1.Args[2] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, Op386MOVWload, types.UInt16)
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
+               v0.AuxInt = i0
                v0.Aux = s
                v0.AddArg(p)
+               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORL o0:(ORL                        x0:(MOVWload [i]   {s} p mem)     s0:(SHLLconst [16] x1:(MOVBload [i+2] {s} p mem)))     s1:(SHLLconst [24] x2:(MOVBload [i+3] {s} p mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVLload [i] {s} p mem)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               o0 := v.Args[0]
-               if o0.Op != Op386ORL {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               x0 := o0.Args[0]
-               if x0.Op != Op386MOVWload {
+               if s1.AuxInt != 24 {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := o0.Args[1]
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               s := x2.Aux
+               idx := x2.Args[0]
+               p := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               s0 := o0.Args[0]
                if s0.Op != Op386SHLLconst {
                        break
                }
@@ -7514,187 +11659,205 @@ func rewriteValue386_Op386ORL(v *Value) bool {
                        break
                }
                x1 := s0.Args[0]
-               if x1.Op != Op386MOVBload {
-                       break
-               }
-               if x1.AuxInt != i+2 {
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
+               i2 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
                if p != x1.Args[0] {
                        break
                }
-               if mem != x1.Args[1] {
-                       break
-               }
-               s1 := v.Args[1]
-               if s1.Op != Op386SHLLconst {
+               if idx != x1.Args[1] {
                        break
                }
-               if s1.AuxInt != 24 {
+               if mem != x1.Args[2] {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != Op386MOVBload {
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               if x2.AuxInt != i+3 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x2.Aux != s {
+               if idx != x0.Args[0] {
                        break
                }
-               if p != x2.Args[0] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x2.Args[1] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, Op386MOVLload, types.UInt32)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
+               v0.AuxInt = i0
                v0.Aux = s
                v0.AddArg(p)
+               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORL                  x0:(MOVBloadidx1 [i]   {s} p idx mem)     s0:(SHLLconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i] {s} p idx mem)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               x0 := v.Args[0]
-               if x0.Op != Op386MOVBloadidx1 {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               s0 := v.Args[1]
+               if s1.AuxInt != 24 {
+                       break
+               }
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
+                       break
+               }
+               i3 := x2.AuxInt
+               s := x2.Aux
+               p := x2.Args[0]
+               idx := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
+                       break
+               }
+               s0 := o0.Args[0]
                if s0.Op != Op386SHLLconst {
                        break
                }
-               if s0.AuxInt != 8 {
+               if s0.AuxInt != 16 {
                        break
                }
                x1 := s0.Args[0]
                if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               if x1.AuxInt != i+1 {
-                       break
-               }
+               i2 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type)
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
+               v0.AuxInt = i0
                v0.Aux = s
                v0.AddArg(p)
                v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORL o0:(ORL                        x0:(MOVWloadidx1 [i]   {s} p idx mem)     s0:(SHLLconst [16] x1:(MOVBloadidx1 [i+2] {s} p idx mem)))     s1:(SHLLconst [24] x2:(MOVBloadidx1 [i+3] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i] {s} p idx mem)
+       // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem)))
+       // cond: i2 == i0+2   && i3 == i0+3   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               o0 := v.Args[0]
-               if o0.Op != Op386ORL {
-                       break
-               }
-               x0 := o0.Args[0]
-               if x0.Op != Op386MOVWloadidx1 {
-                       break
-               }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               s0 := o0.Args[1]
-               if s0.Op != Op386SHLLconst {
+               s1 := v.Args[0]
+               if s1.Op != Op386SHLLconst {
                        break
                }
-               if s0.AuxInt != 16 {
+               if s1.AuxInt != 24 {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != Op386MOVBloadidx1 {
+               x2 := s1.Args[0]
+               if x2.Op != Op386MOVBloadidx1 {
                        break
                }
-               if x1.AuxInt != i+2 {
+               i3 := x2.AuxInt
+               s := x2.Aux
+               idx := x2.Args[0]
+               p := x2.Args[1]
+               mem := x2.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != Op386ORL {
                        break
                }
-               if x1.Aux != s {
+               s0 := o0.Args[0]
+               if s0.Op != Op386SHLLconst {
                        break
                }
-               if p != x1.Args[0] {
+               if s0.AuxInt != 16 {
                        break
                }
-               if idx != x1.Args[1] {
+               x1 := s0.Args[0]
+               if x1.Op != Op386MOVBloadidx1 {
                        break
                }
-               if mem != x1.Args[2] {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               s1 := v.Args[1]
-               if s1.Op != Op386SHLLconst {
+               if idx != x1.Args[0] {
                        break
                }
-               if s1.AuxInt != 24 {
+               if p != x1.Args[1] {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != Op386MOVBloadidx1 {
+               if mem != x1.Args[2] {
                        break
                }
-               if x2.AuxInt != i+3 {
+               x0 := o0.Args[1]
+               if x0.Op != Op386MOVWloadidx1 {
                        break
                }
-               if x2.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x2.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if idx != x2.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x2.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2)
                v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
+               v0.AuxInt = i0
                v0.Aux = s
                v0.AddArg(p)
                v0.AddArg(idx)
@@ -9182,9 +13345,9 @@ func rewriteValue386_Op386XORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORL (SHLLconst [c] x) (SHRLconst [32-c] x))
-       // cond:
-       // result: (ROLLconst [c   ] x)
+       // match: (XORL (SHLLconst [c] x) (SHRLconst [d] x))
+       // cond: d == 32-c
+       // result: (ROLLconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != Op386SHLLconst {
@@ -9196,10 +13359,11 @@ func rewriteValue386_Op386XORL(v *Value) bool {
                if v_1.Op != Op386SHRLconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(Op386ROLLconst)
@@ -9207,34 +13371,35 @@ func rewriteValue386_Op386XORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORL (SHRLconst [c] x) (SHLLconst [32-c] x))
-       // cond:
-       // result: (ROLLconst [32-c] x)
+       // match: (XORL (SHRLconst [d] x) (SHLLconst [c] x))
+       // cond: d == 32-c
+       // result: (ROLLconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != Op386SHRLconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386SHLLconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(Op386ROLLconst)
-               v.AuxInt = 32 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (XORL <t> (SHLLconst x [c]) (SHRWconst x [16-c]))
-       // cond: c < 16 && t.Size() == 2
-       // result: (ROLWconst x [   c])
+       // match: (XORL <t> (SHLLconst x [c]) (SHRWconst x [d]))
+       // cond: c < 16 && d == 16-c && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
@@ -9247,13 +13412,11 @@ func rewriteValue386_Op386XORL(v *Value) bool {
                if v_1.Op != Op386SHRWconst {
                        break
                }
-               if v_1.AuxInt != 16-c {
-                       break
-               }
+               d := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c < 16 && t.Size() == 2) {
+               if !(c < 16 && d == 16-c && t.Size() == 2) {
                        break
                }
                v.reset(Op386ROLWconst)
@@ -9261,38 +13424,36 @@ func rewriteValue386_Op386XORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORL <t> (SHRWconst x [c]) (SHLLconst x [16-c]))
-       // cond: c > 0  && t.Size() == 2
-       // result: (ROLWconst x [16-c])
+       // match: (XORL <t> (SHRWconst x [d]) (SHLLconst x [c]))
+       // cond: c < 16 && d == 16-c && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
                if v_0.Op != Op386SHRWconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386SHLLconst {
                        break
                }
-               if v_1.AuxInt != 16-c {
-                       break
-               }
+               c := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c > 0 && t.Size() == 2) {
+               if !(c < 16 && d == 16-c && t.Size() == 2) {
                        break
                }
                v.reset(Op386ROLWconst)
-               v.AuxInt = 16 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (XORL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c]))
-       // cond: c < 8 && t.Size() == 1
-       // result: (ROLBconst x [   c])
+       // match: (XORL <t> (SHLLconst x [c]) (SHRBconst x [d]))
+       // cond: c < 8 && d == 8-c && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
@@ -9305,13 +13466,11 @@ func rewriteValue386_Op386XORL(v *Value) bool {
                if v_1.Op != Op386SHRBconst {
                        break
                }
-               if v_1.AuxInt != 8-c {
-                       break
-               }
+               d := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c < 8 && t.Size() == 1) {
+               if !(c < 8 && d == 8-c && t.Size() == 1) {
                        break
                }
                v.reset(Op386ROLBconst)
@@ -9319,32 +13478,30 @@ func rewriteValue386_Op386XORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c]))
-       // cond: c > 0 && t.Size() == 1
-       // result: (ROLBconst x [ 8-c])
+       // match: (XORL <t> (SHRBconst x [d]) (SHLLconst x [c]))
+       // cond: c < 8 && d == 8-c && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
                if v_0.Op != Op386SHRBconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != Op386SHLLconst {
                        break
                }
-               if v_1.AuxInt != 8-c {
-                       break
-               }
+               c := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c > 0 && t.Size() == 1) {
+               if !(c < 8 && d == 8-c && t.Size() == 1) {
                        break
                }
                v.reset(Op386ROLBconst)
-               v.AuxInt = 8 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
@@ -9410,7 +13567,7 @@ func rewriteValue386_Op386XORLconst(v *Value) bool {
        return false
 }
 func rewriteValue386_OpAdd16(v *Value) bool {
-       // match: (Add16  x y)
+       // match: (Add16 x y)
        // cond:
        // result: (ADDL  x y)
        for {
@@ -9423,7 +13580,7 @@ func rewriteValue386_OpAdd16(v *Value) bool {
        }
 }
 func rewriteValue386_OpAdd32(v *Value) bool {
-       // match: (Add32  x y)
+       // match: (Add32 x y)
        // cond:
        // result: (ADDL  x y)
        for {
@@ -9490,7 +13647,7 @@ func rewriteValue386_OpAdd64F(v *Value) bool {
        }
 }
 func rewriteValue386_OpAdd8(v *Value) bool {
-       // match: (Add8   x y)
+       // match: (Add8 x y)
        // cond:
        // result: (ADDL  x y)
        for {
@@ -9555,7 +13712,7 @@ func rewriteValue386_OpAnd32(v *Value) bool {
        }
 }
 func rewriteValue386_OpAnd8(v *Value) bool {
-       // match: (And8  x y)
+       // match: (And8 x y)
        // cond:
        // result: (ANDL x y)
        for {
@@ -9644,7 +13801,7 @@ func rewriteValue386_OpCom32(v *Value) bool {
        }
 }
 func rewriteValue386_OpCom8(v *Value) bool {
-       // match: (Com8  x)
+       // match: (Com8 x)
        // cond:
        // result: (NOTL x)
        for {
@@ -9655,7 +13812,7 @@ func rewriteValue386_OpCom8(v *Value) bool {
        }
 }
 func rewriteValue386_OpConst16(v *Value) bool {
-       // match: (Const16  [val])
+       // match: (Const16 [val])
        // cond:
        // result: (MOVLconst [val])
        for {
@@ -9666,7 +13823,7 @@ func rewriteValue386_OpConst16(v *Value) bool {
        }
 }
 func rewriteValue386_OpConst32(v *Value) bool {
-       // match: (Const32  [val])
+       // match: (Const32 [val])
        // cond:
        // result: (MOVLconst [val])
        for {
@@ -9699,7 +13856,7 @@ func rewriteValue386_OpConst64F(v *Value) bool {
        }
 }
 func rewriteValue386_OpConst8(v *Value) bool {
-       // match: (Const8   [val])
+       // match: (Const8 [val])
        // cond:
        // result: (MOVLconst [val])
        for {
@@ -9812,7 +13969,7 @@ func rewriteValue386_OpCvt64Fto32F(v *Value) bool {
        }
 }
 func rewriteValue386_OpDiv16(v *Value) bool {
-       // match: (Div16  x y)
+       // match: (Div16 x y)
        // cond:
        // result: (DIVW  x y)
        for {
@@ -9838,7 +13995,7 @@ func rewriteValue386_OpDiv16u(v *Value) bool {
        }
 }
 func rewriteValue386_OpDiv32(v *Value) bool {
-       // match: (Div32  x y)
+       // match: (Div32 x y)
        // cond:
        // result: (DIVL  x y)
        for {
@@ -9894,7 +14051,7 @@ func rewriteValue386_OpDiv8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div8   x y)
+       // match: (Div8 x y)
        // cond:
        // result: (DIVW  (SignExt8to16 x) (SignExt8to16 y))
        for {
@@ -9915,7 +14072,7 @@ func rewriteValue386_OpDiv8u(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div8u  x y)
+       // match: (Div8u x y)
        // cond:
        // result: (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))
        for {
@@ -9934,7 +14091,7 @@ func rewriteValue386_OpDiv8u(v *Value) bool {
 func rewriteValue386_OpEq16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Eq16  x y)
+       // match: (Eq16 x y)
        // cond:
        // result: (SETEQ (CMPW x y))
        for {
@@ -9951,7 +14108,7 @@ func rewriteValue386_OpEq16(v *Value) bool {
 func rewriteValue386_OpEq32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Eq32  x y)
+       // match: (Eq32 x y)
        // cond:
        // result: (SETEQ (CMPL x y))
        for {
@@ -10002,7 +14159,7 @@ func rewriteValue386_OpEq64F(v *Value) bool {
 func rewriteValue386_OpEq8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Eq8   x y)
+       // match: (Eq8 x y)
        // cond:
        // result: (SETEQ (CMPB x y))
        for {
@@ -10019,7 +14176,7 @@ func rewriteValue386_OpEq8(v *Value) bool {
 func rewriteValue386_OpEqB(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (EqB   x y)
+       // match: (EqB x y)
        // cond:
        // result: (SETEQ (CMPB x y))
        for {
@@ -10053,7 +14210,7 @@ func rewriteValue386_OpEqPtr(v *Value) bool {
 func rewriteValue386_OpGeq16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Geq16  x y)
+       // match: (Geq16 x y)
        // cond:
        // result: (SETGE (CMPW x y))
        for {
@@ -10087,7 +14244,7 @@ func rewriteValue386_OpGeq16U(v *Value) bool {
 func rewriteValue386_OpGeq32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Geq32  x y)
+       // match: (Geq32 x y)
        // cond:
        // result: (SETGE (CMPL x y))
        for {
@@ -10155,7 +14312,7 @@ func rewriteValue386_OpGeq64F(v *Value) bool {
 func rewriteValue386_OpGeq8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Geq8   x y)
+       // match: (Geq8 x y)
        // cond:
        // result: (SETGE (CMPB x y))
        for {
@@ -10172,7 +14329,7 @@ func rewriteValue386_OpGeq8(v *Value) bool {
 func rewriteValue386_OpGeq8U(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Geq8U  x y)
+       // match: (Geq8U x y)
        // cond:
        // result: (SETAE (CMPB x y))
        for {
@@ -10209,7 +14366,7 @@ func rewriteValue386_OpGetG(v *Value) bool {
 func rewriteValue386_OpGreater16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Greater16  x y)
+       // match: (Greater16 x y)
        // cond:
        // result: (SETG (CMPW x y))
        for {
@@ -10243,7 +14400,7 @@ func rewriteValue386_OpGreater16U(v *Value) bool {
 func rewriteValue386_OpGreater32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Greater32  x y)
+       // match: (Greater32 x y)
        // cond:
        // result: (SETG (CMPL x y))
        for {
@@ -10311,7 +14468,7 @@ func rewriteValue386_OpGreater64F(v *Value) bool {
 func rewriteValue386_OpGreater8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Greater8   x y)
+       // match: (Greater8 x y)
        // cond:
        // result: (SETG (CMPB x y))
        for {
@@ -10328,7 +14485,7 @@ func rewriteValue386_OpGreater8(v *Value) bool {
 func rewriteValue386_OpGreater8U(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Greater8U  x y)
+       // match: (Greater8U x y)
        // cond:
        // result: (SETA (CMPB x y))
        for {
@@ -10343,7 +14500,7 @@ func rewriteValue386_OpGreater8U(v *Value) bool {
        }
 }
 func rewriteValue386_OpHmul32(v *Value) bool {
-       // match: (Hmul32  x y)
+       // match: (Hmul32 x y)
        // cond:
        // result: (HMULL  x y)
        for {
@@ -10436,7 +14593,7 @@ func rewriteValue386_OpIsSliceInBounds(v *Value) bool {
 func rewriteValue386_OpLeq16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Leq16  x y)
+       // match: (Leq16 x y)
        // cond:
        // result: (SETLE (CMPW x y))
        for {
@@ -10470,7 +14627,7 @@ func rewriteValue386_OpLeq16U(v *Value) bool {
 func rewriteValue386_OpLeq32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Leq32  x y)
+       // match: (Leq32 x y)
        // cond:
        // result: (SETLE (CMPL x y))
        for {
@@ -10538,7 +14695,7 @@ func rewriteValue386_OpLeq64F(v *Value) bool {
 func rewriteValue386_OpLeq8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Leq8   x y)
+       // match: (Leq8 x y)
        // cond:
        // result: (SETLE (CMPB x y))
        for {
@@ -10555,7 +14712,7 @@ func rewriteValue386_OpLeq8(v *Value) bool {
 func rewriteValue386_OpLeq8U(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Leq8U  x y)
+       // match: (Leq8U x y)
        // cond:
        // result: (SETBE (CMPB x y))
        for {
@@ -10572,7 +14729,7 @@ func rewriteValue386_OpLeq8U(v *Value) bool {
 func rewriteValue386_OpLess16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Less16  x y)
+       // match: (Less16 x y)
        // cond:
        // result: (SETL (CMPW x y))
        for {
@@ -10606,7 +14763,7 @@ func rewriteValue386_OpLess16U(v *Value) bool {
 func rewriteValue386_OpLess32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Less32  x y)
+       // match: (Less32 x y)
        // cond:
        // result: (SETL (CMPL x y))
        for {
@@ -10674,7 +14831,7 @@ func rewriteValue386_OpLess64F(v *Value) bool {
 func rewriteValue386_OpLess8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Less8   x y)
+       // match: (Less8 x y)
        // cond:
        // result: (SETL (CMPB x y))
        for {
@@ -10691,7 +14848,7 @@ func rewriteValue386_OpLess8(v *Value) bool {
 func rewriteValue386_OpLess8U(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Less8U  x y)
+       // match: (Less8U x y)
        // cond:
        // result: (SETB (CMPB x y))
        for {
@@ -10871,7 +15028,7 @@ func rewriteValue386_OpLsh16x64(v *Value) bool {
 func rewriteValue386_OpLsh16x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh16x8  <t> x y)
+       // match: (Lsh16x8 <t> x y)
        // cond:
        // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
@@ -10980,7 +15137,7 @@ func rewriteValue386_OpLsh32x64(v *Value) bool {
 func rewriteValue386_OpLsh32x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh32x8  <t> x y)
+       // match: (Lsh32x8 <t> x y)
        // cond:
        // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
@@ -11089,7 +15246,7 @@ func rewriteValue386_OpLsh8x64(v *Value) bool {
 func rewriteValue386_OpLsh8x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh8x8  <t> x y)
+       // match: (Lsh8x8 <t> x y)
        // cond:
        // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
@@ -11111,7 +15268,7 @@ func rewriteValue386_OpLsh8x8(v *Value) bool {
        }
 }
 func rewriteValue386_OpMod16(v *Value) bool {
-       // match: (Mod16  x y)
+       // match: (Mod16 x y)
        // cond:
        // result: (MODW  x y)
        for {
@@ -11137,7 +15294,7 @@ func rewriteValue386_OpMod16u(v *Value) bool {
        }
 }
 func rewriteValue386_OpMod32(v *Value) bool {
-       // match: (Mod32  x y)
+       // match: (Mod32 x y)
        // cond:
        // result: (MODL  x y)
        for {
@@ -11167,7 +15324,7 @@ func rewriteValue386_OpMod8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod8   x y)
+       // match: (Mod8 x y)
        // cond:
        // result: (MODW  (SignExt8to16 x) (SignExt8to16 y))
        for {
@@ -11188,7 +15345,7 @@ func rewriteValue386_OpMod8u(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod8u  x y)
+       // match: (Mod8u x y)
        // cond:
        // result: (MODWU (ZeroExt8to16 x) (ZeroExt8to16 y))
        for {
@@ -11493,7 +15650,7 @@ func rewriteValue386_OpMove(v *Value) bool {
        return false
 }
 func rewriteValue386_OpMul16(v *Value) bool {
-       // match: (Mul16  x y)
+       // match: (Mul16 x y)
        // cond:
        // result: (MULL  x y)
        for {
@@ -11506,7 +15663,7 @@ func rewriteValue386_OpMul16(v *Value) bool {
        }
 }
 func rewriteValue386_OpMul32(v *Value) bool {
-       // match: (Mul32  x y)
+       // match: (Mul32 x y)
        // cond:
        // result: (MULL  x y)
        for {
@@ -11558,7 +15715,7 @@ func rewriteValue386_OpMul64F(v *Value) bool {
        }
 }
 func rewriteValue386_OpMul8(v *Value) bool {
-       // match: (Mul8   x y)
+       // match: (Mul8 x y)
        // cond:
        // result: (MULL  x y)
        for {
@@ -11571,7 +15728,7 @@ func rewriteValue386_OpMul8(v *Value) bool {
        }
 }
 func rewriteValue386_OpNeg16(v *Value) bool {
-       // match: (Neg16  x)
+       // match: (Neg16 x)
        // cond:
        // result: (NEGL x)
        for {
@@ -11582,7 +15739,7 @@ func rewriteValue386_OpNeg16(v *Value) bool {
        }
 }
 func rewriteValue386_OpNeg32(v *Value) bool {
-       // match: (Neg32  x)
+       // match: (Neg32 x)
        // cond:
        // result: (NEGL x)
        for {
@@ -11665,7 +15822,7 @@ func rewriteValue386_OpNeg64F(v *Value) bool {
        return false
 }
 func rewriteValue386_OpNeg8(v *Value) bool {
-       // match: (Neg8   x)
+       // match: (Neg8 x)
        // cond:
        // result: (NEGL x)
        for {
@@ -11678,7 +15835,7 @@ func rewriteValue386_OpNeg8(v *Value) bool {
 func rewriteValue386_OpNeq16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Neq16  x y)
+       // match: (Neq16 x y)
        // cond:
        // result: (SETNE (CMPW x y))
        for {
@@ -11695,7 +15852,7 @@ func rewriteValue386_OpNeq16(v *Value) bool {
 func rewriteValue386_OpNeq32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Neq32  x y)
+       // match: (Neq32 x y)
        // cond:
        // result: (SETNE (CMPL x y))
        for {
@@ -11746,7 +15903,7 @@ func rewriteValue386_OpNeq64F(v *Value) bool {
 func rewriteValue386_OpNeq8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Neq8   x y)
+       // match: (Neq8 x y)
        // cond:
        // result: (SETNE (CMPB x y))
        for {
@@ -11763,7 +15920,7 @@ func rewriteValue386_OpNeq8(v *Value) bool {
 func rewriteValue386_OpNeqB(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (NeqB   x y)
+       // match: (NeqB x y)
        // cond:
        // result: (SETNE (CMPB x y))
        for {
@@ -11859,7 +16016,7 @@ func rewriteValue386_OpOr32(v *Value) bool {
        }
 }
 func rewriteValue386_OpOr8(v *Value) bool {
-       // match: (Or8  x y)
+       // match: (Or8 x y)
        // cond:
        // result: (ORL x y)
        for {
@@ -11996,7 +16153,7 @@ func rewriteValue386_OpRsh16Ux64(v *Value) bool {
 func rewriteValue386_OpRsh16Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh16Ux8  <t> x y)
+       // match: (Rsh16Ux8 <t> x y)
        // cond:
        // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPBconst y [16])))
        for {
@@ -12113,7 +16270,7 @@ func rewriteValue386_OpRsh16x64(v *Value) bool {
 func rewriteValue386_OpRsh16x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh16x8  <t> x y)
+       // match: (Rsh16x8 <t> x y)
        // cond:
        // result: (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [16])))))
        for {
@@ -12225,7 +16382,7 @@ func rewriteValue386_OpRsh32Ux64(v *Value) bool {
 func rewriteValue386_OpRsh32Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh32Ux8  <t> x y)
+       // match: (Rsh32Ux8 <t> x y)
        // cond:
        // result: (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
@@ -12342,7 +16499,7 @@ func rewriteValue386_OpRsh32x64(v *Value) bool {
 func rewriteValue386_OpRsh32x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh32x8  <t> x y)
+       // match: (Rsh32x8 <t> x y)
        // cond:
        // result: (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [32])))))
        for {
@@ -12454,7 +16611,7 @@ func rewriteValue386_OpRsh8Ux64(v *Value) bool {
 func rewriteValue386_OpRsh8Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh8Ux8  <t> x y)
+       // match: (Rsh8Ux8 <t> x y)
        // cond:
        // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPBconst y [8])))
        for {
@@ -12571,7 +16728,7 @@ func rewriteValue386_OpRsh8x64(v *Value) bool {
 func rewriteValue386_OpRsh8x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh8x8  <t> x y)
+       // match: (Rsh8x8 <t> x y)
        // cond:
        // result: (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [8])))))
        for {
@@ -12607,7 +16764,7 @@ func rewriteValue386_OpSignExt16to32(v *Value) bool {
        }
 }
 func rewriteValue386_OpSignExt8to16(v *Value) bool {
-       // match: (SignExt8to16  x)
+       // match: (SignExt8to16 x)
        // cond:
        // result: (MOVBLSX x)
        for {
@@ -12618,7 +16775,7 @@ func rewriteValue386_OpSignExt8to16(v *Value) bool {
        }
 }
 func rewriteValue386_OpSignExt8to32(v *Value) bool {
-       // match: (SignExt8to32  x)
+       // match: (SignExt8to32 x)
        // cond:
        // result: (MOVBLSX x)
        for {
@@ -12772,7 +16929,7 @@ func rewriteValue386_OpStore(v *Value) bool {
        return false
 }
 func rewriteValue386_OpSub16(v *Value) bool {
-       // match: (Sub16  x y)
+       // match: (Sub16 x y)
        // cond:
        // result: (SUBL  x y)
        for {
@@ -12785,7 +16942,7 @@ func rewriteValue386_OpSub16(v *Value) bool {
        }
 }
 func rewriteValue386_OpSub32(v *Value) bool {
-       // match: (Sub32  x y)
+       // match: (Sub32 x y)
        // cond:
        // result: (SUBL  x y)
        for {
@@ -12852,7 +17009,7 @@ func rewriteValue386_OpSub64F(v *Value) bool {
        }
 }
 func rewriteValue386_OpSub8(v *Value) bool {
-       // match: (Sub8   x y)
+       // match: (Sub8 x y)
        // cond:
        // result: (SUBL  x y)
        for {
@@ -12878,7 +17035,7 @@ func rewriteValue386_OpSubPtr(v *Value) bool {
        }
 }
 func rewriteValue386_OpTrunc16to8(v *Value) bool {
-       // match: (Trunc16to8  x)
+       // match: (Trunc16to8 x)
        // cond:
        // result: x
        for {
@@ -12902,7 +17059,7 @@ func rewriteValue386_OpTrunc32to16(v *Value) bool {
        }
 }
 func rewriteValue386_OpTrunc32to8(v *Value) bool {
-       // match: (Trunc32to8  x)
+       // match: (Trunc32to8 x)
        // cond:
        // result: x
        for {
@@ -12940,7 +17097,7 @@ func rewriteValue386_OpXor32(v *Value) bool {
        }
 }
 func rewriteValue386_OpXor8(v *Value) bool {
-       // match: (Xor8  x y)
+       // match: (Xor8 x y)
        // cond:
        // result: (XORL x y)
        for {
@@ -13239,7 +17396,7 @@ func rewriteValue386_OpZeroExt16to32(v *Value) bool {
        }
 }
 func rewriteValue386_OpZeroExt8to16(v *Value) bool {
-       // match: (ZeroExt8to16  x)
+       // match: (ZeroExt8to16 x)
        // cond:
        // result: (MOVBLZX x)
        for {
@@ -13250,7 +17407,7 @@ func rewriteValue386_OpZeroExt8to16(v *Value) bool {
        }
 }
 func rewriteValue386_OpZeroExt8to32(v *Value) bool {
-       // match: (ZeroExt8to32  x)
+       // match: (ZeroExt8to32 x)
        // cond:
        // result: (MOVBLZX x)
        for {
@@ -13592,7 +17749,7 @@ func rewriteBlock386(b *Block) bool {
                        return true
                }
        case BlockIf:
-               // match: (If (SETL  cmp) yes no)
+               // match: (If (SETL cmp) yes no)
                // cond:
                // result: (LT  cmp yes no)
                for {
@@ -13626,7 +17783,7 @@ func rewriteBlock386(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (If (SETG  cmp) yes no)
+               // match: (If (SETG cmp) yes no)
                // cond:
                // result: (GT  cmp yes no)
                for {
@@ -13694,7 +17851,7 @@ func rewriteBlock386(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (If (SETB  cmp) yes no)
+               // match: (If (SETB cmp) yes no)
                // cond:
                // result: (ULT cmp yes no)
                for {
@@ -13728,343 +17885,707 @@ func rewriteBlock386(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (If (SETA  cmp) yes no)
+               // match: (If (SETA cmp) yes no)
+               // cond:
+               // result: (UGT cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386SETA {
+                               break
+                       }
+                       cmp := v.Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = Block386UGT
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (If (SETAE cmp) yes no)
+               // cond:
+               // result: (UGE cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386SETAE {
+                               break
+                       }
+                       cmp := v.Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = Block386UGE
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (If (SETGF cmp) yes no)
+               // cond:
+               // result: (UGT  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386SETGF {
+                               break
+                       }
+                       cmp := v.Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = Block386UGT
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (If (SETGEF cmp) yes no)
+               // cond:
+               // result: (UGE  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386SETGEF {
+                               break
+                       }
+                       cmp := v.Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = Block386UGE
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (If (SETEQF cmp) yes no)
+               // cond:
+               // result: (EQF  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386SETEQF {
+                               break
+                       }
+                       cmp := v.Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = Block386EQF
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (If (SETNEF cmp) yes no)
+               // cond:
+               // result: (NEF  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386SETNEF {
+                               break
+                       }
+                       cmp := v.Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = Block386NEF
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (If cond yes no)
+               // cond:
+               // result: (NE (TESTB cond cond) yes no)
+               for {
+                       v := b.Control
+                       _ = v
+                       cond := b.Control
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = Block386NE
+                       v0 := b.NewValue0(v.Pos, Op386TESTB, TypeFlags)
+                       v0.AddArg(cond)
+                       v0.AddArg(cond)
+                       b.SetControl(v0)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+       case Block386LE:
+               // match: (LE (InvertFlags cmp) yes no)
+               // cond:
+               // result: (GE cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386InvertFlags {
+                               break
+                       }
+                       cmp := v.Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = Block386GE
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (LE (FlagEQ) yes no)
+               // cond:
+               // result: (First nil yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386FlagEQ {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (LE (FlagLT_ULT) yes no)
+               // cond:
+               // result: (First nil yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386FlagLT_ULT {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (LE (FlagLT_UGT) yes no)
+               // cond:
+               // result: (First nil yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386FlagLT_UGT {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (LE (FlagGT_ULT) yes no)
+               // cond:
+               // result: (First nil no yes)
+               for {
+                       v := b.Control
+                       if v.Op != Op386FlagGT_ULT {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       b.swapSuccessors()
+                       _ = no
+                       _ = yes
+                       return true
+               }
+               // match: (LE (FlagGT_UGT) yes no)
+               // cond:
+               // result: (First nil no yes)
+               for {
+                       v := b.Control
+                       if v.Op != Op386FlagGT_UGT {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       b.swapSuccessors()
+                       _ = no
+                       _ = yes
+                       return true
+               }
+       case Block386LT:
+               // match: (LT (InvertFlags cmp) yes no)
                // cond:
-               // result: (UGT cmp yes no)
+               // result: (GT cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386SETA {
+                       if v.Op != Op386InvertFlags {
                                break
                        }
                        cmp := v.Args[0]
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386UGT
+                       b.Kind = Block386GT
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (If (SETAE cmp) yes no)
+               // match: (LT (FlagEQ) yes no)
                // cond:
-               // result: (UGE cmp yes no)
+               // result: (First nil no yes)
                for {
                        v := b.Control
-                       if v.Op != Op386SETAE {
+                       if v.Op != Op386FlagEQ {
                                break
                        }
-                       cmp := v.Args[0]
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386UGE
-                       b.SetControl(cmp)
-                       _ = yes
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       b.swapSuccessors()
                        _ = no
+                       _ = yes
                        return true
                }
-               // match: (If (SETGF  cmp) yes no)
+               // match: (LT (FlagLT_ULT) yes no)
                // cond:
-               // result: (UGT  cmp yes no)
+               // result: (First nil yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386SETGF {
+                       if v.Op != Op386FlagLT_ULT {
                                break
                        }
-                       cmp := v.Args[0]
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386UGT
-                       b.SetControl(cmp)
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (If (SETGEF cmp) yes no)
+               // match: (LT (FlagLT_UGT) yes no)
                // cond:
-               // result: (UGE  cmp yes no)
+               // result: (First nil yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386SETGEF {
+                       if v.Op != Op386FlagLT_UGT {
                                break
                        }
-                       cmp := v.Args[0]
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386UGE
-                       b.SetControl(cmp)
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (If (SETEQF cmp) yes no)
+               // match: (LT (FlagGT_ULT) yes no)
                // cond:
-               // result: (EQF  cmp yes no)
+               // result: (First nil no yes)
                for {
                        v := b.Control
-                       if v.Op != Op386SETEQF {
+                       if v.Op != Op386FlagGT_ULT {
                                break
                        }
-                       cmp := v.Args[0]
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386EQF
-                       b.SetControl(cmp)
-                       _ = yes
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       b.swapSuccessors()
                        _ = no
+                       _ = yes
                        return true
                }
-               // match: (If (SETNEF cmp) yes no)
+               // match: (LT (FlagGT_UGT) yes no)
                // cond:
-               // result: (NEF  cmp yes no)
+               // result: (First nil no yes)
                for {
                        v := b.Control
-                       if v.Op != Op386SETNEF {
+                       if v.Op != Op386FlagGT_UGT {
                                break
                        }
-                       cmp := v.Args[0]
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386NEF
-                       b.SetControl(cmp)
-                       _ = yes
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       b.swapSuccessors()
                        _ = no
+                       _ = yes
                        return true
                }
-               // match: (If cond yes no)
+       case Block386NE:
+               // match: (NE (TESTB (SETL cmp) (SETL cmp)) yes no)
                // cond:
-               // result: (NE (TESTB cond cond) yes no)
+               // result: (LT  cmp yes no)
                for {
                        v := b.Control
-                       _ = v
-                       cond := b.Control
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETL {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETL {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386NE
-                       v0 := b.NewValue0(v.Pos, Op386TESTB, TypeFlags)
-                       v0.AddArg(cond)
-                       v0.AddArg(cond)
-                       b.SetControl(v0)
+                       b.Kind = Block386LT
+                       b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-       case Block386LE:
-               // match: (LE (InvertFlags cmp) yes no)
+               // match: (NE (TESTB (SETL cmp) (SETL cmp)) yes no)
                // cond:
-               // result: (GE cmp yes no)
+               // result: (LT  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386InvertFlags {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETL {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETL {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
-                       cmp := v.Args[0]
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386GE
+                       b.Kind = Block386LT
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (LE (FlagEQ) yes no)
+               // match: (NE (TESTB (SETLE cmp) (SETLE cmp)) yes no)
                // cond:
-               // result: (First nil yes no)
+               // result: (LE  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386FlagEQ {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETLE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETLE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
+                       b.Kind = Block386LE
+                       b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (LE (FlagLT_ULT) yes no)
+               // match: (NE (TESTB (SETLE cmp) (SETLE cmp)) yes no)
                // cond:
-               // result: (First nil yes no)
+               // result: (LE  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386FlagLT_ULT {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETLE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETLE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
+                       b.Kind = Block386LE
+                       b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (LE (FlagLT_UGT) yes no)
+               // match: (NE (TESTB (SETG cmp) (SETG cmp)) yes no)
                // cond:
-               // result: (First nil yes no)
+               // result: (GT  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386FlagLT_UGT {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETG {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETG {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
+                       b.Kind = Block386GT
+                       b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (LE (FlagGT_ULT) yes no)
+               // match: (NE (TESTB (SETG cmp) (SETG cmp)) yes no)
                // cond:
-               // result: (First nil no yes)
+               // result: (GT  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386FlagGT_ULT {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETG {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETG {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
-                       b.swapSuccessors()
-                       _ = no
+                       b.Kind = Block386GT
+                       b.SetControl(cmp)
                        _ = yes
+                       _ = no
                        return true
                }
-               // match: (LE (FlagGT_UGT) yes no)
+               // match: (NE (TESTB (SETGE cmp) (SETGE cmp)) yes no)
                // cond:
-               // result: (First nil no yes)
+               // result: (GE  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386FlagGT_UGT {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETGE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETGE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
-                       b.swapSuccessors()
-                       _ = no
+                       b.Kind = Block386GE
+                       b.SetControl(cmp)
                        _ = yes
+                       _ = no
                        return true
                }
-       case Block386LT:
-               // match: (LT (InvertFlags cmp) yes no)
+               // match: (NE (TESTB (SETGE cmp) (SETGE cmp)) yes no)
                // cond:
-               // result: (GT cmp yes no)
+               // result: (GE  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386InvertFlags {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETGE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETGE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
-                       cmp := v.Args[0]
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386GT
+                       b.Kind = Block386GE
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (LT (FlagEQ) yes no)
+               // match: (NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no)
                // cond:
-               // result: (First nil no yes)
+               // result: (EQ  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386FlagEQ {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETEQ {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETEQ {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
-                       b.swapSuccessors()
-                       _ = no
+                       b.Kind = Block386EQ
+                       b.SetControl(cmp)
                        _ = yes
+                       _ = no
                        return true
                }
-               // match: (LT (FlagLT_ULT) yes no)
+               // match: (NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no)
                // cond:
-               // result: (First nil yes no)
+               // result: (EQ  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386FlagLT_ULT {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETEQ {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETEQ {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
+                       b.Kind = Block386EQ
+                       b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (LT (FlagLT_UGT) yes no)
+               // match: (NE (TESTB (SETNE cmp) (SETNE cmp)) yes no)
                // cond:
-               // result: (First nil yes no)
+               // result: (NE  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386FlagLT_UGT {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETNE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETNE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
+                       b.Kind = Block386NE
+                       b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (LT (FlagGT_ULT) yes no)
+               // match: (NE (TESTB (SETNE cmp) (SETNE cmp)) yes no)
                // cond:
-               // result: (First nil no yes)
+               // result: (NE  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386FlagGT_ULT {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETNE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETNE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
-                       b.swapSuccessors()
-                       _ = no
+                       b.Kind = Block386NE
+                       b.SetControl(cmp)
                        _ = yes
+                       _ = no
                        return true
                }
-               // match: (LT (FlagGT_UGT) yes no)
+               // match: (NE (TESTB (SETB cmp) (SETB cmp)) yes no)
                // cond:
-               // result: (First nil no yes)
+               // result: (ULT cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != Op386FlagGT_UGT {
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETB {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETB {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
-                       b.swapSuccessors()
-                       _ = no
+                       b.Kind = Block386ULT
+                       b.SetControl(cmp)
                        _ = yes
+                       _ = no
                        return true
                }
-       case Block386NE:
-               // match: (NE (TESTB (SETL  cmp) (SETL  cmp)) yes no)
+               // match: (NE (TESTB (SETB cmp) (SETB cmp)) yes no)
                // cond:
-               // result: (LT  cmp yes no)
+               // result: (ULT cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETL {
+                       if v_0.Op != Op386SETB {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETL {
+                       if v_1.Op != Op386SETB {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14072,27 +18593,27 @@ func rewriteBlock386(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386LT
+                       b.Kind = Block386ULT
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETLE cmp) (SETLE cmp)) yes no)
+               // match: (NE (TESTB (SETBE cmp) (SETBE cmp)) yes no)
                // cond:
-               // result: (LE  cmp yes no)
+               // result: (ULE cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETLE {
+                       if v_0.Op != Op386SETBE {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETLE {
+                       if v_1.Op != Op386SETBE {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14100,27 +18621,27 @@ func rewriteBlock386(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386LE
+                       b.Kind = Block386ULE
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETG  cmp) (SETG  cmp)) yes no)
+               // match: (NE (TESTB (SETBE cmp) (SETBE cmp)) yes no)
                // cond:
-               // result: (GT  cmp yes no)
+               // result: (ULE cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETG {
+                       if v_0.Op != Op386SETBE {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETG {
+                       if v_1.Op != Op386SETBE {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14128,27 +18649,27 @@ func rewriteBlock386(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386GT
+                       b.Kind = Block386ULE
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETGE cmp) (SETGE cmp)) yes no)
+               // match: (NE (TESTB (SETA cmp) (SETA cmp)) yes no)
                // cond:
-               // result: (GE  cmp yes no)
+               // result: (UGT cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETGE {
+                       if v_0.Op != Op386SETA {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETGE {
+                       if v_1.Op != Op386SETA {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14156,27 +18677,27 @@ func rewriteBlock386(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386GE
+                       b.Kind = Block386UGT
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no)
+               // match: (NE (TESTB (SETA cmp) (SETA cmp)) yes no)
                // cond:
-               // result: (EQ  cmp yes no)
+               // result: (UGT cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETEQ {
+                       if v_0.Op != Op386SETA {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETEQ {
+                       if v_1.Op != Op386SETA {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14184,27 +18705,27 @@ func rewriteBlock386(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386EQ
+                       b.Kind = Block386UGT
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETNE cmp) (SETNE cmp)) yes no)
+               // match: (NE (TESTB (SETAE cmp) (SETAE cmp)) yes no)
                // cond:
-               // result: (NE  cmp yes no)
+               // result: (UGE cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETNE {
+                       if v_0.Op != Op386SETAE {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETNE {
+                       if v_1.Op != Op386SETAE {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14212,27 +18733,27 @@ func rewriteBlock386(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386NE
+                       b.Kind = Block386UGE
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETB  cmp) (SETB  cmp)) yes no)
+               // match: (NE (TESTB (SETAE cmp) (SETAE cmp)) yes no)
                // cond:
-               // result: (ULT cmp yes no)
+               // result: (UGE cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETB {
+                       if v_0.Op != Op386SETAE {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETB {
+                       if v_1.Op != Op386SETAE {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14240,27 +18761,27 @@ func rewriteBlock386(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386ULT
+                       b.Kind = Block386UGE
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETBE cmp) (SETBE cmp)) yes no)
+               // match: (NE (TESTB (SETGF cmp) (SETGF cmp)) yes no)
                // cond:
-               // result: (ULE cmp yes no)
+               // result: (UGT  cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETBE {
+                       if v_0.Op != Op386SETGF {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETBE {
+                       if v_1.Op != Op386SETGF {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14268,27 +18789,27 @@ func rewriteBlock386(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386ULE
+                       b.Kind = Block386UGT
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETA  cmp) (SETA  cmp)) yes no)
+               // match: (NE (TESTB (SETGF cmp) (SETGF cmp)) yes no)
                // cond:
-               // result: (UGT cmp yes no)
+               // result: (UGT  cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETA {
+                       if v_0.Op != Op386SETGF {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETA {
+                       if v_1.Op != Op386SETGF {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14302,21 +18823,21 @@ func rewriteBlock386(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETAE cmp) (SETAE cmp)) yes no)
+               // match: (NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no)
                // cond:
-               // result: (UGE cmp yes no)
+               // result: (UGE  cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETAE {
+                       if v_0.Op != Op386SETGEF {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETAE {
+                       if v_1.Op != Op386SETGEF {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14330,21 +18851,21 @@ func rewriteBlock386(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETGF  cmp) (SETGF  cmp)) yes no)
+               // match: (NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no)
                // cond:
-               // result: (UGT  cmp yes no)
+               // result: (UGE  cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETGF {
+                       if v_0.Op != Op386SETGEF {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETGF {
+                       if v_1.Op != Op386SETGEF {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14352,27 +18873,27 @@ func rewriteBlock386(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386UGT
+                       b.Kind = Block386UGE
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no)
+               // match: (NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no)
                // cond:
-               // result: (UGE  cmp yes no)
+               // result: (EQF  cmp yes no)
                for {
                        v := b.Control
                        if v.Op != Op386TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != Op386SETGEF {
+                       if v_0.Op != Op386SETEQF {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != Op386SETGEF {
+                       if v_1.Op != Op386SETEQF {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -14380,7 +18901,7 @@ func rewriteBlock386(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = Block386UGE
+                       b.Kind = Block386EQF
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
@@ -14442,6 +18963,34 @@ func rewriteBlock386(b *Block) bool {
                        _ = no
                        return true
                }
+               // match: (NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no)
+               // cond:
+               // result: (NEF  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != Op386TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != Op386SETNEF {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != Op386SETNEF {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = Block386NEF
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
                // match: (NE (InvertFlags cmp) yes no)
                // cond:
                // result: (NE cmp yes no)
index 99c4a0a8544ce5f72afebf0e20e9a09d971b3968..91e05456e0db051a5e07d6a323b9cb6eb216700a 100644 (file)
@@ -860,9 +860,9 @@ func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDL (SHLLconst x [c]) (SHRLconst x [32-c]))
-       // cond:
-       // result: (ROLLconst x [   c])
+       // match: (ADDL (SHLLconst x [c]) (SHRLconst x [d]))
+       // cond: d==32-c
+       // result: (ROLLconst x [c])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHLLconst {
@@ -874,10 +874,11 @@ func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
                if v_1.Op != OpAMD64SHRLconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(OpAMD64ROLLconst)
@@ -885,34 +886,35 @@ func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDL (SHRLconst x [c]) (SHLLconst x [32-c]))
-       // cond:
-       // result: (ROLLconst x [32-c])
+       // match: (ADDL (SHRLconst x [d]) (SHLLconst x [c]))
+       // cond: d==32-c
+       // result: (ROLLconst x [c])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHRLconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(OpAMD64ROLLconst)
-               v.AuxInt = 32 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (ADDL <t> (SHLLconst x [c]) (SHRWconst x [16-c]))
-       // cond: c < 16 && t.Size() == 2
-       // result: (ROLWconst x [   c])
+       // match: (ADDL <t> (SHLLconst x [c]) (SHRWconst x [d]))
+       // cond: d==16-c && c < 16 && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
@@ -925,13 +927,11 @@ func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
                if v_1.Op != OpAMD64SHRWconst {
                        break
                }
-               if v_1.AuxInt != 16-c {
-                       break
-               }
+               d := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c < 16 && t.Size() == 2) {
+               if !(d == 16-c && c < 16 && t.Size() == 2) {
                        break
                }
                v.reset(OpAMD64ROLWconst)
@@ -939,38 +939,36 @@ func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDL <t> (SHRWconst x [c]) (SHLLconst x [16-c]))
-       // cond: c > 0  && t.Size() == 2
-       // result: (ROLWconst x [16-c])
+       // match: (ADDL <t> (SHRWconst x [d]) (SHLLconst x [c]))
+       // cond: d==16-c && c < 16 && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHRWconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               if v_1.AuxInt != 16-c {
-                       break
-               }
+               c := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c > 0 && t.Size() == 2) {
+               if !(d == 16-c && c < 16 && t.Size() == 2) {
                        break
                }
                v.reset(OpAMD64ROLWconst)
-               v.AuxInt = 16 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (ADDL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c]))
-       // cond: c < 8 && t.Size() == 1
-       // result: (ROLBconst x [   c])
+       // match: (ADDL <t> (SHLLconst x [c]) (SHRBconst x [d]))
+       // cond: d==8-c  && c < 8 && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
@@ -983,13 +981,11 @@ func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
                if v_1.Op != OpAMD64SHRBconst {
                        break
                }
-               if v_1.AuxInt != 8-c {
-                       break
-               }
+               d := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c < 8 && t.Size() == 1) {
+               if !(d == 8-c && c < 8 && t.Size() == 1) {
                        break
                }
                v.reset(OpAMD64ROLBconst)
@@ -997,32 +993,30 @@ func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c]))
-       // cond: c > 0 && t.Size() == 1
-       // result: (ROLBconst x [ 8-c])
+       // match: (ADDL <t> (SHRBconst x [d]) (SHLLconst x [c]))
+       // cond: d==8-c  && c < 8 && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHRBconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               if v_1.AuxInt != 8-c {
-                       break
-               }
+               c := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c > 0 && t.Size() == 1) {
+               if !(d == 8-c && c < 8 && t.Size() == 1) {
                        break
                }
                v.reset(OpAMD64ROLBconst)
-               v.AuxInt = 8 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
@@ -1041,6 +1035,21 @@ func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDL (NEGL y) x)
+       // cond:
+       // result: (SUBL x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64NEGL {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64SUBL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDL x l:(MOVLload [off] {sym} ptr mem))
        // cond: canMergeLoad(v, l, x) && clobber(l)
        // result: (ADDLmem x [off] {sym} ptr mem)
@@ -1196,9 +1205,9 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDQ (SHLQconst x [c]) (SHRQconst x [64-c]))
-       // cond:
-       // result: (ROLQconst x [   c])
+       // match: (ADDQ (SHLQconst x [c]) (SHRQconst x [d]))
+       // cond: d==64-c
+       // result: (ROLQconst x [c])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHLQconst {
@@ -1210,10 +1219,11 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                if v_1.Op != OpAMD64SHRQconst {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 64-c) {
                        break
                }
                v.reset(OpAMD64ROLQconst)
@@ -1221,28 +1231,29 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDQ (SHRQconst x [c]) (SHLQconst x [64-c]))
-       // cond:
-       // result: (ROLQconst x [64-c])
+       // match: (ADDQ (SHRQconst x [d]) (SHLQconst x [c]))
+       // cond: d==64-c
+       // result: (ROLQconst x [c])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHRQconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 64-c) {
                        break
                }
                v.reset(OpAMD64ROLQconst)
-               v.AuxInt = 64 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
@@ -1264,6 +1275,24 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDQ (SHLQconst [3] y) x)
+       // cond:
+       // result: (LEAQ8 x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_0.AuxInt != 3 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ8)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDQ x (SHLQconst [2] y))
        // cond:
        // result: (LEAQ4 x y)
@@ -1282,6 +1311,24 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDQ (SHLQconst [2] y) x)
+       // cond:
+       // result: (LEAQ4 x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ4)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDQ x (SHLQconst [1] y))
        // cond:
        // result: (LEAQ2 x y)
@@ -1300,6 +1347,24 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDQ (SHLQconst [1] y) x)
+       // cond:
+       // result: (LEAQ2 x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDQ x (ADDQ y y))
        // cond:
        // result: (LEAQ2 x y)
@@ -1318,6 +1383,24 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDQ (ADDQ y y) x)
+       // cond:
+       // result: (LEAQ2 x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               y := v_0.Args[0]
+               if y != v_0.Args[1] {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDQ x (ADDQ x y))
        // cond:
        // result: (LEAQ2 y x)
@@ -1354,6 +1437,42 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (ADDQ (ADDQ x y) x)
+       // cond:
+       // result: (LEAQ2 y x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpAMD64LEAQ2)
+               v.AddArg(y)
+               v.AddArg(x)
+               return true
+       }
+       // match: (ADDQ (ADDQ y x) x)
+       // cond:
+       // result: (LEAQ2 y x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpAMD64LEAQ2)
+               v.AddArg(y)
+               v.AddArg(x)
+               return true
+       }
        // match: (ADDQ (ADDQconst [c] x) y)
        // cond:
        // result: (LEAQ1 [c] x y)
@@ -1371,17 +1490,17 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (ADDQ x (ADDQconst [c] y))
+       // match: (ADDQ y (ADDQconst [c] x))
        // cond:
        // result: (LEAQ1 [c] x y)
        for {
-               x := v.Args[0]
+               y := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64ADDQconst {
                        break
                }
                c := v_1.AuxInt
-               y := v_1.Args[0]
+               x := v_1.Args[0]
                v.reset(OpAMD64LEAQ1)
                v.AuxInt = c
                v.AddArg(x)
@@ -1410,7 +1529,7 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (ADDQ (LEAQ [c] {s} x) y)
+       // match: (ADDQ (LEAQ [c] {s} y) x)
        // cond: x.Op != OpSB && y.Op != OpSB
        // result: (LEAQ1 [c] {s} x y)
        for {
@@ -1420,8 +1539,8 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                }
                c := v_0.AuxInt
                s := v_0.Aux
-               x := v_0.Args[0]
-               y := v.Args[1]
+               y := v_0.Args[0]
+               x := v.Args[1]
                if !(x.Op != OpSB && y.Op != OpSB) {
                        break
                }
@@ -1447,6 +1566,21 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDQ (NEGQ y) x)
+       // cond:
+       // result: (SUBQ x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64NEGQ {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64SUBQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDQ x l:(MOVQload [off] {sym} ptr mem))
        // cond: canMergeLoad(v, l, x) && clobber(l)
        // result: (ADDQmem x [off] {sym} ptr mem)
@@ -3358,20 +3492,20 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} x (ADDQconst [d] y))
-       // cond: is32Bit(c+d)   && y.Op != OpSB
+       // match: (LEAQ1 [c] {s} y (ADDQconst [d] x))
+       // cond: is32Bit(c+d)   && x.Op != OpSB
        // result: (LEAQ1 [c+d] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
-               x := v.Args[0]
+               y := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64ADDQconst {
                        break
                }
                d := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(is32Bit(c+d) && y.Op != OpSB) {
+               x := v_1.Args[0]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
                v.reset(OpAMD64LEAQ1)
@@ -3403,9 +3537,9 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} (SHLQconst [1] x) y)
+       // match: (LEAQ1 [c] {s} (SHLQconst [1] y) x)
        // cond:
-       // result: (LEAQ2 [c] {s} y x)
+       // result: (LEAQ2 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
@@ -3416,13 +3550,13 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool {
                if v_0.AuxInt != 1 {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
+               y := v_0.Args[0]
+               x := v.Args[1]
                v.reset(OpAMD64LEAQ2)
                v.AuxInt = c
                v.Aux = s
-               v.AddArg(y)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        // match: (LEAQ1 [c] {s} x (SHLQconst [2] y))
@@ -3447,9 +3581,9 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} (SHLQconst [2] x) y)
+       // match: (LEAQ1 [c] {s} (SHLQconst [2] y) x)
        // cond:
-       // result: (LEAQ4 [c] {s} y x)
+       // result: (LEAQ4 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
@@ -3460,13 +3594,13 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool {
                if v_0.AuxInt != 2 {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
+               y := v_0.Args[0]
+               x := v.Args[1]
                v.reset(OpAMD64LEAQ4)
                v.AuxInt = c
                v.Aux = s
-               v.AddArg(y)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        // match: (LEAQ1 [c] {s} x (SHLQconst [3] y))
@@ -3491,9 +3625,9 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} (SHLQconst [3] x) y)
+       // match: (LEAQ1 [c] {s} (SHLQconst [3] y) x)
        // cond:
-       // result: (LEAQ8 [c] {s} y x)
+       // result: (LEAQ8 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
@@ -3504,13 +3638,13 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool {
                if v_0.AuxInt != 3 {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
+               y := v_0.Args[0]
+               x := v.Args[1]
                v.reset(OpAMD64LEAQ8)
                v.AuxInt = c
                v.Aux = s
-               v.AddArg(y)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        // match: (LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
@@ -3537,21 +3671,21 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [off1] {sym1} x (LEAQ [off2] {sym2} y))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB
+       // match: (LEAQ1 [off1] {sym1} y (LEAQ [off2] {sym2} x))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
        // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
-               x := v.Args[0]
+               y := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64LEAQ {
                        break
                }
                off2 := v_1.AuxInt
                sym2 := v_1.Aux
-               y := v_1.Args[0]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB) {
+               x := v_1.Args[0]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
                        break
                }
                v.reset(OpAMD64LEAQ1)
@@ -4195,7 +4329,7 @@ func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVBload  [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // match: (MOVBload [off1] {sym} (ADDQconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVBload  [off1+off2] {sym} ptr mem)
        for {
@@ -4218,7 +4352,7 @@ func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBload  [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // match: (MOVBload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
        // result: (MOVBload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
@@ -4292,7 +4426,7 @@ func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBload  [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // match: (MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
        // cond: canMergeSym(sym1, sym2)
        // result: (MOVBload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
@@ -4316,7 +4450,7 @@ func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBload  [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // match: (MOVBload [off1] {sym} (ADDLconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVBload  [off1+off2] {sym} ptr mem)
        for {
@@ -4364,6 +4498,28 @@ func rewriteValueAMD64_OpAMD64MOVBloadidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBloadidx1 [c] {sym} idx (ADDQconst [d] ptr) mem)
+       // cond:
+       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVBloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
        // cond:
        // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
@@ -4386,6 +4542,28 @@ func rewriteValueAMD64_OpAMD64MOVBloadidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBloadidx1 [c] {sym} (ADDQconst [d] idx) ptr mem)
+       // cond:
+       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
@@ -4433,7 +4611,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore  [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // match: (MOVBstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVBstore  [off1+off2] {sym} ptr val mem)
        for {
@@ -4481,7 +4659,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore  [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (MOVBstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
        // result: (MOVBstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
@@ -4561,7 +4739,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p w   x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
+       // match: (MOVBstore [i] {s} p w x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
        // cond: x0.Uses == 1   && clobber(x0)
        // result: (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem)
        for {
@@ -4607,7 +4785,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p w   x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w)   x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w)   x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
+       // match: (MOVBstore [i] {s} p w x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w) x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w) x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
        // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)
        // result: (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem)
        for {
@@ -4698,7 +4876,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p w   x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w)   x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w)   x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w)   x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w)   x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w)   x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w)   x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
+       // match: (MOVBstore [i] {s} p w x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w) x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w) x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w) x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w) x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w) x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w) x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
        // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
        // result: (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem)
        for {
@@ -4972,7 +5150,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // match: (MOVBstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
        // cond: canMergeSym(sym1, sym2)
        // result: (MOVBstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
@@ -4998,7 +5176,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore  [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // match: (MOVBstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVBstore  [off1+off2] {sym} ptr val mem)
        for {
@@ -5330,7 +5508,7 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx w   x0:(MOVBstoreidx1 [i-1] {s} p idx (SHRWconst [8] w) mem))
+       // match: (MOVBstoreidx1 [i] {s} p idx w x0:(MOVBstoreidx1 [i-1] {s} p idx (SHRWconst [8] w) mem))
        // cond: x0.Uses == 1   && clobber(x0)
        // result: (MOVWstoreidx1 [i-1] {s} p idx (ROLWconst <w.Type> [8] w) mem)
        for {
@@ -5381,7 +5559,7 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx w   x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w)   x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w)   x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
+       // match: (MOVBstoreidx1 [i] {s} p idx w x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
        // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)
        // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
        for {
@@ -5483,7 +5661,7 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx w   x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w)   x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w)   x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w)   x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w)   x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w)   x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w)   x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
        // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)
        // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
        for {
@@ -6168,7 +6346,7 @@ func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVLload  [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // match: (MOVLload [off1] {sym} (ADDQconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVLload  [off1+off2] {sym} ptr mem)
        for {
@@ -6191,7 +6369,7 @@ func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLload  [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // match: (MOVLload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
        // result: (MOVLload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
@@ -6291,7 +6469,7 @@ func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLload  [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // match: (MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
        // cond: canMergeSym(sym1, sym2)
        // result: (MOVLload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
@@ -6315,7 +6493,7 @@ func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLload  [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // match: (MOVLload [off1] {sym} (ADDLconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVLload  [off1+off2] {sym} ptr mem)
        for {
@@ -6365,36 +6543,82 @@ func rewriteValueAMD64_OpAMD64MOVLloadidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // match: (MOVLloadidx1 [c] {sym} (SHLQconst [2] idx) ptr mem)
        // cond:
-       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
+       // result: (MOVLloadidx4 [c] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVLloadidx1)
-               v.AuxInt = c + d
+               v.reset(OpAMD64MOVLloadidx4)
+               v.AuxInt = c
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // match: (MOVLloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
        // cond:
        // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLloadidx1 [c] {sym} idx (ADDQconst [d] ptr) mem)
+       // cond:
+       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond:
+       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
                if v_1.Op != OpAMD64ADDQconst {
                        break
                }
@@ -6409,6 +6633,28 @@ func rewriteValueAMD64_OpAMD64MOVLloadidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVLloadidx1 [c] {sym} (ADDQconst [d] idx) ptr mem)
+       // cond:
+       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64MOVLloadidx4(v *Value) bool {
@@ -6501,7 +6747,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstore  [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // match: (MOVLstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVLstore  [off1+off2] {sym} ptr val mem)
        for {
@@ -6549,7 +6795,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstore  [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (MOVLstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
        // result: (MOVLstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
@@ -6748,7 +6994,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // match: (MOVLstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
        // cond: canMergeSym(sym1, sym2)
        // result: (MOVLstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
@@ -6774,7 +7020,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstore  [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // match: (MOVLstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVLstore  [off1+off2] {sym} ptr val mem)
        for {
@@ -7549,7 +7795,7 @@ func rewriteValueAMD64_OpAMD64MOVLstoreidx4(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpAMD64MOVOload(v *Value) bool {
-       // match: (MOVOload  [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // match: (MOVOload [off1] {sym} (ADDQconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVOload  [off1+off2] {sym} ptr mem)
        for {
@@ -7599,7 +7845,7 @@ func rewriteValueAMD64_OpAMD64MOVOload(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpAMD64MOVOstore(v *Value) bool {
-       // match: (MOVOstore  [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // match: (MOVOstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVOstore  [off1+off2] {sym} ptr val mem)
        for {
@@ -7726,7 +7972,7 @@ func rewriteValueAMD64_OpAMD64MOVQload(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVQload  [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // match: (MOVQload [off1] {sym} (ADDQconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVQload  [off1+off2] {sym} ptr mem)
        for {
@@ -7749,7 +7995,7 @@ func rewriteValueAMD64_OpAMD64MOVQload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQload  [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // match: (MOVQload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
        // result: (MOVQload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
@@ -7849,7 +8095,7 @@ func rewriteValueAMD64_OpAMD64MOVQload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQload  [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // match: (MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
        // cond: canMergeSym(sym1, sym2)
        // result: (MOVQload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
@@ -7873,7 +8119,7 @@ func rewriteValueAMD64_OpAMD64MOVQload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQload  [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // match: (MOVQload [off1] {sym} (ADDLconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVQload  [off1+off2] {sym} ptr mem)
        for {
@@ -7923,6 +8169,30 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVQloadidx1 [c] {sym} (SHLQconst [3] idx) ptr mem)
+       // cond:
+       // result: (MOVQloadidx8 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_0.AuxInt != 3 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVQloadidx8)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVQloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
        // cond:
        // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem)
@@ -7945,6 +8215,28 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVQloadidx1 [c] {sym} idx (ADDQconst [d] ptr) mem)
+       // cond:
+       // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVQloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVQloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
        // cond:
        // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem)
@@ -7967,6 +8259,28 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVQloadidx1 [c] {sym} (ADDQconst [d] idx) ptr mem)
+       // cond:
+       // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVQloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64MOVQloadidx8(v *Value) bool {
@@ -8017,7 +8331,7 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx8(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
-       // match: (MOVQstore  [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // match: (MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVQstore  [off1+off2] {sym} ptr val mem)
        for {
@@ -8065,7 +8379,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstore  [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (MOVQstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
        // result: (MOVQstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
@@ -8173,7 +8487,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // match: (MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
        // cond: canMergeSym(sym1, sym2)
        // result: (MOVQstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
@@ -8199,7 +8513,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstore  [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // match: (MOVQstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVQstore  [off1+off2] {sym} ptr val mem)
        for {
@@ -10008,7 +10322,7 @@ func rewriteValueAMD64_OpAMD64MOVWload(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVWload  [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // match: (MOVWload [off1] {sym} (ADDQconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVWload  [off1+off2] {sym} ptr mem)
        for {
@@ -10031,7 +10345,7 @@ func rewriteValueAMD64_OpAMD64MOVWload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWload  [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // match: (MOVWload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
        // result: (MOVWload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
@@ -10131,7 +10445,7 @@ func rewriteValueAMD64_OpAMD64MOVWload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWload  [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // match: (MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
        // cond: canMergeSym(sym1, sym2)
        // result: (MOVWload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
@@ -10155,7 +10469,7 @@ func rewriteValueAMD64_OpAMD64MOVWload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWload  [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // match: (MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVWload  [off1+off2] {sym} ptr mem)
        for {
@@ -10205,6 +10519,30 @@ func rewriteValueAMD64_OpAMD64MOVWloadidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVWloadidx1 [c] {sym} (SHLQconst [1] idx) ptr mem)
+       // cond:
+       // result: (MOVWloadidx2 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWloadidx2)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVWloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
        // cond:
        // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
@@ -10227,6 +10565,28 @@ func rewriteValueAMD64_OpAMD64MOVWloadidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVWloadidx1 [c] {sym} idx (ADDQconst [d] ptr) mem)
+       // cond:
+       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVWloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
        // cond:
        // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
@@ -10249,6 +10609,28 @@ func rewriteValueAMD64_OpAMD64MOVWloadidx1(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVWloadidx1 [c] {sym} (ADDQconst [d] idx) ptr mem)
+       // cond:
+       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64MOVWloadidx2(v *Value) bool {
@@ -10341,7 +10723,7 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore  [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // match: (MOVWstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVWstore  [off1+off2] {sym} ptr val mem)
        for {
@@ -10389,7 +10771,7 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore  [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (MOVWstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
        // result: (MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
@@ -10588,7 +10970,7 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // match: (MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
        // cond: canMergeSym(sym1, sym2)
        // result: (MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
@@ -10614,7 +10996,7 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore  [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // match: (MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVWstore  [off1+off2] {sym} ptr val mem)
        for {
@@ -12087,9 +12469,9 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: ( ORL (SHLLconst x [c]) (SHRLconst x [32-c]))
-       // cond:
-       // result: (ROLLconst x [   c])
+       // match: (ORL (SHLLconst x [c]) (SHRLconst x [d]))
+       // cond: d==32-c
+       // result: (ROLLconst x [c])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHLLconst {
@@ -12101,10 +12483,11 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                if v_1.Op != OpAMD64SHRLconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(OpAMD64ROLLconst)
@@ -12112,34 +12495,35 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: ( ORL (SHRLconst x [c]) (SHLLconst x [32-c]))
-       // cond:
-       // result: (ROLLconst x [32-c])
+       // match: (ORL (SHRLconst x [d]) (SHLLconst x [c]))
+       // cond: d==32-c
+       // result: (ROLLconst x [c])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHRLconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(OpAMD64ROLLconst)
-               v.AuxInt = 32 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: ( ORL <t> (SHLLconst x [c]) (SHRWconst x [16-c]))
-       // cond: c < 16 && t.Size() == 2
-       // result: (ROLWconst x [   c])
+       // match: (ORL <t> (SHLLconst x [c]) (SHRWconst x [d]))
+       // cond: d==16-c && c < 16 && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
@@ -12152,13 +12536,11 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                if v_1.Op != OpAMD64SHRWconst {
                        break
                }
-               if v_1.AuxInt != 16-c {
-                       break
-               }
+               d := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c < 16 && t.Size() == 2) {
+               if !(d == 16-c && c < 16 && t.Size() == 2) {
                        break
                }
                v.reset(OpAMD64ROLWconst)
@@ -12166,38 +12548,36 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: ( ORL <t> (SHRWconst x [c]) (SHLLconst x [16-c]))
-       // cond: c > 0  && t.Size() == 2
-       // result: (ROLWconst x [16-c])
+       // match: (ORL <t> (SHRWconst x [d]) (SHLLconst x [c]))
+       // cond: d==16-c && c < 16 && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHRWconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               if v_1.AuxInt != 16-c {
-                       break
-               }
+               c := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c > 0 && t.Size() == 2) {
+               if !(d == 16-c && c < 16 && t.Size() == 2) {
                        break
                }
                v.reset(OpAMD64ROLWconst)
-               v.AuxInt = 16 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: ( ORL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c]))
-       // cond: c < 8 && t.Size() == 1
-       // result: (ROLBconst x [   c])
+       // match: (ORL <t> (SHLLconst x [c]) (SHRBconst x [d]))
+       // cond: d==8-c  && c < 8 && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
@@ -12210,13 +12590,11 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                if v_1.Op != OpAMD64SHRBconst {
                        break
                }
-               if v_1.AuxInt != 8-c {
-                       break
-               }
+               d := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c < 8 && t.Size() == 1) {
+               if !(d == 8-c && c < 8 && t.Size() == 1) {
                        break
                }
                v.reset(OpAMD64ROLBconst)
@@ -12224,32 +12602,30 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: ( ORL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c]))
-       // cond: c > 0 && t.Size() == 1
-       // result: (ROLBconst x [ 8-c])
+       // match: (ORL <t> (SHRBconst x [d]) (SHLLconst x [c]))
+       // cond: d==8-c  && c < 8 && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHRBconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               if v_1.AuxInt != 8-c {
-                       break
-               }
+               c := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c > 0 && t.Size() == 1) {
+               if !(d == 8-c && c < 8 && t.Size() == 1) {
                        break
                }
                v.reset(OpAMD64ROLBconst)
-               v.AuxInt = 8 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
@@ -12266,49 +12642,30 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ORL x:(SHLLconst _) y)
-       // cond: y.Op != OpAMD64SHLLconst
-       // result: (ORL y x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64SHLLconst {
-                       break
-               }
-               y := v.Args[1]
-               if !(y.Op != OpAMD64SHLLconst) {
-                       break
-               }
-               v.reset(OpAMD64ORL)
-               v.AddArg(y)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORL                  x0:(MOVBload [i]   {s} p mem)     s0:(SHLLconst [8] x1:(MOVBload [i+1] {s} p mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
-       // result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem)
+       // match: (ORL x0:(MOVBload [i0] {s} p mem) sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
        for {
                x0 := v.Args[0]
                if x0.Op != OpAMD64MOVBload {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                mem := x0.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               if s0.AuxInt != 8 {
+               if sh.AuxInt != 8 {
                        break
                }
-               x1 := s0.Args[0]
+               x1 := sh.Args[0]
                if x1.Op != OpAMD64MOVBload {
                        break
                }
-               if x1.AuxInt != i+1 {
-                       break
-               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -12318,256 +12675,369 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                if mem != x1.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
+               v0.AuxInt = i0
                v0.Aux = s
                v0.AddArg(p)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORL o0:(ORL                        x0:(MOVWload [i]   {s} p mem)     s0:(SHLLconst [16] x1:(MOVBload [i+2] {s} p mem)))     s1:(SHLLconst [24] x2:(MOVBload [i+3] {s} p mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVLload [i] {s} p mem)
+       // match: (ORL sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)) x0:(MOVBload [i0] {s} p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
        for {
-               o0 := v.Args[0]
-               if o0.Op != OpAMD64ORL {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               x0 := o0.Args[0]
-               if x0.Op != OpAMD64MOVWload {
+               if sh.AuxInt != 8 {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := o0.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
-               if s0.AuxInt != 16 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x1.AuxInt != i+2 {
+               if p != x0.Args[0] {
                        break
                }
-               if x1.Aux != s {
+               if mem != x0.Args[1] {
                        break
                }
-               if p != x1.Args[0] {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               if mem != x1.Args[1] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVWload [i0] {s} p mem) sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWload {
                        break
                }
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
-                       break
-               }
-               if s1.AuxInt != 24 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpAMD64MOVBload {
+               if sh.AuxInt != 16 {
                        break
                }
-               if x2.AuxInt != i+3 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWload {
                        break
                }
-               if x2.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x2.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               if mem != x2.Args[1] {
+               if mem != x1.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
+               b = mergePoint(b, x0, x1)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
+               v0.AuxInt = i0
                v0.Aux = s
                v0.AddArg(p)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORL                  x0:(MOVBloadidx1 [i]   {s} p idx mem)     s0:(SHLLconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i] {s} p idx mem)
+       // match: (ORL sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem)) x0:(MOVWload [i0] {s} p mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
-                       break
-               }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
-                       break
-               }
-               if s0.AuxInt != 8 {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if sh.AuxInt != 16 {
                        break
                }
-               if x1.AuxInt != i+1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWload {
                        break
                }
-               if x1.Aux != s {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWload {
                        break
                }
-               if p != x1.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x0.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
+               v0.AuxInt = i0
                v0.Aux = s
                v0.AddArg(p)
-               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORL o0:(ORL                        x0:(MOVWloadidx1 [i]   {s} p idx mem)     s0:(SHLLconst [16] x1:(MOVBloadidx1 [i+2] {s} p idx mem)))     s1:(SHLLconst [24] x2:(MOVBloadidx1 [i+3] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i] {s} p idx mem)
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
        for {
-               o0 := v.Args[0]
-               if o0.Op != OpAMD64ORL {
-                       break
-               }
-               x0 := o0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               s0 := o0.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
-               if s0.AuxInt != 16 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
-               if x1.AuxInt != i+2 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
-               if x1.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if mem != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[2] {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               s1 := v.Args[1]
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORL y s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       for {
+               s1 := v.Args[0]
                if s1.Op != OpAMD64SHLLconst {
                        break
                }
-               if s1.AuxInt != 24 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpAMD64MOVBloadidx1 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
                        break
                }
-               if x2.AuxInt != i+3 {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
-               if x2.Aux != s {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
-               if p != x2.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if idx != x2.Args[1] {
+               if p != x0.Args[0] {
                        break
                }
-               if mem != x2.Args[2] {
+               if mem != x0.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, v.Type)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORL                        x0:(MOVBload [i] {s} p mem)     s0:(SHLLconst [8]  x1:(MOVBload [i-1] {s} p mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i-1] {s} p mem))
+       // match: (ORL or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) y) s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
        for {
-               x0 := v.Args[0]
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
                if x0.Op != OpAMD64MOVBload {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                mem := x0.Args[1]
-               s0 := v.Args[1]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL y s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))) s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
                if s0.Op != OpAMD64SHLLconst {
                        break
                }
-               if s0.AuxInt != 8 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
-               if x1.AuxInt != i-1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -12577,49 +13047,50 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                if mem != x1.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
-               v1.AuxInt = i - 1
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORL                        x0:(MOVBloadidx1 [i] {s} p idx mem)     s0:(SHLLconst [8]  x1:(MOVBloadidx1 [i-1] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 <v.Type> [i-1] {s} p idx mem))
+       // match: (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
                x0 := v.Args[0]
                if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                idx := x0.Args[1]
                mem := x0.Args[2]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               if s0.AuxInt != 8 {
+               if sh.AuxInt != 8 {
                        break
                }
-               x1 := s0.Args[0]
+               x1 := sh.Args[0]
                if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if x1.AuxInt != i-1 {
-                       break
-               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -12632,674 +13103,12419 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                if mem != x1.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
-               v1.AuxInt = i - 1
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORL o1:(ORL o0:(ROLWconst [8] x01:(MOVWload [i1] {s} p mem))     s1:(SHLLconst [16] x2:(MOVBload [i1-1] {s} p mem)))     s2:(SHLLconst [24] x3:(MOVBload [i1-2] {s} p mem)))
-       // cond: x01.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && s1.Uses == 1   && s2.Uses == 1   && o0.Uses == 1   && o1.Uses == 1   && mergePoint(b,x01,x2,x3) != nil   && clobber(x01)   && clobber(x2)   && clobber(x3)   && clobber(s1)   && clobber(s2)   && clobber(o0)   && clobber(o1)
-       // result: @mergePoint(b,x01,x2,x3) (BSWAPL <v.Type> (MOVLload [i1-2] {s} p mem))
+       // match: (ORL x0:(MOVBloadidx1 [i0] {s} idx p mem) sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               o1 := v.Args[0]
-               if o1.Op != OpAMD64ORL {
-                       break
-               }
-               o0 := o1.Args[0]
-               if o0.Op != OpAMD64ROLWconst {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if o0.AuxInt != 8 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               x01 := o0.Args[0]
-               if x01.Op != OpAMD64MOVWload {
+               if sh.AuxInt != 8 {
                        break
                }
-               i1 := x01.AuxInt
-               s := x01.Aux
-               p := x01.Args[0]
-               mem := x01.Args[1]
-               s1 := o1.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if s1.AuxInt != 16 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpAMD64MOVBload {
+               if p != x1.Args[0] {
                        break
                }
-               if x2.AuxInt != i1-1 {
+               if idx != x1.Args[1] {
                        break
                }
-               if x2.Aux != s {
+               if mem != x1.Args[2] {
                        break
                }
-               if p != x2.Args[0] {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               if mem != x2.Args[1] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               s2 := v.Args[1]
-               if s2.Op != OpAMD64SHLLconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               if s2.AuxInt != 24 {
+               if sh.AuxInt != 8 {
                        break
                }
-               x3 := s2.Args[0]
-               if x3.Op != OpAMD64MOVBload {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if x3.AuxInt != i1-2 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x3.Aux != s {
+               if idx != x1.Args[0] {
                        break
                }
-               if p != x3.Args[0] {
+               if p != x1.Args[1] {
                        break
                }
-               if mem != x3.Args[1] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(x01.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && mergePoint(b, x01, x2, x3) != nil && clobber(x01) && clobber(x2) && clobber(x3) && clobber(s1) && clobber(s2) && clobber(o0) && clobber(o1)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               b = mergePoint(b, x01, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
-               v1.AuxInt = i1 - 2
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORL o1:(ORL o0:(ROLWconst [8] x01:(MOVWloadidx1 [i1] {s} p idx mem))     s1:(SHLLconst [16] x2:(MOVBloadidx1 [i1-1] {s} p idx mem)))     s2:(SHLLconst [24] x3:(MOVBloadidx1 [i1-2] {s} p idx mem)))
-       // cond: x01.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && s1.Uses == 1   && s2.Uses == 1   && o0.Uses == 1   && o1.Uses == 1   && mergePoint(b,x01,x2,x3) != nil   && clobber(x01)   && clobber(x2)   && clobber(x3)   && clobber(s1)   && clobber(s2)   && clobber(o0)   && clobber(o1)
-       // result: @mergePoint(b,x01,x2,x3) (BSWAPL <v.Type> (MOVLloadidx1 <v.Type> [i1-2] {s} p idx mem))
+       // match: (ORL x0:(MOVBloadidx1 [i0] {s} idx p mem) sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               o1 := v.Args[0]
-               if o1.Op != OpAMD64ORL {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               o0 := o1.Args[0]
-               if o0.Op != OpAMD64ROLWconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               if o0.AuxInt != 8 {
+               if sh.AuxInt != 8 {
                        break
                }
-               x01 := o0.Args[0]
-               if x01.Op != OpAMD64MOVWloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i1 := x01.AuxInt
-               s := x01.Aux
-               p := x01.Args[0]
-               idx := x01.Args[1]
-               mem := x01.Args[2]
-               s1 := o1.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if s1.AuxInt != 16 {
+               if idx != x1.Args[0] {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpAMD64MOVBloadidx1 {
+               if p != x1.Args[1] {
                        break
                }
-               if x2.AuxInt != i1-1 {
+               if mem != x1.Args[2] {
                        break
                }
-               if x2.Aux != s {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               if p != x2.Args[0] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               if idx != x2.Args[1] {
+               if sh.AuxInt != 8 {
                        break
                }
-               if mem != x2.Args[2] {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               s2 := v.Args[1]
-               if s2.Op != OpAMD64SHLLconst {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if s2.AuxInt != 24 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               x3 := s2.Args[0]
-               if x3.Op != OpAMD64MOVBloadidx1 {
+               if p != x0.Args[0] {
                        break
                }
-               if x3.AuxInt != i1-2 {
+               if idx != x0.Args[1] {
                        break
                }
-               if x3.Aux != s {
+               if mem != x0.Args[2] {
                        break
                }
-               if p != x3.Args[0] {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               if idx != x3.Args[1] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               if mem != x3.Args[2] {
+               if sh.AuxInt != 8 {
                        break
                }
-               if !(x01.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && mergePoint(b, x01, x2, x3) != nil && clobber(x01) && clobber(x2) && clobber(x3) && clobber(s1) && clobber(s2) && clobber(o0) && clobber(o1)) {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               b = mergePoint(b, x01, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, v.Type)
-               v1.AuxInt = i1 - 2
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)) x0:(MOVBloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL x1:(MOVBload [i1] {s} p mem) sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem)) x1:(MOVBload [i1] {s} p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORL y s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) y) s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL y s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))) s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL x1:(MOVBloadidx1 [i1] {s} p idx mem) sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL x1:(MOVBloadidx1 [i1] {s} idx p mem) sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL x1:(MOVBloadidx1 [i1] {s} p idx mem) sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL x1:(MOVBloadidx1 [i1] {s} idx p mem) sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)) x1:(MOVBloadidx1 [i1] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)) x1:(MOVBloadidx1 [i1] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)) x1:(MOVBloadidx1 [i1] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)) x1:(MOVBloadidx1 [i1] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORL x l:(MOVLload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ORLmem x [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64ORLmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ORL l:(MOVLload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ORLmem x [off] {sym} ptr mem)
+       for {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64ORLmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool {
+       // match: (ORLconst [c] x)
+       // cond: int32(c)==0
+       // result: x
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(int32(c) == 0) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORLconst [c] _)
+       // cond: int32(c)==-1
+       // result: (MOVLconst [-1])
+       for {
+               c := v.AuxInt
+               if !(int32(c) == -1) {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = -1
+               return true
+       }
+       // match: (ORLconst [c] (MOVLconst [d]))
+       // cond:
+       // result: (MOVLconst [c|d])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = c | d
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (ORQ x (MOVQconst [c]))
+       // cond: is32Bit(c)
+       // result: (ORQconst [c] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(is32Bit(c)) {
+                       break
+               }
+               v.reset(OpAMD64ORQconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORQ (MOVQconst [c]) x)
+       // cond: is32Bit(c)
+       // result: (ORQconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(is32Bit(c)) {
+                       break
+               }
+               v.reset(OpAMD64ORQconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORQ (SHLQconst x [c]) (SHRQconst x [d]))
+       // cond: d==64-c
+       // result: (ROLQconst x [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 64-c) {
+                       break
+               }
+               v.reset(OpAMD64ROLQconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORQ (SHRQconst x [d]) (SHLQconst x [c]))
+       // cond: d==64-c
+       // result: (ROLQconst x [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 64-c) {
+                       break
+               }
+               v.reset(OpAMD64ROLQconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORQ x x)
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORQ x0:(MOVBload [i0] {s} p mem) sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem)) x0:(MOVBload [i0] {s} p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVWload [i0] {s} p mem) sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem)) x0:(MOVWload [i0] {s} p mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVLload [i0] {s} p mem) sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVLload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)) x0:(MOVLload [i0] {s} p mem))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVLload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) y) s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))) s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem)) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem)) y) s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem))) s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ x0:(MOVBloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVBloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVBloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVBloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)) x0:(MOVBloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVWloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVWloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVWloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVWloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVLloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVLloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVLloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVLloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)) x0:(MOVLloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} idx p mem)) x0:(MOVLloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)) x0:(MOVLloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} idx p mem)) x0:(MOVLloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for { // Fuses two shifted MOVBloadidx1 operands of this ORQ into one MOVWloadidx1; any failed check breaks out to the next rule.
+               or := v.Args[0] // this variant expects the inner ORQ as the first operand
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0] // inner-ORQ operand that is passed through unchanged
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0] // this variant reads x0's args as (idx, p, mem)
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s { // both loads must carry the same Aux
+                       break
+               }
+               if idx != x1.Args[0] { // x1 must use the same values, here laid out as (idx, p, mem)
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // loads must pair up (i1 == i0+1, j1 == j0+8, j0 a multiple of 16) and each matched value must have Uses == 1; mergePoint/clobber are helpers defined outside this view
+                       break
+               }
+               b = mergePoint(b, x0, x1) // new values are created in the block returned by mergePoint (the result's @mergePoint)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new ORQ v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16) // one MOVWloadidx1 (typed UInt16) stands in for x0 and x1
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p) // emitted in (p, idx, mem) order regardless of the matched order
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem)) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for { // Fuses two shifted MOVWloadidx1 operands of this ORQ into one MOVLloadidx1; any failed check breaks out to the next rule.
+               s1 := v.Args[0] // this variant expects the SHLQconst [j1] operand first
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0] // this variant reads x1's args as (p, idx, mem)
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1] // second operand must be the inner ORQ
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s { // both loads must carry the same Aux
+                       break
+               }
+               if p != x0.Args[0] { // x0 must use the same values, here laid out as (p, idx, mem)
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1] // inner-ORQ operand that is passed through unchanged
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // loads must pair up (i1 == i0+2, j1 == j0+16, j0 a multiple of 32) and each matched value must have Uses == 1; mergePoint/clobber are helpers defined outside this view
+                       break
+               }
+               b = mergePoint(b, x0, x1) // new values are created in the block returned by mergePoint (the result's @mergePoint)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new ORQ v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32) // one MOVLloadidx1 (typed UInt32) stands in for x0 and x1
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p) // emitted in (p, idx, mem) order regardless of the matched order
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem)) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for { // Fuses two shifted MOVWloadidx1 operands of this ORQ into one MOVLloadidx1; any failed check breaks out to the next rule.
+               s1 := v.Args[0] // this variant expects the SHLQconst [j1] operand first
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0] // this variant reads x1's args as (idx, p, mem)
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1] // second operand must be the inner ORQ
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s { // both loads must carry the same Aux
+                       break
+               }
+               if p != x0.Args[0] { // x0 must use the same values, here laid out as (p, idx, mem)
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1] // inner-ORQ operand that is passed through unchanged
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // loads must pair up (i1 == i0+2, j1 == j0+16, j0 a multiple of 32) and each matched value must have Uses == 1; mergePoint/clobber are helpers defined outside this view
+                       break
+               }
+               b = mergePoint(b, x0, x1) // new values are created in the block returned by mergePoint (the result's @mergePoint)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new ORQ v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32) // one MOVLloadidx1 (typed UInt32) stands in for x0 and x1
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p) // emitted in (p, idx, mem) order regardless of the matched order
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem)) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for { // Fuses two shifted MOVWloadidx1 operands of this ORQ into one MOVLloadidx1; any failed check breaks out to the next rule.
+               s1 := v.Args[0] // this variant expects the SHLQconst [j1] operand first
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0] // this variant reads x1's args as (p, idx, mem)
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1] // second operand must be the inner ORQ
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s { // both loads must carry the same Aux
+                       break
+               }
+               if idx != x0.Args[0] { // x0 must use the same values, here laid out as (idx, p, mem)
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1] // inner-ORQ operand that is passed through unchanged
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // loads must pair up (i1 == i0+2, j1 == j0+16, j0 a multiple of 32) and each matched value must have Uses == 1; mergePoint/clobber are helpers defined outside this view
+                       break
+               }
+               b = mergePoint(b, x0, x1) // new values are created in the block returned by mergePoint (the result's @mergePoint)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new ORQ v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32) // one MOVLloadidx1 (typed UInt32) stands in for x0 and x1
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p) // emitted in (p, idx, mem) order regardless of the matched order
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem)) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for { // Fuses two shifted MOVWloadidx1 operands of this ORQ into one MOVLloadidx1; any failed check breaks out to the next rule.
+               s1 := v.Args[0] // this variant expects the SHLQconst [j1] operand first
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0] // this variant reads x1's args as (idx, p, mem)
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1] // second operand must be the inner ORQ
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s { // both loads must carry the same Aux
+                       break
+               }
+               if idx != x0.Args[0] { // x0 must use the same values, here laid out as (idx, p, mem)
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1] // inner-ORQ operand that is passed through unchanged
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // loads must pair up (i1 == i0+2, j1 == j0+16, j0 a multiple of 32) and each matched value must have Uses == 1; mergePoint/clobber are helpers defined outside this view
+                       break
+               }
+               b = mergePoint(b, x0, x1) // new values are created in the block returned by mergePoint (the result's @mergePoint)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new ORQ v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32) // one MOVLloadidx1 (typed UInt32) stands in for x0 and x1
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p) // emitted in (p, idx, mem) order regardless of the matched order
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for { // Fuses two shifted MOVWloadidx1 operands of this ORQ into one MOVLloadidx1; any failed check breaks out to the next rule.
+               s1 := v.Args[0] // this variant expects the SHLQconst [j1] operand first
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0] // this variant reads x1's args as (p, idx, mem)
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1] // second operand must be the inner ORQ
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0] // inner-ORQ operand that is passed through unchanged
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s { // both loads must carry the same Aux
+                       break
+               }
+               if p != x0.Args[0] { // x0 must use the same values, here laid out as (p, idx, mem)
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // loads must pair up (i1 == i0+2, j1 == j0+16, j0 a multiple of 32) and each matched value must have Uses == 1; mergePoint/clobber are helpers defined outside this view
+                       break
+               }
+               b = mergePoint(b, x0, x1) // new values are created in the block returned by mergePoint (the result's @mergePoint)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new ORQ v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32) // one MOVLloadidx1 (typed UInt32) stands in for x0 and x1
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p) // emitted in (p, idx, mem) order regardless of the matched order
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for { // Fuses two shifted MOVWloadidx1 operands of this ORQ into one MOVLloadidx1; any failed check breaks out to the next rule.
+               s1 := v.Args[0] // this variant expects the SHLQconst [j1] operand first
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0] // this variant reads x1's args as (idx, p, mem)
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1] // second operand must be the inner ORQ
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0] // inner-ORQ operand that is passed through unchanged
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s { // both loads must carry the same Aux
+                       break
+               }
+               if p != x0.Args[0] { // x0 must use the same values, here laid out as (p, idx, mem)
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // loads must pair up (i1 == i0+2, j1 == j0+16, j0 a multiple of 32) and each matched value must have Uses == 1; mergePoint/clobber are helpers defined outside this view
+                       break
+               }
+               b = mergePoint(b, x0, x1) // new values are created in the block returned by mergePoint (the result's @mergePoint)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new ORQ v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32) // one MOVLloadidx1 (typed UInt32) stands in for x0 and x1
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p) // emitted in (p, idx, mem) order regardless of the matched order
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for { // Fuses two shifted MOVWloadidx1 operands of this ORQ into one MOVLloadidx1; any failed check breaks out to the next rule.
+               s1 := v.Args[0] // this variant expects the SHLQconst [j1] operand first
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0] // this variant reads x1's args as (p, idx, mem)
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1] // second operand must be the inner ORQ
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0] // inner-ORQ operand that is passed through unchanged
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s { // both loads must carry the same Aux
+                       break
+               }
+               if idx != x0.Args[0] { // x0 must use the same values, here laid out as (idx, p, mem)
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // loads must pair up (i1 == i0+2, j1 == j0+16, j0 a multiple of 32) and each matched value must have Uses == 1; mergePoint/clobber are helpers defined outside this view
+                       break
+               }
+               b = mergePoint(b, x0, x1) // new values are created in the block returned by mergePoint (the result's @mergePoint)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new ORQ v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32) // one MOVLloadidx1 (typed UInt32) stands in for x0 and x1
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p) // emitted in (p, idx, mem) order regardless of the matched order
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for { // Fuses two shifted MOVWloadidx1 operands of this ORQ into one MOVLloadidx1; any failed check breaks out to the next rule.
+               s1 := v.Args[0] // this variant expects the SHLQconst [j1] operand first
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0] // this variant reads x1's args as (idx, p, mem)
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1] // second operand must be the inner ORQ
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0] // inner-ORQ operand that is passed through unchanged
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s { // both loads must carry the same Aux
+                       break
+               }
+               if idx != x0.Args[0] { // x0 must use the same values, here laid out as (idx, p, mem)
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // loads must pair up (i1 == i0+2, j1 == j0+16, j0 a multiple of 32) and each matched value must have Uses == 1; mergePoint/clobber are helpers defined outside this view
+                       break
+               }
+               b = mergePoint(b, x0, x1) // new values are created in the block returned by mergePoint (the result's @mergePoint)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new ORQ v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32) // one MOVLloadidx1 (typed UInt32) stands in for x0 and x1
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p) // emitted in (p, idx, mem) order regardless of the matched order
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem)) y) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for { // Fuses two shifted MOVWloadidx1 operands of this ORQ into one MOVLloadidx1; any failed check breaks out to the next rule.
+               or := v.Args[0] // this variant expects the inner ORQ as the first operand
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0] // this variant reads x0's args as (p, idx, mem)
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1] // inner-ORQ operand that is passed through unchanged
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s { // both loads must carry the same Aux
+                       break
+               }
+               if p != x1.Args[0] { // x1 must use the same values, here laid out as (p, idx, mem)
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // loads must pair up (i1 == i0+2, j1 == j0+16, j0 a multiple of 32) and each matched value must have Uses == 1; mergePoint/clobber are helpers defined outside this view
+                       break
+               }
+               b = mergePoint(b, x0, x1) // new values are created in the block returned by mergePoint (the result's @mergePoint)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new ORQ v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32) // one MOVLloadidx1 (typed UInt32) stands in for x0 and x1
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p) // emitted in (p, idx, mem) order regardless of the matched order
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem)) y) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for { // Fuses two shifted MOVWloadidx1 operands of this ORQ into one MOVLloadidx1; any failed check breaks out to the next rule.
+               or := v.Args[0] // this variant expects the inner ORQ as the first operand
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0] // this variant reads x0's args as (idx, p, mem)
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1] // inner-ORQ operand that is passed through unchanged
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s { // both loads must carry the same Aux
+                       break
+               }
+               if p != x1.Args[0] { // x1 must use the same values, here laid out as (p, idx, mem)
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // loads must pair up (i1 == i0+2, j1 == j0+16, j0 a multiple of 32) and each matched value must have Uses == 1; mergePoint/clobber are helpers defined outside this view
+                       break
+               }
+               b = mergePoint(b, x0, x1) // new values are created in the block returned by mergePoint (the result's @mergePoint)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new ORQ v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32) // one MOVLloadidx1 (typed UInt32) stands in for x0 and x1
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p) // emitted in (p, idx, mem) order regardless of the matched order
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for { // Fuses two shifted MOVWloadidx1 operands of this ORQ into one MOVLloadidx1; any failed check breaks out to the next rule.
+               or := v.Args[0] // this variant expects the inner ORQ as the first operand
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0] // inner-ORQ operand that is passed through unchanged
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0] // this variant reads x0's args as (p, idx, mem)
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s { // both loads must carry the same Aux
+                       break
+               }
+               if p != x1.Args[0] { // x1 must use the same values, here laid out as (p, idx, mem)
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // loads must pair up (i1 == i0+2, j1 == j0+16, j0 a multiple of 32) and each matched value must have Uses == 1; mergePoint/clobber are helpers defined outside this view
+                       break
+               }
+               b = mergePoint(b, x0, x1) // new values are created in the block returned by mergePoint (the result's @mergePoint)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy) // v becomes an OpCopy of the new ORQ v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32) // one MOVLloadidx1 (typed UInt32) stands in for x0 and x1
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p) // emitted in (p, idx, mem) order regardless of the matched order
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem))) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for { // one-shot loop: any failed check breaks out so the next rule can be tried
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0] // the other operand of the inner ORQ; re-attached to the result unchanged
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0] // x0 binds its address operands in (idx, p) order in this variant
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s { // from here on x1 must agree with x0 on s, p, idx and mem
+                       break
+               }
+               if p != x1.Args[0] { // x1 is expected in (p, idx) order
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // clobber calls come last in the && chain, so they are reached only once every earlier condition holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // the replacement values below are created in the block mergePoint returns
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy) // v is turned into a copy of v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32) // one UInt32-typed load at offset i0 stands in for x0 and x1
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem)) y) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for { // one-shot loop: any failed check breaks out so the next rule can be tried
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0] // x0 binds its address operands in (p, idx) order
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1] // the other operand of the inner ORQ; re-attached to the result unchanged
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s { // from here on x1 must agree with x0 on s, p, idx and mem
+                       break
+               }
+               if idx != x1.Args[0] { // x1 is expected in (idx, p) order in this variant
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // clobber calls come last in the && chain, so they are reached only once every earlier condition holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // the replacement values below are created in the block mergePoint returns
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy) // v is turned into a copy of v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32) // one UInt32-typed load at offset i0 stands in for x0 and x1
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem)) y) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for { // one-shot loop: any failed check breaks out so the next rule can be tried
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0] // x0 binds its address operands in (idx, p) order in this variant
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1] // the other operand of the inner ORQ; re-attached to the result unchanged
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s { // from here on x1 must agree with x0 on s, p, idx and mem
+                       break
+               }
+               if idx != x1.Args[0] { // x1 is expected in (idx, p) order in this variant
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // clobber calls come last in the && chain, so they are reached only once every earlier condition holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // the replacement values below are created in the block mergePoint returns
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy) // v is turned into a copy of v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32) // one UInt32-typed load at offset i0 stands in for x0 and x1
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for { // one-shot loop: any failed check breaks out so the next rule can be tried
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0] // the other operand of the inner ORQ; re-attached to the result unchanged
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0] // x0 binds its address operands in (p, idx) order
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s { // from here on x1 must agree with x0 on s, p, idx and mem
+                       break
+               }
+               if idx != x1.Args[0] { // x1 is expected in (idx, p) order in this variant
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // clobber calls come last in the && chain, so they are reached only once every earlier condition holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // the replacement values below are created in the block mergePoint returns
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy) // v is turned into a copy of v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32) // one UInt32-typed load at offset i0 stands in for x0 and x1
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem))) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for { // one-shot loop: any failed check breaks out so the next rule can be tried
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0] // the other operand of the inner ORQ; re-attached to the result unchanged
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0] // x0 binds its address operands in (idx, p) order in this variant
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s { // from here on x1 must agree with x0 on s, p, idx and mem
+                       break
+               }
+               if idx != x1.Args[0] { // x1 is expected in (idx, p) order in this variant
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // clobber calls come last in the && chain, so they are reached only once every earlier condition holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // the replacement values below are created in the block mergePoint returns
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy) // v is turned into a copy of v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32) // one UInt32-typed load at offset i0 stands in for x0 and x1
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ x1:(MOVBload [i1] {s} p mem) sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
+       for { // one-shot loop: any failed check breaks out so the next rule can be tried
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 { // the shift amount must be exactly 8
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s { // from here on x0 must agree with x1 on s, p and mem
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) { // clobber calls come last in the && chain, so they are reached only once every earlier condition holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // the replacement values below are created in the block mergePoint returns
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy) // v is turned into a copy of v0
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16) // one UInt16-typed load at offset i0 stands in for x0 and x1
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem)) x1:(MOVBload [i1] {s} p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
+       for { // one-shot loop: any failed check breaks out so the next rule can be tried
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 { // the shift amount must be exactly 8
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s { // from here on x1 must agree with x0 on s, p and mem
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) { // clobber calls come last in the && chain, so they are reached only once every earlier condition holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // the replacement values below are created in the block mergePoint returns
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy) // v is turned into a copy of v0
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16) // one UInt16-typed load at offset i0 stands in for x0 and x1
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
+       for { // one-shot loop: any failed check breaks out so the next rule can be tried
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 { // the shift amount must be exactly 16
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s { // from here on x0 must agree with x1 on s, p and mem
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) { // clobber calls come last in the && chain, so they are reached only once every earlier condition holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // the replacement values below are created in the block mergePoint returns
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy) // v is turned into a copy of v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32) // one UInt32-typed load at offset i0 stands in for x0 and x1
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
+       for { // one-shot loop: any failed check breaks out so the next rule can be tried
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 { // the shift amount must be exactly 16
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s { // from here on x1 must agree with x0 on s, p and mem
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) { // clobber calls come last in the && chain, so they are reached only once every earlier condition holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // the replacement values below are created in the block mergePoint returns
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy) // v is turned into a copy of v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32) // one UInt32-typed load at offset i0 stands in for x0 and x1
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ r1:(BSWAPL x1:(MOVLload [i1] {s} p mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
+       for { // one-shot loop: any failed check breaks out so the next rule can be tried
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 { // the shift amount must be exactly 32
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s { // from here on x0 must agree with x1 on s, p and mem
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) { // clobber calls come last in the && chain, so they are reached only once every earlier condition holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // the replacement values below are created in the block mergePoint returns
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy) // v is turned into a copy of v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQload, types.UInt64) // one UInt64-typed load at offset i0 stands in for x0 and x1
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))) r1:(BSWAPL x1:(MOVLload [i1] {s} p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
+       for { // one-shot loop: any failed check breaks out so the next rule can be tried
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 { // the shift amount must be exactly 32
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s { // from here on x1 must agree with x0 on s, p and mem
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) { // clobber calls come last in the && chain, so they are reached only once every earlier condition holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // the replacement values below are created in the block mergePoint returns
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy) // v is turned into a copy of v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQload, types.UInt64) // one UInt64-typed load at offset i0 stands in for x0 and x1
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       for { // one-shot loop: any failed check breaks out so the next rule can be tried
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s { // from here on x1 must agree with x0 on s, p and mem
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y := or.Args[1] // the other operand of the inner ORQ; re-attached to the result unchanged
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // clobber calls come last in the && chain, so they are reached only once every earlier condition holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // the replacement values below are created in the block mergePoint returns
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy) // v is turned into a copy of v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1 // the combined value is shifted by j1, the smaller of the two original shift amounts (j1 == j0-8)
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16) // one UInt16-typed load at offset i0 stands in for x0 and x1
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       for { // one-shot loop: any failed check breaks out so the next rule can be tried
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0] // the other operand of the inner ORQ; re-attached to the result unchanged
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s { // from here on x1 must agree with x0 on s, p and mem
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // clobber calls come last in the && chain, so they are reached only once every earlier condition holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // the replacement values below are created in the block mergePoint returns
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy) // v is turned into a copy of v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1 // the combined value is shifted by j1, the smaller of the two original shift amounts (j1 == j0-8)
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16) // one UInt16-typed load at offset i0 stands in for x0 and x1
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) y) s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))) s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLload [i0] {s} p mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLload [i0] {s} p mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))) y) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ x1:(MOVBloadidx1 [i1] {s} p idx mem) sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ x1:(MOVBloadidx1 [i1] {s} idx p mem) sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ x1:(MOVBloadidx1 [i1] {s} p idx mem) sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ x1:(MOVBloadidx1 [i1] {s} idx p mem) sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)) x1:(MOVBloadidx1 [i1] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)) x1:(MOVBloadidx1 [i1] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)) x1:(MOVBloadidx1 [i1] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)) x1:(MOVBloadidx1 [i1] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} idx p mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} idx p mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))) r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} idx p mem))) r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))) r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} idx p mem))) r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORL x l:(MOVLload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (ORLmem x [off] {sym} ptr mem)
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVLload {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               if idx != x0.Args[0] {
                        break
                }
-               v.reset(OpAMD64ORLmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (ORL l:(MOVLload [off] {sym} ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (ORLmem x [off] {sym} ptr mem)
-       for {
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVLload {
+               if p != x0.Args[1] {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpAMD64ORLmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool {
-       // match: (ORLconst [c] x)
-       // cond: int32(c)==0
-       // result: x
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(int32(c) == 0) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORLconst [c] _)
-       // cond: int32(c)==-1
-       // result: (MOVLconst [-1])
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               c := v.AuxInt
-               if !(int32(c) == -1) {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = -1
-               return true
-       }
-       // match: (ORLconst [c] (MOVLconst [d]))
-       // cond:
-       // result: (MOVLconst [c|d])
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = c | d
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
-       b := v.Block
-       _ = b
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (ORQ x (MOVQconst [c]))
-       // cond: is32Bit(c)
-       // result: (ORQconst [c] x)
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               c := v_1.AuxInt
-               if !(is32Bit(c)) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               v.reset(OpAMD64ORQconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORQ (MOVQconst [c]) x)
-       // cond: is32Bit(c)
-       // result: (ORQconst [c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(is32Bit(c)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(OpAMD64ORQconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: ( ORQ (SHLQconst x [c]) (SHRQconst x [64-c]))
-       // cond:
-       // result: (ROLQconst x [   c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
+               if idx != x0.Args[0] {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRQconst {
+               if p != x0.Args[1] {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               if mem != x0.Args[2] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               v.reset(OpAMD64ROLQconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: ( ORQ (SHRQconst x [c]) (SHLQconst x [64-c]))
-       // cond:
-       // result: (ROLQconst x [64-c])
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRQconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if x != v_1.Args[0] {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               v.reset(OpAMD64ROLQconst)
-               v.AuxInt = 64 - c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORQ x x)
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x != v.Args[1] {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORQ x:(SHLQconst _) y)
-       // cond: y.Op != OpAMD64SHLQconst
-       // result: (ORQ y x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64SHLQconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               y := v.Args[1]
-               if !(y.Op != OpAMD64SHLQconst) {
+               if idx != x0.Args[0] {
                        break
                }
-               v.reset(OpAMD64ORQ)
-               v.AddArg(y)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORQ o0:(ORQ o1:(ORQ o2:(ORQ o3:(ORQ o4:(ORQ o5:(ORQ                        x0:(MOVBload [i]   {s} p mem)     s0:(SHLQconst [8]  x1:(MOVBload [i+1] {s} p mem)))     s1:(SHLQconst [16] x2:(MOVBload [i+2] {s} p mem)))     s2:(SHLQconst [24] x3:(MOVBload [i+3] {s} p mem)))     s3:(SHLQconst [32] x4:(MOVBload [i+4] {s} p mem)))     s4:(SHLQconst [40] x5:(MOVBload [i+5] {s} p mem)))     s5:(SHLQconst [48] x6:(MOVBload [i+6] {s} p mem)))     s6:(SHLQconst [56] x7:(MOVBload [i+7] {s} p mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && x7.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && s2.Uses == 1   && s3.Uses == 1   && s4.Uses == 1   && s5.Uses == 1   && s6.Uses == 1   && o0.Uses == 1   && o1.Uses == 1   && o2.Uses == 1   && o3.Uses == 1   && o4.Uses == 1   && o5.Uses == 1   && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)   && clobber(x7)   && clobber(s0)   && clobber(s1)   && clobber(s2)   && clobber(s3)   && clobber(s4)   && clobber(s5)   && clobber(s6)   && clobber(o0)   && clobber(o1)   && clobber(o2)   && clobber(o3)   && clobber(o4)   && clobber(o5)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQload [i] {s} p mem)
-       for {
-               o0 := v.Args[0]
-               if o0.Op != OpAMD64ORQ {
+               if p != x0.Args[1] {
                        break
                }
-               o1 := o0.Args[0]
-               if o1.Op != OpAMD64ORQ {
+               if mem != x0.Args[2] {
                        break
                }
-               o2 := o1.Args[0]
-               if o2.Op != OpAMD64ORQ {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               o3 := o2.Args[0]
-               if o3.Op != OpAMD64ORQ {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               o4 := o3.Args[0]
-               if o4.Op != OpAMD64ORQ {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               o5 := o4.Args[0]
-               if o5.Op != OpAMD64ORQ {
+               if r0.AuxInt != 8 {
                        break
                }
-               x0 := o5.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := o5.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if s0.AuxInt != 8 {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if x1.AuxInt != i+1 {
+               if r1.AuxInt != 8 {
                        break
                }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
                if p != x1.Args[0] {
                        break
                }
-               if mem != x1.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
-               s1 := o4.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               if mem != x1.Args[2] {
                        break
                }
-               if s1.AuxInt != 16 {
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpAMD64MOVBload {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               if x2.AuxInt != i+2 {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if x2.Aux != s {
+               if r0.AuxInt != 8 {
                        break
                }
-               if p != x2.Args[0] {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if mem != x2.Args[1] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               s2 := o3.Args[1]
-               if s2.Op != OpAMD64SHLQconst {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if s2.AuxInt != 24 {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               x3 := s2.Args[0]
-               if x3.Op != OpAMD64MOVBload {
+               if r1.AuxInt != 8 {
                        break
                }
-               if x3.AuxInt != i+3 {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x3.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x3.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               if mem != x3.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
-               s3 := o2.Args[1]
-               if s3.Op != OpAMD64SHLQconst {
+               if mem != x1.Args[2] {
                        break
                }
-               if s3.AuxInt != 32 {
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               x4 := s3.Args[0]
-               if x4.Op != OpAMD64MOVBload {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem))) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               if x4.AuxInt != i+4 {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if x4.Aux != s {
+               if r0.AuxInt != 8 {
                        break
                }
-               if p != x4.Args[0] {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if mem != x4.Args[1] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               s4 := o1.Args[1]
-               if s4.Op != OpAMD64SHLQconst {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if s4.AuxInt != 40 {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               x5 := s4.Args[0]
-               if x5.Op != OpAMD64MOVBload {
+               if r1.AuxInt != 8 {
                        break
                }
-               if x5.AuxInt != i+5 {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x5.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x5.Args[0] {
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
                        break
                }
-               if mem != x5.Args[1] {
+               if mem != x1.Args[2] {
                        break
                }
-               s5 := o0.Args[1]
-               if s5.Op != OpAMD64SHLQconst {
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if s5.AuxInt != 48 {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem))) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x6 := s5.Args[0]
-               if x6.Op != OpAMD64MOVBload {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if x6.AuxInt != i+6 {
+               if r0.AuxInt != 8 {
                        break
                }
-               if x6.Aux != s {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if p != x6.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if mem != x6.Args[1] {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               s6 := v.Args[1]
-               if s6.Op != OpAMD64SHLQconst {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if s6.AuxInt != 56 {
+               if r1.AuxInt != 8 {
                        break
                }
-               x7 := s6.Args[0]
-               if x7.Op != OpAMD64MOVBload {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x7.AuxInt != i+7 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x7.Aux != s {
+               if idx != x1.Args[0] {
                        break
                }
-               if p != x7.Args[0] {
+               if p != x1.Args[1] {
                        break
                }
-               if mem != x7.Args[1] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)) {
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, types.UInt64)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ o0:(ORQ o1:(ORQ o2:(ORQ o3:(ORQ o4:(ORQ o5:(ORQ                        x0:(MOVBloadidx1 [i]   {s} p idx mem)     s0:(SHLQconst [8]  x1:(MOVBloadidx1 [i+1] {s} p idx mem)))     s1:(SHLQconst [16] x2:(MOVBloadidx1 [i+2] {s} p idx mem)))     s2:(SHLQconst [24] x3:(MOVBloadidx1 [i+3] {s} p idx mem)))     s3:(SHLQconst [32] x4:(MOVBloadidx1 [i+4] {s} p idx mem)))     s4:(SHLQconst [40] x5:(MOVBloadidx1 [i+5] {s} p idx mem)))     s5:(SHLQconst [48] x6:(MOVBloadidx1 [i+6] {s} p idx mem)))     s6:(SHLQconst [56] x7:(MOVBloadidx1 [i+7] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && x7.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && s2.Uses == 1   && s3.Uses == 1   && s4.Uses == 1   && s5.Uses == 1   && s6.Uses == 1   && o0.Uses == 1   && o1.Uses == 1   && o2.Uses == 1   && o3.Uses == 1   && o4.Uses == 1   && o5.Uses == 1   && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)   && clobber(x7)   && clobber(s0)   && clobber(s1)   && clobber(s2)   && clobber(s3)   && clobber(s4)   && clobber(s5)   && clobber(s6)   && clobber(o0)   && clobber(o1)   && clobber(o2)   && clobber(o3)   && clobber(o4)   && clobber(o5)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQloadidx1 <v.Type> [i] {s} p idx mem)
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
        for {
-               o0 := v.Args[0]
-               if o0.Op != OpAMD64ORQ {
-                       break
-               }
-               o1 := o0.Args[0]
-               if o1.Op != OpAMD64ORQ {
-                       break
-               }
-               o2 := o1.Args[0]
-               if o2.Op != OpAMD64ORQ {
-                       break
-               }
-               o3 := o2.Args[0]
-               if o3.Op != OpAMD64ORQ {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               o4 := o3.Args[0]
-               if o4.Op != OpAMD64ORQ {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               o5 := o4.Args[0]
-               if o5.Op != OpAMD64ORQ {
+               if r0.AuxInt != 8 {
                        break
                }
-               x0 := o5.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                idx := x0.Args[1]
                mem := x0.Args[2]
-               s0 := o5.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if s0.AuxInt != 8 {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
                        break
                }
-               if x1.AuxInt != i+1 {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -13312,621 +25528,917 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x1.Args[2] {
                        break
                }
-               s1 := o4.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if s1.AuxInt != 16 {
-                       break
-               }
-               x2 := s1.Args[0]
-               if x2.Op != OpAMD64MOVBloadidx1 {
-                       break
-               }
-               if x2.AuxInt != i+2 {
-                       break
-               }
-               if x2.Aux != s {
-                       break
-               }
-               if p != x2.Args[0] {
-                       break
-               }
-               if idx != x2.Args[1] {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if mem != x2.Args[2] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               s2 := o3.Args[1]
-               if s2.Op != OpAMD64SHLQconst {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if s2.AuxInt != 24 {
+               if r0.AuxInt != 8 {
                        break
                }
-               x3 := s2.Args[0]
-               if x3.Op != OpAMD64MOVBloadidx1 {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x3.AuxInt != i+3 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if x3.Aux != s {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if p != x3.Args[0] {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if idx != x3.Args[1] {
+               if r1.AuxInt != 8 {
                        break
                }
-               if mem != x3.Args[2] {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               s3 := o2.Args[1]
-               if s3.Op != OpAMD64SHLQconst {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if s3.AuxInt != 32 {
+               if p != x1.Args[0] {
                        break
                }
-               x4 := s3.Args[0]
-               if x4.Op != OpAMD64MOVBloadidx1 {
+               if idx != x1.Args[1] {
                        break
                }
-               if x4.AuxInt != i+4 {
+               if mem != x1.Args[2] {
                        break
                }
-               if x4.Aux != s {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if p != x4.Args[0] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               if idx != x4.Args[1] {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if mem != x4.Args[2] {
+               if r0.AuxInt != 8 {
                        break
                }
-               s4 := o1.Args[1]
-               if s4.Op != OpAMD64SHLQconst {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if s4.AuxInt != 40 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               x5 := s4.Args[0]
-               if x5.Op != OpAMD64MOVBloadidx1 {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if x5.AuxInt != i+5 {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if x5.Aux != s {
+               if r1.AuxInt != 8 {
                        break
                }
-               if p != x5.Args[0] {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if idx != x5.Args[1] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if mem != x5.Args[2] {
+               if idx != x1.Args[0] {
                        break
                }
-               s5 := o0.Args[1]
-               if s5.Op != OpAMD64SHLQconst {
+               if p != x1.Args[1] {
                        break
                }
-               if s5.AuxInt != 48 {
+               if mem != x1.Args[2] {
                        break
                }
-               x6 := s5.Args[0]
-               if x6.Op != OpAMD64MOVBloadidx1 {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if x6.AuxInt != i+6 {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               if x6.Aux != s {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if p != x6.Args[0] {
+               if r0.AuxInt != 8 {
                        break
                }
-               if idx != x6.Args[1] {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if mem != x6.Args[2] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               s6 := v.Args[1]
-               if s6.Op != OpAMD64SHLQconst {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if s6.AuxInt != 56 {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               x7 := s6.Args[0]
-               if x7.Op != OpAMD64MOVBloadidx1 {
+               if r1.AuxInt != 8 {
                        break
                }
-               if x7.AuxInt != i+7 {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x7.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x7.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x7.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
-               if mem != x7.Args[2] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)) {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, v.Type)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ o5:(ORQ o4:(ORQ o3:(ORQ o2:(ORQ o1:(ORQ o0:(ORQ                        x0:(MOVBload [i] {s} p mem)     s0:(SHLQconst [8]  x1:(MOVBload [i-1] {s} p mem)))     s1:(SHLQconst [16] x2:(MOVBload [i-2] {s} p mem)))     s2:(SHLQconst [24] x3:(MOVBload [i-3] {s} p mem)))     s3:(SHLQconst [32] x4:(MOVBload [i-4] {s} p mem)))     s4:(SHLQconst [40] x5:(MOVBload [i-5] {s} p mem)))     s5:(SHLQconst [48] x6:(MOVBload [i-6] {s} p mem)))     s6:(SHLQconst [56] x7:(MOVBload [i-7] {s} p mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && x7.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && s2.Uses == 1   && s3.Uses == 1   && s4.Uses == 1   && s5.Uses == 1   && s6.Uses == 1   && o0.Uses == 1   && o1.Uses == 1   && o2.Uses == 1   && o3.Uses == 1   && o4.Uses == 1   && o5.Uses == 1   && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)   && clobber(x7)   && clobber(s0)   && clobber(s1)   && clobber(s2)   && clobber(s3)   && clobber(s4)   && clobber(s5)   && clobber(s6)   && clobber(o0)   && clobber(o1)   && clobber(o2)   && clobber(o3)   && clobber(o4)   && clobber(o5)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (BSWAPQ <v.Type> (MOVQload [i-7] {s} p mem))
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) y) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
        for {
-               o5 := v.Args[0]
-               if o5.Op != OpAMD64ORQ {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               o4 := o5.Args[0]
-               if o4.Op != OpAMD64ORQ {
-                       break
-               }
-               o3 := o4.Args[0]
-               if o3.Op != OpAMD64ORQ {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               o2 := o3.Args[0]
-               if o2.Op != OpAMD64ORQ {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               o1 := o2.Args[0]
-               if o1.Op != OpAMD64ORQ {
+               if r1.AuxInt != 8 {
                        break
                }
-               o0 := o1.Args[0]
-               if o0.Op != OpAMD64ORQ {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               x0 := o0.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := o0.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if s0.AuxInt != 8 {
+               if r0.AuxInt != 8 {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x1.AuxInt != i-1 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x1.Aux != s {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x1.Args[0] {
+               if idx != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[1] {
+               if mem != x0.Args[2] {
                        break
                }
-               s1 := o1.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if s1.AuxInt != 16 {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem))) y) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpAMD64MOVBload {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if x2.AuxInt != i-2 {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if x2.Aux != s {
+               if r1.AuxInt != 8 {
                        break
                }
-               if p != x2.Args[0] {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if mem != x2.Args[1] {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               s2 := o2.Args[1]
-               if s2.Op != OpAMD64SHLQconst {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if s2.AuxInt != 24 {
+               if r0.AuxInt != 8 {
                        break
                }
-               x3 := s2.Args[0]
-               if x3.Op != OpAMD64MOVBload {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x3.AuxInt != i-3 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x3.Aux != s {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x3.Args[0] {
+               if idx != x0.Args[1] {
                        break
                }
-               if mem != x3.Args[1] {
+               if mem != x0.Args[2] {
                        break
                }
-               s3 := o3.Args[1]
-               if s3.Op != OpAMD64SHLQconst {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if s3.AuxInt != 32 {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               x4 := s3.Args[0]
-               if x4.Op != OpAMD64MOVBload {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if x4.AuxInt != i-4 {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if x4.Aux != s {
+               if r1.AuxInt != 8 {
                        break
                }
-               if p != x4.Args[0] {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if mem != x4.Args[1] {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               s4 := o4.Args[1]
-               if s4.Op != OpAMD64SHLQconst {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if s4.AuxInt != 40 {
+               if r0.AuxInt != 8 {
                        break
                }
-               x5 := s4.Args[0]
-               if x5.Op != OpAMD64MOVBload {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x5.AuxInt != i-5 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x5.Aux != s {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x5.Args[0] {
+               if idx != x0.Args[1] {
                        break
                }
-               if mem != x5.Args[1] {
+               if mem != x0.Args[2] {
                        break
                }
-               s5 := o5.Args[1]
-               if s5.Op != OpAMD64SHLQconst {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if s5.AuxInt != 48 {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               x6 := s5.Args[0]
-               if x6.Op != OpAMD64MOVBload {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if x6.AuxInt != i-6 {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if x6.Aux != s {
+               if r1.AuxInt != 8 {
                        break
                }
-               if p != x6.Args[0] {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if mem != x6.Args[1] {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               s6 := v.Args[1]
-               if s6.Op != OpAMD64SHLQconst {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if s6.AuxInt != 56 {
+               if r0.AuxInt != 8 {
                        break
                }
-               x7 := s6.Args[0]
-               if x7.Op != OpAMD64MOVBload {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x7.AuxInt != i-7 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x7.Aux != s {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x7.Args[0] {
+               if idx != x0.Args[1] {
                        break
                }
-               if mem != x7.Args[1] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)) {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQload, types.UInt64)
-               v1.AuxInt = i - 7
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ o5:(ORQ o4:(ORQ o3:(ORQ o2:(ORQ o1:(ORQ o0:(ORQ                        x0:(MOVBloadidx1 [i] {s} p idx mem)     s0:(SHLQconst [8]  x1:(MOVBloadidx1 [i-1] {s} p idx mem)))     s1:(SHLQconst [16] x2:(MOVBloadidx1 [i-2] {s} p idx mem)))     s2:(SHLQconst [24] x3:(MOVBloadidx1 [i-3] {s} p idx mem)))     s3:(SHLQconst [32] x4:(MOVBloadidx1 [i-4] {s} p idx mem)))     s4:(SHLQconst [40] x5:(MOVBloadidx1 [i-5] {s} p idx mem)))     s5:(SHLQconst [48] x6:(MOVBloadidx1 [i-6] {s} p idx mem)))     s6:(SHLQconst [56] x7:(MOVBloadidx1 [i-7] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && x7.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && s2.Uses == 1   && s3.Uses == 1   && s4.Uses == 1   && s5.Uses == 1   && s6.Uses == 1   && o0.Uses == 1   && o1.Uses == 1   && o2.Uses == 1   && o3.Uses == 1   && o4.Uses == 1   && o5.Uses == 1   && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)   && clobber(x7)   && clobber(s0)   && clobber(s1)   && clobber(s2)   && clobber(s3)   && clobber(s4)   && clobber(s5)   && clobber(s6)   && clobber(o0)   && clobber(o1)   && clobber(o2)   && clobber(o3)   && clobber(o4)   && clobber(o5)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (BSWAPQ <v.Type> (MOVQloadidx1 <v.Type> [i-7] {s} p idx mem))
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) y) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
        for {
-               o5 := v.Args[0]
-               if o5.Op != OpAMD64ORQ {
-                       break
-               }
-               o4 := o5.Args[0]
-               if o4.Op != OpAMD64ORQ {
-                       break
-               }
-               o3 := o4.Args[0]
-               if o3.Op != OpAMD64ORQ {
-                       break
-               }
-               o2 := o3.Args[0]
-               if o2.Op != OpAMD64ORQ {
-                       break
-               }
-               o1 := o2.Args[0]
-               if o1.Op != OpAMD64ORQ {
-                       break
-               }
-               o0 := o1.Args[0]
-               if o0.Op != OpAMD64ORQ {
-                       break
-               }
-               x0 := o0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
-                       break
-               }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               s0 := o0.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if s0.AuxInt != 8 {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if x1.AuxInt != i-1 {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if x1.Aux != s {
+               if r1.AuxInt != 8 {
                        break
                }
-               if p != x1.Args[0] {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if idx != x1.Args[1] {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               if mem != x1.Args[2] {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               s1 := o1.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               if r0.AuxInt != 8 {
                        break
                }
-               if s1.AuxInt != 16 {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpAMD64MOVBloadidx1 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x2.AuxInt != i-2 {
+               if idx != x0.Args[0] {
                        break
                }
-               if x2.Aux != s {
+               if p != x0.Args[1] {
                        break
                }
-               if p != x2.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               if idx != x2.Args[1] {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if mem != x2.Args[2] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem))) y) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               s2 := o2.Args[1]
-               if s2.Op != OpAMD64SHLQconst {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if s2.AuxInt != 24 {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               x3 := s2.Args[0]
-               if x3.Op != OpAMD64MOVBloadidx1 {
+               if r1.AuxInt != 8 {
                        break
                }
-               if x3.AuxInt != i-3 {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x3.Aux != s {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               if p != x3.Args[0] {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if idx != x3.Args[1] {
+               if r0.AuxInt != 8 {
                        break
                }
-               if mem != x3.Args[2] {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               s3 := o3.Args[1]
-               if s3.Op != OpAMD64SHLQconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if s3.AuxInt != 32 {
+               if idx != x0.Args[0] {
                        break
                }
-               x4 := s3.Args[0]
-               if x4.Op != OpAMD64MOVBloadidx1 {
+               if p != x0.Args[1] {
                        break
                }
-               if x4.AuxInt != i-4 {
+               if mem != x0.Args[2] {
                        break
                }
-               if x4.Aux != s {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if p != x4.Args[0] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if idx != x4.Args[1] {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if mem != x4.Args[2] {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               s4 := o4.Args[1]
-               if s4.Op != OpAMD64SHLQconst {
+               if r1.AuxInt != 8 {
                        break
                }
-               if s4.AuxInt != 40 {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               x5 := s4.Args[0]
-               if x5.Op != OpAMD64MOVBloadidx1 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               if x5.AuxInt != i-5 {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if x5.Aux != s {
+               if r0.AuxInt != 8 {
                        break
                }
-               if p != x5.Args[0] {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if idx != x5.Args[1] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if mem != x5.Args[2] {
+               if idx != x0.Args[0] {
                        break
                }
-               s5 := o5.Args[1]
-               if s5.Op != OpAMD64SHLQconst {
+               if p != x0.Args[1] {
                        break
                }
-               if s5.AuxInt != 48 {
+               if mem != x0.Args[2] {
                        break
                }
-               x6 := s5.Args[0]
-               if x6.Op != OpAMD64MOVBloadidx1 {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if x6.AuxInt != i-6 {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if x6.Aux != s {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if p != x6.Args[0] {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if idx != x6.Args[1] {
+               if r1.AuxInt != 8 {
                        break
                }
-               if mem != x6.Args[2] {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               s6 := v.Args[1]
-               if s6.Op != OpAMD64SHLQconst {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               if s6.AuxInt != 56 {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               x7 := s6.Args[0]
-               if x7.Op != OpAMD64MOVBloadidx1 {
+               if r0.AuxInt != 8 {
                        break
                }
-               if x7.AuxInt != i-7 {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if x7.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x7.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if idx != x7.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x7.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)) {
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, v.Type)
-               v1.AuxInt = i - 7
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
        // match: (ORQ x l:(MOVQload [off] {sym} ptr mem))
@@ -14902,6 +27414,37 @@ func rewriteValueAMD64_OpAMD64SETEQ(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (SETEQ (TESTL y (SHLL (MOVLconst [1]) x)))
+       // cond: !config.nacl
+       // result: (SETAE (BTL x y))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTL {
+                       break
+               }
+               y := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLL {
+                       break
+               }
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               if v_0_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_1.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTL, TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
        // match: (SETEQ (TESTQ (SHLQ (MOVQconst [1]) x) y))
        // cond: !config.nacl
        // result: (SETAE (BTQ x y))
@@ -14933,6 +27476,37 @@ func rewriteValueAMD64_OpAMD64SETEQ(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (SETEQ (TESTQ y (SHLQ (MOVQconst [1]) x)))
+       // cond: !config.nacl
+       // result: (SETAE (BTQ x y))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTQ {
+                       break
+               }
+               y := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLQ {
+                       break
+               }
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               if v_0_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_1.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
        // match: (SETEQ (TESTLconst [c] x))
        // cond: isPowerOfTwo(c) && log2(c) < 32 && !config.nacl
        // result: (SETAE (BTLconst [log2(c)] x))
@@ -14997,6 +27571,30 @@ func rewriteValueAMD64_OpAMD64SETEQ(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (SETEQ (TESTQ x (MOVQconst [c])))
+       // cond: isPowerOfTwo(c) && log2(c) < 64 && !config.nacl
+       // result: (SETAE (BTQconst [log2(c)] x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTQ {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0_1.AuxInt
+               if !(isPowerOfTwo(c) && log2(c) < 64 && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (SETEQ (InvertFlags x))
        // cond:
        // result: (SETEQ x)
@@ -15412,6 +28010,37 @@ func rewriteValueAMD64_OpAMD64SETNE(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (SETNE (TESTL y (SHLL (MOVLconst [1]) x)))
+       // cond: !config.nacl
+       // result: (SETB  (BTL x y))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTL {
+                       break
+               }
+               y := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLL {
+                       break
+               }
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               if v_0_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_1.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTL, TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
        // match: (SETNE (TESTQ (SHLQ (MOVQconst [1]) x) y))
        // cond: !config.nacl
        // result: (SETB  (BTQ x y))
@@ -15443,6 +28072,37 @@ func rewriteValueAMD64_OpAMD64SETNE(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (SETNE (TESTQ y (SHLQ (MOVQconst [1]) x)))
+       // cond: !config.nacl
+       // result: (SETB  (BTQ x y))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTQ {
+                       break
+               }
+               y := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLQ {
+                       break
+               }
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               if v_0_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_1.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
        // match: (SETNE (TESTLconst [c] x))
        // cond: isPowerOfTwo(c) && log2(c) < 32 && !config.nacl
        // result: (SETB  (BTLconst [log2(c)] x))
@@ -15507,6 +28167,30 @@ func rewriteValueAMD64_OpAMD64SETNE(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (SETNE (TESTQ x (MOVQconst [c])))
+       // cond: isPowerOfTwo(c) && log2(c) < 64 && !config.nacl
+       // result: (SETB  (BTQconst [log2(c)] x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTQ {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0_1.AuxInt
+               if !(isPowerOfTwo(c) && log2(c) < 64 && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (SETNE (InvertFlags x))
        // cond:
        // result: (SETNE x)
@@ -16390,23 +29074,6 @@ func rewriteValueAMD64_OpAMD64TESTB(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpAMD64TESTL(v *Value) bool {
-       // match: (TESTL y x:(SHLL _ _))
-       // cond: y.Op != OpAMD64SHLL
-       // result: (TESTL x y)
-       for {
-               y := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64SHLL {
-                       break
-               }
-               if !(y.Op != OpAMD64SHLL) {
-                       break
-               }
-               v.reset(OpAMD64TESTL)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
        // match: (TESTL (MOVLconst [c]) x)
        // cond:
        // result: (TESTLconst [c] x)
@@ -16440,23 +29107,6 @@ func rewriteValueAMD64_OpAMD64TESTL(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpAMD64TESTQ(v *Value) bool {
-       // match: (TESTQ y x:(SHLQ _ _))
-       // cond: y.Op != OpAMD64SHLQ
-       // result: (TESTQ x y)
-       for {
-               y := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64SHLQ {
-                       break
-               }
-               if !(y.Op != OpAMD64SHLQ) {
-                       break
-               }
-               v.reset(OpAMD64TESTQ)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
        // match: (TESTQ (MOVQconst [c]) x)
        // cond: is32Bit(c)
        // result: (TESTQconst [c] x)
@@ -16723,9 +29373,9 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORL (SHLLconst x [c]) (SHRLconst x [32-c]))
-       // cond:
-       // result: (ROLLconst x [   c])
+       // match: (XORL (SHLLconst x [c]) (SHRLconst x [d]))
+       // cond: d==32-c
+       // result: (ROLLconst x [c])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHLLconst {
@@ -16737,10 +29387,11 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
                if v_1.Op != OpAMD64SHRLconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(OpAMD64ROLLconst)
@@ -16748,34 +29399,35 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORL (SHRLconst x [c]) (SHLLconst x [32-c]))
-       // cond:
-       // result: (ROLLconst x [32-c])
+       // match: (XORL (SHRLconst x [d]) (SHLLconst x [c]))
+       // cond: d==32-c
+       // result: (ROLLconst x [c])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHRLconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(OpAMD64ROLLconst)
-               v.AuxInt = 32 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (XORL <t> (SHLLconst x [c]) (SHRWconst x [16-c]))
-       // cond: c < 16 && t.Size() == 2
-       // result: (ROLWconst x [   c])
+       // match: (XORL <t> (SHLLconst x [c]) (SHRWconst x [d]))
+       // cond: d==16-c && c < 16 && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
@@ -16788,13 +29440,11 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
                if v_1.Op != OpAMD64SHRWconst {
                        break
                }
-               if v_1.AuxInt != 16-c {
-                       break
-               }
+               d := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c < 16 && t.Size() == 2) {
+               if !(d == 16-c && c < 16 && t.Size() == 2) {
                        break
                }
                v.reset(OpAMD64ROLWconst)
@@ -16802,38 +29452,36 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORL <t> (SHRWconst x [c]) (SHLLconst x [16-c]))
-       // cond: c > 0  && t.Size() == 2
-       // result: (ROLWconst x [16-c])
+       // match: (XORL <t> (SHRWconst x [d]) (SHLLconst x [c]))
+       // cond: d==16-c && c < 16 && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHRWconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               if v_1.AuxInt != 16-c {
-                       break
-               }
+               c := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c > 0 && t.Size() == 2) {
+               if !(d == 16-c && c < 16 && t.Size() == 2) {
                        break
                }
                v.reset(OpAMD64ROLWconst)
-               v.AuxInt = 16 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (XORL <t> (SHLLconst x [c]) (SHRBconst x [ 8-c]))
-       // cond: c < 8 && t.Size() == 1
-       // result: (ROLBconst x [   c])
+       // match: (XORL <t> (SHLLconst x [c]) (SHRBconst x [d]))
+       // cond: d==8-c  && c < 8 && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
@@ -16846,13 +29494,11 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
                if v_1.Op != OpAMD64SHRBconst {
                        break
                }
-               if v_1.AuxInt != 8-c {
-                       break
-               }
+               d := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c < 8 && t.Size() == 1) {
+               if !(d == 8-c && c < 8 && t.Size() == 1) {
                        break
                }
                v.reset(OpAMD64ROLBconst)
@@ -16860,32 +29506,30 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORL <t> (SHRBconst x [c]) (SHLLconst x [ 8-c]))
-       // cond: c > 0 && t.Size() == 1
-       // result: (ROLBconst x [ 8-c])
+       // match: (XORL <t> (SHRBconst x [d]) (SHLLconst x [c]))
+       // cond: d==8-c  && c < 8 && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
                t := v.Type
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHRBconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               if v_1.AuxInt != 8-c {
-                       break
-               }
+               c := v_1.AuxInt
                if x != v_1.Args[0] {
                        break
                }
-               if !(c > 0 && t.Size() == 1) {
+               if !(d == 8-c && c < 8 && t.Size() == 1) {
                        break
                }
                v.reset(OpAMD64ROLBconst)
-               v.AuxInt = 8 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
@@ -17035,9 +29679,9 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORQ (SHLQconst x [c]) (SHRQconst x [64-c]))
-       // cond:
-       // result: (ROLQconst x [   c])
+       // match: (XORQ (SHLQconst x [c]) (SHRQconst x [d]))
+       // cond: d==64-c
+       // result: (ROLQconst x [c])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHLQconst {
@@ -17049,10 +29693,11 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
                if v_1.Op != OpAMD64SHRQconst {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 64-c) {
                        break
                }
                v.reset(OpAMD64ROLQconst)
@@ -17060,28 +29705,29 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORQ (SHRQconst x [c]) (SHLQconst x [64-c]))
-       // cond:
-       // result: (ROLQconst x [64-c])
+       // match: (XORQ (SHRQconst x [d]) (SHLQconst x [c]))
+       // cond: d==64-c
+       // result: (ROLQconst x [c])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHRQconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 64-c) {
                        break
                }
                v.reset(OpAMD64ROLQconst)
-               v.AuxInt = 64 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
@@ -17194,7 +29840,7 @@ func rewriteValueAMD64_OpAMD64XORQconst(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpAdd16(v *Value) bool {
-       // match: (Add16  x y)
+       // match: (Add16 x y)
        // cond:
        // result: (ADDL  x y)
        for {
@@ -17207,7 +29853,7 @@ func rewriteValueAMD64_OpAdd16(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpAdd32(v *Value) bool {
-       // match: (Add32  x y)
+       // match: (Add32 x y)
        // cond:
        // result: (ADDL  x y)
        for {
@@ -17233,7 +29879,7 @@ func rewriteValueAMD64_OpAdd32F(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpAdd64(v *Value) bool {
-       // match: (Add64  x y)
+       // match: (Add64 x y)
        // cond:
        // result: (ADDQ  x y)
        for {
@@ -17259,7 +29905,7 @@ func rewriteValueAMD64_OpAdd64F(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpAdd8(v *Value) bool {
-       // match: (Add8   x y)
+       // match: (Add8 x y)
        // cond:
        // result: (ADDL  x y)
        for {
@@ -17381,7 +30027,7 @@ func rewriteValueAMD64_OpAnd64(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpAnd8(v *Value) bool {
-       // match: (And8  x y)
+       // match: (And8 x y)
        // cond:
        // result: (ANDL x y)
        for {
@@ -17826,7 +30472,7 @@ func rewriteValueAMD64_OpCom64(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpCom8(v *Value) bool {
-       // match: (Com8  x)
+       // match: (Com8 x)
        // cond:
        // result: (NOTL x)
        for {
@@ -17837,7 +30483,7 @@ func rewriteValueAMD64_OpCom8(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpConst16(v *Value) bool {
-       // match: (Const16  [val])
+       // match: (Const16 [val])
        // cond:
        // result: (MOVLconst [val])
        for {
@@ -17848,7 +30494,7 @@ func rewriteValueAMD64_OpConst16(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpConst32(v *Value) bool {
-       // match: (Const32  [val])
+       // match: (Const32 [val])
        // cond:
        // result: (MOVLconst [val])
        for {
@@ -17870,7 +30516,7 @@ func rewriteValueAMD64_OpConst32F(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpConst64(v *Value) bool {
-       // match: (Const64  [val])
+       // match: (Const64 [val])
        // cond:
        // result: (MOVQconst [val])
        for {
@@ -17892,7 +30538,7 @@ func rewriteValueAMD64_OpConst64F(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpConst8(v *Value) bool {
-       // match: (Const8   [val])
+       // match: (Const8 [val])
        // cond:
        // result: (MOVLconst [val])
        for {
@@ -18161,7 +30807,7 @@ func rewriteValueAMD64_OpDiv16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div16  x y)
+       // match: (Div16 x y)
        // cond:
        // result: (Select0 (DIVW  x y))
        for {
@@ -18199,7 +30845,7 @@ func rewriteValueAMD64_OpDiv32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div32  x y)
+       // match: (Div32 x y)
        // cond:
        // result: (Select0 (DIVL  x y))
        for {
@@ -18250,7 +30896,7 @@ func rewriteValueAMD64_OpDiv64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div64  x y)
+       // match: (Div64 x y)
        // cond:
        // result: (Select0 (DIVQ  x y))
        for {
@@ -18301,7 +30947,7 @@ func rewriteValueAMD64_OpDiv8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div8   x y)
+       // match: (Div8 x y)
        // cond:
        // result: (Select0 (DIVW  (SignExt8to16 x) (SignExt8to16 y)))
        for {
@@ -18324,7 +30970,7 @@ func rewriteValueAMD64_OpDiv8u(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div8u  x y)
+       // match: (Div8u x y)
        // cond:
        // result: (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
        for {
@@ -18345,7 +30991,7 @@ func rewriteValueAMD64_OpDiv8u(v *Value) bool {
 func rewriteValueAMD64_OpEq16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Eq16  x y)
+       // match: (Eq16 x y)
        // cond:
        // result: (SETEQ (CMPW x y))
        for {
@@ -18362,7 +31008,7 @@ func rewriteValueAMD64_OpEq16(v *Value) bool {
 func rewriteValueAMD64_OpEq32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Eq32  x y)
+       // match: (Eq32 x y)
        // cond:
        // result: (SETEQ (CMPL x y))
        for {
@@ -18396,7 +31042,7 @@ func rewriteValueAMD64_OpEq32F(v *Value) bool {
 func rewriteValueAMD64_OpEq64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Eq64  x y)
+       // match: (Eq64 x y)
        // cond:
        // result: (SETEQ (CMPQ x y))
        for {
@@ -18430,7 +31076,7 @@ func rewriteValueAMD64_OpEq64F(v *Value) bool {
 func rewriteValueAMD64_OpEq8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Eq8   x y)
+       // match: (Eq8 x y)
        // cond:
        // result: (SETEQ (CMPB x y))
        for {
@@ -18447,7 +31093,7 @@ func rewriteValueAMD64_OpEq8(v *Value) bool {
 func rewriteValueAMD64_OpEqB(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (EqB   x y)
+       // match: (EqB x y)
        // cond:
        // result: (SETEQ (CMPB x y))
        for {
@@ -18503,7 +31149,7 @@ func rewriteValueAMD64_OpEqPtr(v *Value) bool {
 func rewriteValueAMD64_OpGeq16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Geq16  x y)
+       // match: (Geq16 x y)
        // cond:
        // result: (SETGE (CMPW x y))
        for {
@@ -18537,7 +31183,7 @@ func rewriteValueAMD64_OpGeq16U(v *Value) bool {
 func rewriteValueAMD64_OpGeq32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Geq32  x y)
+       // match: (Geq32 x y)
        // cond:
        // result: (SETGE (CMPL x y))
        for {
@@ -18588,7 +31234,7 @@ func rewriteValueAMD64_OpGeq32U(v *Value) bool {
 func rewriteValueAMD64_OpGeq64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Geq64  x y)
+       // match: (Geq64 x y)
        // cond:
        // result: (SETGE (CMPQ x y))
        for {
@@ -18639,7 +31285,7 @@ func rewriteValueAMD64_OpGeq64U(v *Value) bool {
 func rewriteValueAMD64_OpGeq8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Geq8   x y)
+       // match: (Geq8 x y)
        // cond:
        // result: (SETGE (CMPB x y))
        for {
@@ -18656,7 +31302,7 @@ func rewriteValueAMD64_OpGeq8(v *Value) bool {
 func rewriteValueAMD64_OpGeq8U(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Geq8U  x y)
+       // match: (Geq8U x y)
        // cond:
        // result: (SETAE (CMPB x y))
        for {
@@ -18693,7 +31339,7 @@ func rewriteValueAMD64_OpGetG(v *Value) bool {
 func rewriteValueAMD64_OpGreater16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Greater16  x y)
+       // match: (Greater16 x y)
        // cond:
        // result: (SETG (CMPW x y))
        for {
@@ -18727,7 +31373,7 @@ func rewriteValueAMD64_OpGreater16U(v *Value) bool {
 func rewriteValueAMD64_OpGreater32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Greater32  x y)
+       // match: (Greater32 x y)
        // cond:
        // result: (SETG (CMPL x y))
        for {
@@ -18778,7 +31424,7 @@ func rewriteValueAMD64_OpGreater32U(v *Value) bool {
 func rewriteValueAMD64_OpGreater64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Greater64  x y)
+       // match: (Greater64 x y)
        // cond:
        // result: (SETG (CMPQ x y))
        for {
@@ -18829,7 +31475,7 @@ func rewriteValueAMD64_OpGreater64U(v *Value) bool {
 func rewriteValueAMD64_OpGreater8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Greater8   x y)
+       // match: (Greater8 x y)
        // cond:
        // result: (SETG (CMPB x y))
        for {
@@ -18846,7 +31492,7 @@ func rewriteValueAMD64_OpGreater8(v *Value) bool {
 func rewriteValueAMD64_OpGreater8U(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Greater8U  x y)
+       // match: (Greater8U x y)
        // cond:
        // result: (SETA (CMPB x y))
        for {
@@ -18861,7 +31507,7 @@ func rewriteValueAMD64_OpGreater8U(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpHmul32(v *Value) bool {
-       // match: (Hmul32  x y)
+       // match: (Hmul32 x y)
        // cond:
        // result: (HMULL  x y)
        for {
@@ -18887,7 +31533,7 @@ func rewriteValueAMD64_OpHmul32u(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpHmul64(v *Value) bool {
-       // match: (Hmul64  x y)
+       // match: (Hmul64 x y)
        // cond:
        // result: (HMULQ  x y)
        for {
@@ -19013,7 +31659,7 @@ func rewriteValueAMD64_OpIsSliceInBounds(v *Value) bool {
 func rewriteValueAMD64_OpLeq16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Leq16  x y)
+       // match: (Leq16 x y)
        // cond:
        // result: (SETLE (CMPW x y))
        for {
@@ -19047,7 +31693,7 @@ func rewriteValueAMD64_OpLeq16U(v *Value) bool {
 func rewriteValueAMD64_OpLeq32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Leq32  x y)
+       // match: (Leq32 x y)
        // cond:
        // result: (SETLE (CMPL x y))
        for {
@@ -19098,7 +31744,7 @@ func rewriteValueAMD64_OpLeq32U(v *Value) bool {
 func rewriteValueAMD64_OpLeq64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Leq64  x y)
+       // match: (Leq64 x y)
        // cond:
        // result: (SETLE (CMPQ x y))
        for {
@@ -19149,7 +31795,7 @@ func rewriteValueAMD64_OpLeq64U(v *Value) bool {
 func rewriteValueAMD64_OpLeq8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Leq8   x y)
+       // match: (Leq8 x y)
        // cond:
        // result: (SETLE (CMPB x y))
        for {
@@ -19166,7 +31812,7 @@ func rewriteValueAMD64_OpLeq8(v *Value) bool {
 func rewriteValueAMD64_OpLeq8U(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Leq8U  x y)
+       // match: (Leq8U x y)
        // cond:
        // result: (SETBE (CMPB x y))
        for {
@@ -19183,7 +31829,7 @@ func rewriteValueAMD64_OpLeq8U(v *Value) bool {
 func rewriteValueAMD64_OpLess16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Less16  x y)
+       // match: (Less16 x y)
        // cond:
        // result: (SETL (CMPW x y))
        for {
@@ -19217,7 +31863,7 @@ func rewriteValueAMD64_OpLess16U(v *Value) bool {
 func rewriteValueAMD64_OpLess32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Less32  x y)
+       // match: (Less32 x y)
        // cond:
        // result: (SETL (CMPL x y))
        for {
@@ -19268,7 +31914,7 @@ func rewriteValueAMD64_OpLess32U(v *Value) bool {
 func rewriteValueAMD64_OpLess64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Less64  x y)
+       // match: (Less64 x y)
        // cond:
        // result: (SETL (CMPQ x y))
        for {
@@ -19319,7 +31965,7 @@ func rewriteValueAMD64_OpLess64U(v *Value) bool {
 func rewriteValueAMD64_OpLess8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Less8   x y)
+       // match: (Less8 x y)
        // cond:
        // result: (SETL (CMPB x y))
        for {
@@ -19336,7 +31982,7 @@ func rewriteValueAMD64_OpLess8(v *Value) bool {
 func rewriteValueAMD64_OpLess8U(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Less8U  x y)
+       // match: (Less8U x y)
        // cond:
        // result: (SETB (CMPB x y))
        for {
@@ -19522,7 +32168,7 @@ func rewriteValueAMD64_OpLsh16x64(v *Value) bool {
 func rewriteValueAMD64_OpLsh16x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh16x8  <t> x y)
+       // match: (Lsh16x8 <t> x y)
        // cond:
        // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
@@ -19618,7 +32264,7 @@ func rewriteValueAMD64_OpLsh32x64(v *Value) bool {
 func rewriteValueAMD64_OpLsh32x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh32x8  <t> x y)
+       // match: (Lsh32x8 <t> x y)
        // cond:
        // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
@@ -19714,7 +32360,7 @@ func rewriteValueAMD64_OpLsh64x64(v *Value) bool {
 func rewriteValueAMD64_OpLsh64x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh64x8  <t> x y)
+       // match: (Lsh64x8 <t> x y)
        // cond:
        // result: (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPBconst y [64])))
        for {
@@ -19810,7 +32456,7 @@ func rewriteValueAMD64_OpLsh8x64(v *Value) bool {
 func rewriteValueAMD64_OpLsh8x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh8x8  <t> x y)
+       // match: (Lsh8x8 <t> x y)
        // cond:
        // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
@@ -19836,7 +32482,7 @@ func rewriteValueAMD64_OpMod16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod16  x y)
+       // match: (Mod16 x y)
        // cond:
        // result: (Select1 (DIVW  x y))
        for {
@@ -19874,7 +32520,7 @@ func rewriteValueAMD64_OpMod32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod32  x y)
+       // match: (Mod32 x y)
        // cond:
        // result: (Select1 (DIVL  x y))
        for {
@@ -19912,7 +32558,7 @@ func rewriteValueAMD64_OpMod64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod64  x y)
+       // match: (Mod64 x y)
        // cond:
        // result: (Select1 (DIVQ  x y))
        for {
@@ -19950,7 +32596,7 @@ func rewriteValueAMD64_OpMod8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod8   x y)
+       // match: (Mod8 x y)
        // cond:
        // result: (Select1 (DIVW  (SignExt8to16 x) (SignExt8to16 y)))
        for {
@@ -19973,7 +32619,7 @@ func rewriteValueAMD64_OpMod8u(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod8u  x y)
+       // match: (Mod8u x y)
        // cond:
        // result: (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
        for {
@@ -20350,7 +32996,7 @@ func rewriteValueAMD64_OpMove(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpMul16(v *Value) bool {
-       // match: (Mul16  x y)
+       // match: (Mul16 x y)
        // cond:
        // result: (MULL  x y)
        for {
@@ -20363,7 +33009,7 @@ func rewriteValueAMD64_OpMul16(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpMul32(v *Value) bool {
-       // match: (Mul32  x y)
+       // match: (Mul32 x y)
        // cond:
        // result: (MULL  x y)
        for {
@@ -20389,7 +33035,7 @@ func rewriteValueAMD64_OpMul32F(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpMul64(v *Value) bool {
-       // match: (Mul64  x y)
+       // match: (Mul64 x y)
        // cond:
        // result: (MULQ  x y)
        for {
@@ -20428,7 +33074,7 @@ func rewriteValueAMD64_OpMul64uhilo(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpMul8(v *Value) bool {
-       // match: (Mul8   x y)
+       // match: (Mul8 x y)
        // cond:
        // result: (MULL  x y)
        for {
@@ -20441,7 +33087,7 @@ func rewriteValueAMD64_OpMul8(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpNeg16(v *Value) bool {
-       // match: (Neg16  x)
+       // match: (Neg16 x)
        // cond:
        // result: (NEGL x)
        for {
@@ -20452,7 +33098,7 @@ func rewriteValueAMD64_OpNeg16(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpNeg32(v *Value) bool {
-       // match: (Neg32  x)
+       // match: (Neg32 x)
        // cond:
        // result: (NEGL x)
        for {
@@ -20481,7 +33127,7 @@ func rewriteValueAMD64_OpNeg32F(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpNeg64(v *Value) bool {
-       // match: (Neg64  x)
+       // match: (Neg64 x)
        // cond:
        // result: (NEGQ x)
        for {
@@ -20510,7 +33156,7 @@ func rewriteValueAMD64_OpNeg64F(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpNeg8(v *Value) bool {
-       // match: (Neg8   x)
+       // match: (Neg8 x)
        // cond:
        // result: (NEGL x)
        for {
@@ -20523,7 +33169,7 @@ func rewriteValueAMD64_OpNeg8(v *Value) bool {
 func rewriteValueAMD64_OpNeq16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Neq16  x y)
+       // match: (Neq16 x y)
        // cond:
        // result: (SETNE (CMPW x y))
        for {
@@ -20540,7 +33186,7 @@ func rewriteValueAMD64_OpNeq16(v *Value) bool {
 func rewriteValueAMD64_OpNeq32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Neq32  x y)
+       // match: (Neq32 x y)
        // cond:
        // result: (SETNE (CMPL x y))
        for {
@@ -20574,7 +33220,7 @@ func rewriteValueAMD64_OpNeq32F(v *Value) bool {
 func rewriteValueAMD64_OpNeq64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Neq64  x y)
+       // match: (Neq64 x y)
        // cond:
        // result: (SETNE (CMPQ x y))
        for {
@@ -20608,7 +33254,7 @@ func rewriteValueAMD64_OpNeq64F(v *Value) bool {
 func rewriteValueAMD64_OpNeq8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Neq8   x y)
+       // match: (Neq8 x y)
        // cond:
        // result: (SETNE (CMPB x y))
        for {
@@ -20625,7 +33271,7 @@ func rewriteValueAMD64_OpNeq8(v *Value) bool {
 func rewriteValueAMD64_OpNeqB(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (NeqB   x y)
+       // match: (NeqB x y)
        // cond:
        // result: (SETNE (CMPB x y))
        for {
@@ -20796,7 +33442,7 @@ func rewriteValueAMD64_OpOr64(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpOr8(v *Value) bool {
-       // match: (Or8  x y)
+       // match: (Or8 x y)
        // cond:
        // result: (ORL x y)
        for {
@@ -20920,7 +33566,7 @@ func rewriteValueAMD64_OpRsh16Ux64(v *Value) bool {
 func rewriteValueAMD64_OpRsh16Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh16Ux8  <t> x y)
+       // match: (Rsh16Ux8 <t> x y)
        // cond:
        // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPBconst y [16])))
        for {
@@ -21025,7 +33671,7 @@ func rewriteValueAMD64_OpRsh16x64(v *Value) bool {
 func rewriteValueAMD64_OpRsh16x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh16x8  <t> x y)
+       // match: (Rsh16x8 <t> x y)
        // cond:
        // result: (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [16])))))
        for {
@@ -21124,7 +33770,7 @@ func rewriteValueAMD64_OpRsh32Ux64(v *Value) bool {
 func rewriteValueAMD64_OpRsh32Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh32Ux8  <t> x y)
+       // match: (Rsh32Ux8 <t> x y)
        // cond:
        // result: (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
@@ -21229,7 +33875,7 @@ func rewriteValueAMD64_OpRsh32x64(v *Value) bool {
 func rewriteValueAMD64_OpRsh32x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh32x8  <t> x y)
+       // match: (Rsh32x8 <t> x y)
        // cond:
        // result: (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [32])))))
        for {
@@ -21328,7 +33974,7 @@ func rewriteValueAMD64_OpRsh64Ux64(v *Value) bool {
 func rewriteValueAMD64_OpRsh64Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh64Ux8  <t> x y)
+       // match: (Rsh64Ux8 <t> x y)
        // cond:
        // result: (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPBconst y [64])))
        for {
@@ -21433,7 +34079,7 @@ func rewriteValueAMD64_OpRsh64x64(v *Value) bool {
 func rewriteValueAMD64_OpRsh64x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh64x8  <t> x y)
+       // match: (Rsh64x8 <t> x y)
        // cond:
        // result: (SARQ <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [64])))))
        for {
@@ -21532,7 +34178,7 @@ func rewriteValueAMD64_OpRsh8Ux64(v *Value) bool {
 func rewriteValueAMD64_OpRsh8Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh8Ux8  <t> x y)
+       // match: (Rsh8Ux8 <t> x y)
        // cond:
        // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPBconst y [8])))
        for {
@@ -21637,7 +34283,7 @@ func rewriteValueAMD64_OpRsh8x64(v *Value) bool {
 func rewriteValueAMD64_OpRsh8x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh8x8  <t> x y)
+       // match: (Rsh8x8 <t> x y)
        // cond:
        // result: (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [8])))))
        for {
@@ -21703,7 +34349,7 @@ func rewriteValueAMD64_OpSelect0(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpSelect1(v *Value) bool {
-       // match: (Select1     (AddTupleFirst32 tuple _  ))
+       // match: (Select1 (AddTupleFirst32 tuple _))
        // cond:
        // result: (Select1 tuple)
        for {
@@ -21716,7 +34362,7 @@ func rewriteValueAMD64_OpSelect1(v *Value) bool {
                v.AddArg(tuple)
                return true
        }
-       // match: (Select1     (AddTupleFirst64 tuple _  ))
+       // match: (Select1 (AddTupleFirst64 tuple _))
        // cond:
        // result: (Select1 tuple)
        for {
@@ -21765,7 +34411,7 @@ func rewriteValueAMD64_OpSignExt32to64(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpSignExt8to16(v *Value) bool {
-       // match: (SignExt8to16  x)
+       // match: (SignExt8to16 x)
        // cond:
        // result: (MOVBQSX x)
        for {
@@ -21776,7 +34422,7 @@ func rewriteValueAMD64_OpSignExt8to16(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpSignExt8to32(v *Value) bool {
-       // match: (SignExt8to32  x)
+       // match: (SignExt8to32 x)
        // cond:
        // result: (MOVBQSX x)
        for {
@@ -21787,7 +34433,7 @@ func rewriteValueAMD64_OpSignExt8to32(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpSignExt8to64(v *Value) bool {
-       // match: (SignExt8to64  x)
+       // match: (SignExt8to64 x)
        // cond:
        // result: (MOVBQSX x)
        for {
@@ -21946,7 +34592,7 @@ func rewriteValueAMD64_OpStore(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpSub16(v *Value) bool {
-       // match: (Sub16  x y)
+       // match: (Sub16 x y)
        // cond:
        // result: (SUBL  x y)
        for {
@@ -21959,7 +34605,7 @@ func rewriteValueAMD64_OpSub16(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpSub32(v *Value) bool {
-       // match: (Sub32  x y)
+       // match: (Sub32 x y)
        // cond:
        // result: (SUBL  x y)
        for {
@@ -21985,7 +34631,7 @@ func rewriteValueAMD64_OpSub32F(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpSub64(v *Value) bool {
-       // match: (Sub64  x y)
+       // match: (Sub64 x y)
        // cond:
        // result: (SUBQ  x y)
        for {
@@ -22011,7 +34657,7 @@ func rewriteValueAMD64_OpSub64F(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpSub8(v *Value) bool {
-       // match: (Sub8   x y)
+       // match: (Sub8 x y)
        // cond:
        // result: (SUBL  x y)
        for {
@@ -22059,7 +34705,7 @@ func rewriteValueAMD64_OpSubPtr(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpTrunc16to8(v *Value) bool {
-       // match: (Trunc16to8  x)
+       // match: (Trunc16to8 x)
        // cond:
        // result: x
        for {
@@ -22083,7 +34729,7 @@ func rewriteValueAMD64_OpTrunc32to16(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpTrunc32to8(v *Value) bool {
-       // match: (Trunc32to8  x)
+       // match: (Trunc32to8 x)
        // cond:
        // result: x
        for {
@@ -22119,7 +34765,7 @@ func rewriteValueAMD64_OpTrunc64to32(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpTrunc64to8(v *Value) bool {
-       // match: (Trunc64to8  x)
+       // match: (Trunc64to8 x)
        // cond:
        // result: x
        for {
@@ -22170,7 +34816,7 @@ func rewriteValueAMD64_OpXor64(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpXor8(v *Value) bool {
-       // match: (Xor8  x y)
+       // match: (Xor8 x y)
        // cond:
        // result: (XORL x y)
        for {
@@ -22531,7 +35177,7 @@ func rewriteValueAMD64_OpZeroExt32to64(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpZeroExt8to16(v *Value) bool {
-       // match: (ZeroExt8to16  x)
+       // match: (ZeroExt8to16 x)
        // cond:
        // result: (MOVBQZX x)
        for {
@@ -22542,7 +35188,7 @@ func rewriteValueAMD64_OpZeroExt8to16(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpZeroExt8to32(v *Value) bool {
-       // match: (ZeroExt8to32  x)
+       // match: (ZeroExt8to32 x)
        // cond:
        // result: (MOVBQZX x)
        for {
@@ -22553,7 +35199,7 @@ func rewriteValueAMD64_OpZeroExt8to32(v *Value) bool {
        }
 }
 func rewriteValueAMD64_OpZeroExt8to64(v *Value) bool {
-       // match: (ZeroExt8to64  x)
+       // match: (ZeroExt8to64 x)
        // cond:
        // result: (MOVBQZX x)
        for {
@@ -22603,6 +35249,37 @@ func rewriteBlockAMD64(b *Block) bool {
                        b.SetControl(v0)
                        return true
                }
+               // match: (EQ (TESTL y (SHLL (MOVLconst [1]) x)))
+               // cond: !config.nacl
+               // result: (UGE (BTL x y))
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTL {
+                               break
+                       }
+                       y := v.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SHLL {
+                               break
+                       }
+                       v_1_0 := v_1.Args[0]
+                       if v_1_0.Op != OpAMD64MOVLconst {
+                               break
+                       }
+                       if v_1_0.AuxInt != 1 {
+                               break
+                       }
+                       x := v_1.Args[1]
+                       if !(!config.nacl) {
+                               break
+                       }
+                       b.Kind = BlockAMD64UGE
+                       v0 := b.NewValue0(v.Pos, OpAMD64BTL, TypeFlags)
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       return true
+               }
                // match: (EQ (TESTQ (SHLQ (MOVQconst [1]) x) y))
                // cond: !config.nacl
                // result: (UGE (BTQ x y))
@@ -22634,6 +35311,37 @@ func rewriteBlockAMD64(b *Block) bool {
                        b.SetControl(v0)
                        return true
                }
+               // match: (EQ (TESTQ y (SHLQ (MOVQconst [1]) x)))
+               // cond: !config.nacl
+               // result: (UGE (BTQ x y))
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTQ {
+                               break
+                       }
+                       y := v.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SHLQ {
+                               break
+                       }
+                       v_1_0 := v_1.Args[0]
+                       if v_1_0.Op != OpAMD64MOVQconst {
+                               break
+                       }
+                       if v_1_0.AuxInt != 1 {
+                               break
+                       }
+                       x := v_1.Args[1]
+                       if !(!config.nacl) {
+                               break
+                       }
+                       b.Kind = BlockAMD64UGE
+                       v0 := b.NewValue0(v.Pos, OpAMD64BTQ, TypeFlags)
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       return true
+               }
                // match: (EQ (TESTLconst [c] x))
                // cond: isPowerOfTwo(c) && log2(c) < 32 && !config.nacl
                // result: (UGE (BTLconst [log2(c)] x))
@@ -22698,6 +35406,30 @@ func rewriteBlockAMD64(b *Block) bool {
                        b.SetControl(v0)
                        return true
                }
+               // match: (EQ (TESTQ x (MOVQconst [c])))
+               // cond: isPowerOfTwo(c) && log2(c) < 64 && !config.nacl
+               // result: (UGE (BTQconst [log2(c)] x))
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTQ {
+                               break
+                       }
+                       x := v.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64MOVQconst {
+                               break
+                       }
+                       c := v_1.AuxInt
+                       if !(isPowerOfTwo(c) && log2(c) < 64 && !config.nacl) {
+                               break
+                       }
+                       b.Kind = BlockAMD64UGE
+                       v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, TypeFlags)
+                       v0.AuxInt = log2(c)
+                       v0.AddArg(x)
+                       b.SetControl(v0)
+                       return true
+               }
                // match: (EQ (InvertFlags cmp) yes no)
                // cond:
                // result: (EQ cmp yes no)
@@ -23001,7 +35733,7 @@ func rewriteBlockAMD64(b *Block) bool {
                        return true
                }
        case BlockIf:
-               // match: (If (SETL  cmp) yes no)
+               // match: (If (SETL cmp) yes no)
                // cond:
                // result: (LT  cmp yes no)
                for {
@@ -23035,7 +35767,7 @@ func rewriteBlockAMD64(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (If (SETG  cmp) yes no)
+               // match: (If (SETG cmp) yes no)
                // cond:
                // result: (GT  cmp yes no)
                for {
@@ -23103,7 +35835,7 @@ func rewriteBlockAMD64(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (If (SETB  cmp) yes no)
+               // match: (If (SETB cmp) yes no)
                // cond:
                // result: (ULT cmp yes no)
                for {
@@ -23137,7 +35869,7 @@ func rewriteBlockAMD64(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (If (SETA  cmp) yes no)
+               // match: (If (SETA cmp) yes no)
                // cond:
                // result: (UGT cmp yes no)
                for {
@@ -23171,7 +35903,7 @@ func rewriteBlockAMD64(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (If (SETGF  cmp) yes no)
+               // match: (If (SETGF cmp) yes no)
                // cond:
                // result: (UGT  cmp yes no)
                for {
@@ -23263,110 +35995,334 @@ func rewriteBlockAMD64(b *Block) bool {
                // result: (GE cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != OpAMD64InvertFlags {
+                       if v.Op != OpAMD64InvertFlags {
+                               break
+                       }
+                       cmp := v.Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64GE
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (LE (FlagEQ) yes no)
+               // cond:
+               // result: (First nil yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64FlagEQ {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (LE (FlagLT_ULT) yes no)
+               // cond:
+               // result: (First nil yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64FlagLT_ULT {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (LE (FlagLT_UGT) yes no)
+               // cond:
+               // result: (First nil yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64FlagLT_UGT {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (LE (FlagGT_ULT) yes no)
+               // cond:
+               // result: (First nil no yes)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64FlagGT_ULT {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       b.swapSuccessors()
+                       _ = no
+                       _ = yes
+                       return true
+               }
+               // match: (LE (FlagGT_UGT) yes no)
+               // cond:
+               // result: (First nil no yes)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64FlagGT_UGT {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       b.swapSuccessors()
+                       _ = no
+                       _ = yes
+                       return true
+               }
+       case BlockAMD64LT:
+               // match: (LT (InvertFlags cmp) yes no)
+               // cond:
+               // result: (GT cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64InvertFlags {
+                               break
+                       }
+                       cmp := v.Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64GT
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (LT (FlagEQ) yes no)
+               // cond:
+               // result: (First nil no yes)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64FlagEQ {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       b.swapSuccessors()
+                       _ = no
+                       _ = yes
+                       return true
+               }
+               // match: (LT (FlagLT_ULT) yes no)
+               // cond:
+               // result: (First nil yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64FlagLT_ULT {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (LT (FlagLT_UGT) yes no)
+               // cond:
+               // result: (First nil yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64FlagLT_UGT {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (LT (FlagGT_ULT) yes no)
+               // cond:
+               // result: (First nil no yes)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64FlagGT_ULT {
                                break
                        }
-                       cmp := v.Args[0]
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockAMD64GE
-                       b.SetControl(cmp)
-                       _ = yes
+                       b.Kind = BlockFirst
+                       b.SetControl(nil)
+                       b.swapSuccessors()
                        _ = no
+                       _ = yes
                        return true
                }
-               // match: (LE (FlagEQ) yes no)
+               // match: (LT (FlagGT_UGT) yes no)
                // cond:
-               // result: (First nil yes no)
+               // result: (First nil no yes)
                for {
                        v := b.Control
-                       if v.Op != OpAMD64FlagEQ {
+                       if v.Op != OpAMD64FlagGT_UGT {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
                        b.Kind = BlockFirst
                        b.SetControl(nil)
-                       _ = yes
+                       b.swapSuccessors()
                        _ = no
+                       _ = yes
                        return true
                }
-               // match: (LE (FlagLT_ULT) yes no)
+       case BlockAMD64NE:
+               // match: (NE (TESTB (SETL cmp) (SETL cmp)) yes no)
                // cond:
-               // result: (First nil yes no)
+               // result: (LT  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != OpAMD64FlagLT_ULT {
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETL {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETL {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
+                       b.Kind = BlockAMD64LT
+                       b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (LE (FlagLT_UGT) yes no)
+               // match: (NE (TESTB (SETL cmp) (SETL cmp)) yes no)
                // cond:
-               // result: (First nil yes no)
+               // result: (LT  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != OpAMD64FlagLT_UGT {
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETL {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETL {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
+                       b.Kind = BlockAMD64LT
+                       b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (LE (FlagGT_ULT) yes no)
+               // match: (NE (TESTB (SETLE cmp) (SETLE cmp)) yes no)
                // cond:
-               // result: (First nil no yes)
+               // result: (LE  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != OpAMD64FlagGT_ULT {
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETLE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETLE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
-                       b.swapSuccessors()
-                       _ = no
+                       b.Kind = BlockAMD64LE
+                       b.SetControl(cmp)
                        _ = yes
+                       _ = no
                        return true
                }
-               // match: (LE (FlagGT_UGT) yes no)
+               // match: (NE (TESTB (SETLE cmp) (SETLE cmp)) yes no)
                // cond:
-               // result: (First nil no yes)
+               // result: (LE  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != OpAMD64FlagGT_UGT {
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETLE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETLE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
-                       b.swapSuccessors()
-                       _ = no
+                       b.Kind = BlockAMD64LE
+                       b.SetControl(cmp)
                        _ = yes
+                       _ = no
                        return true
                }
-       case BlockAMD64LT:
-               // match: (LT (InvertFlags cmp) yes no)
+               // match: (NE (TESTB (SETG cmp) (SETG cmp)) yes no)
                // cond:
-               // result: (GT cmp yes no)
+               // result: (GT  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != OpAMD64InvertFlags {
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETG {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETG {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
-                       cmp := v.Args[0]
                        yes := b.Succs[0]
                        no := b.Succs[1]
                        b.Kind = BlockAMD64GT
@@ -23375,105 +36331,161 @@ func rewriteBlockAMD64(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (LT (FlagEQ) yes no)
+               // match: (NE (TESTB (SETG cmp) (SETG cmp)) yes no)
                // cond:
-               // result: (First nil no yes)
+               // result: (GT  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != OpAMD64FlagEQ {
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETG {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETG {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
-                       b.swapSuccessors()
-                       _ = no
+                       b.Kind = BlockAMD64GT
+                       b.SetControl(cmp)
                        _ = yes
+                       _ = no
                        return true
                }
-               // match: (LT (FlagLT_ULT) yes no)
+               // match: (NE (TESTB (SETGE cmp) (SETGE cmp)) yes no)
                // cond:
-               // result: (First nil yes no)
+               // result: (GE  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != OpAMD64FlagLT_ULT {
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETGE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETGE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
+                       b.Kind = BlockAMD64GE
+                       b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (LT (FlagLT_UGT) yes no)
+               // match: (NE (TESTB (SETGE cmp) (SETGE cmp)) yes no)
                // cond:
-               // result: (First nil yes no)
+               // result: (GE  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != OpAMD64FlagLT_UGT {
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETGE {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETGE {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
+                       b.Kind = BlockAMD64GE
+                       b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (LT (FlagGT_ULT) yes no)
+               // match: (NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no)
                // cond:
-               // result: (First nil no yes)
+               // result: (EQ  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != OpAMD64FlagGT_ULT {
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETEQ {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETEQ {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
-                       b.swapSuccessors()
-                       _ = no
+                       b.Kind = BlockAMD64EQ
+                       b.SetControl(cmp)
                        _ = yes
+                       _ = no
                        return true
                }
-               // match: (LT (FlagGT_UGT) yes no)
+               // match: (NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no)
                // cond:
-               // result: (First nil no yes)
+               // result: (EQ  cmp yes no)
                for {
                        v := b.Control
-                       if v.Op != OpAMD64FlagGT_UGT {
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETEQ {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETEQ {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
                                break
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockFirst
-                       b.SetControl(nil)
-                       b.swapSuccessors()
-                       _ = no
+                       b.Kind = BlockAMD64EQ
+                       b.SetControl(cmp)
                        _ = yes
+                       _ = no
                        return true
                }
-       case BlockAMD64NE:
-               // match: (NE (TESTB (SETL  cmp) (SETL  cmp)) yes no)
+               // match: (NE (TESTB (SETNE cmp) (SETNE cmp)) yes no)
                // cond:
-               // result: (LT  cmp yes no)
+               // result: (NE  cmp yes no)
                for {
                        v := b.Control
                        if v.Op != OpAMD64TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != OpAMD64SETL {
+                       if v_0.Op != OpAMD64SETNE {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != OpAMD64SETL {
+                       if v_1.Op != OpAMD64SETNE {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -23481,27 +36493,27 @@ func rewriteBlockAMD64(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockAMD64LT
+                       b.Kind = BlockAMD64NE
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETLE cmp) (SETLE cmp)) yes no)
+               // match: (NE (TESTB (SETNE cmp) (SETNE cmp)) yes no)
                // cond:
-               // result: (LE  cmp yes no)
+               // result: (NE  cmp yes no)
                for {
                        v := b.Control
                        if v.Op != OpAMD64TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != OpAMD64SETLE {
+                       if v_0.Op != OpAMD64SETNE {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != OpAMD64SETLE {
+                       if v_1.Op != OpAMD64SETNE {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -23509,27 +36521,27 @@ func rewriteBlockAMD64(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockAMD64LE
+                       b.Kind = BlockAMD64NE
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETG  cmp) (SETG  cmp)) yes no)
+               // match: (NE (TESTB (SETB cmp) (SETB cmp)) yes no)
                // cond:
-               // result: (GT  cmp yes no)
+               // result: (ULT cmp yes no)
                for {
                        v := b.Control
                        if v.Op != OpAMD64TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != OpAMD64SETG {
+                       if v_0.Op != OpAMD64SETB {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != OpAMD64SETG {
+                       if v_1.Op != OpAMD64SETB {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -23537,27 +36549,27 @@ func rewriteBlockAMD64(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockAMD64GT
+                       b.Kind = BlockAMD64ULT
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETGE cmp) (SETGE cmp)) yes no)
+               // match: (NE (TESTB (SETB cmp) (SETB cmp)) yes no)
                // cond:
-               // result: (GE  cmp yes no)
+               // result: (ULT cmp yes no)
                for {
                        v := b.Control
                        if v.Op != OpAMD64TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != OpAMD64SETGE {
+                       if v_0.Op != OpAMD64SETB {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != OpAMD64SETGE {
+                       if v_1.Op != OpAMD64SETB {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -23565,27 +36577,27 @@ func rewriteBlockAMD64(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockAMD64GE
+                       b.Kind = BlockAMD64ULT
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no)
+               // match: (NE (TESTB (SETBE cmp) (SETBE cmp)) yes no)
                // cond:
-               // result: (EQ  cmp yes no)
+               // result: (ULE cmp yes no)
                for {
                        v := b.Control
                        if v.Op != OpAMD64TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != OpAMD64SETEQ {
+                       if v_0.Op != OpAMD64SETBE {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != OpAMD64SETEQ {
+                       if v_1.Op != OpAMD64SETBE {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -23593,27 +36605,27 @@ func rewriteBlockAMD64(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockAMD64EQ
+                       b.Kind = BlockAMD64ULE
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETNE cmp) (SETNE cmp)) yes no)
+               // match: (NE (TESTB (SETBE cmp) (SETBE cmp)) yes no)
                // cond:
-               // result: (NE  cmp yes no)
+               // result: (ULE cmp yes no)
                for {
                        v := b.Control
                        if v.Op != OpAMD64TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != OpAMD64SETNE {
+                       if v_0.Op != OpAMD64SETBE {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != OpAMD64SETNE {
+                       if v_1.Op != OpAMD64SETBE {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -23621,27 +36633,27 @@ func rewriteBlockAMD64(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockAMD64NE
+                       b.Kind = BlockAMD64ULE
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETB  cmp) (SETB  cmp)) yes no)
+               // match: (NE (TESTB (SETA cmp) (SETA cmp)) yes no)
                // cond:
-               // result: (ULT cmp yes no)
+               // result: (UGT cmp yes no)
                for {
                        v := b.Control
                        if v.Op != OpAMD64TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != OpAMD64SETB {
+                       if v_0.Op != OpAMD64SETA {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != OpAMD64SETB {
+                       if v_1.Op != OpAMD64SETA {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -23649,27 +36661,27 @@ func rewriteBlockAMD64(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockAMD64ULT
+                       b.Kind = BlockAMD64UGT
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETBE cmp) (SETBE cmp)) yes no)
+               // match: (NE (TESTB (SETA cmp) (SETA cmp)) yes no)
                // cond:
-               // result: (ULE cmp yes no)
+               // result: (UGT cmp yes no)
                for {
                        v := b.Control
                        if v.Op != OpAMD64TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != OpAMD64SETBE {
+                       if v_0.Op != OpAMD64SETA {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != OpAMD64SETBE {
+                       if v_1.Op != OpAMD64SETA {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -23677,27 +36689,27 @@ func rewriteBlockAMD64(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockAMD64ULE
+                       b.Kind = BlockAMD64UGT
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
                        return true
                }
-               // match: (NE (TESTB (SETA  cmp) (SETA  cmp)) yes no)
+               // match: (NE (TESTB (SETAE cmp) (SETAE cmp)) yes no)
                // cond:
-               // result: (UGT cmp yes no)
+               // result: (UGE cmp yes no)
                for {
                        v := b.Control
                        if v.Op != OpAMD64TESTB {
                                break
                        }
                        v_0 := v.Args[0]
-                       if v_0.Op != OpAMD64SETA {
+                       if v_0.Op != OpAMD64SETAE {
                                break
                        }
                        cmp := v_0.Args[0]
                        v_1 := v.Args[1]
-                       if v_1.Op != OpAMD64SETA {
+                       if v_1.Op != OpAMD64SETAE {
                                break
                        }
                        if cmp != v_1.Args[0] {
@@ -23705,7 +36717,7 @@ func rewriteBlockAMD64(b *Block) bool {
                        }
                        yes := b.Succs[0]
                        no := b.Succs[1]
-                       b.Kind = BlockAMD64UGT
+                       b.Kind = BlockAMD64UGE
                        b.SetControl(cmp)
                        _ = yes
                        _ = no
@@ -23770,6 +36782,37 @@ func rewriteBlockAMD64(b *Block) bool {
                        b.SetControl(v0)
                        return true
                }
+               // match: (NE (TESTL y (SHLL (MOVLconst [1]) x)))
+               // cond: !config.nacl
+               // result: (ULT (BTL x y))
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTL {
+                               break
+                       }
+                       y := v.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SHLL {
+                               break
+                       }
+                       v_1_0 := v_1.Args[0]
+                       if v_1_0.Op != OpAMD64MOVLconst {
+                               break
+                       }
+                       if v_1_0.AuxInt != 1 {
+                               break
+                       }
+                       x := v_1.Args[1]
+                       if !(!config.nacl) {
+                               break
+                       }
+                       b.Kind = BlockAMD64ULT
+                       v0 := b.NewValue0(v.Pos, OpAMD64BTL, TypeFlags)
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       return true
+               }
                // match: (NE (TESTQ (SHLQ (MOVQconst [1]) x) y))
                // cond: !config.nacl
                // result: (ULT (BTQ x y))
@@ -23801,6 +36844,37 @@ func rewriteBlockAMD64(b *Block) bool {
                        b.SetControl(v0)
                        return true
                }
+               // match: (NE (TESTQ y (SHLQ (MOVQconst [1]) x)))
+               // cond: !config.nacl
+               // result: (ULT (BTQ x y))
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTQ {
+                               break
+                       }
+                       y := v.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SHLQ {
+                               break
+                       }
+                       v_1_0 := v_1.Args[0]
+                       if v_1_0.Op != OpAMD64MOVQconst {
+                               break
+                       }
+                       if v_1_0.AuxInt != 1 {
+                               break
+                       }
+                       x := v_1.Args[1]
+                       if !(!config.nacl) {
+                               break
+                       }
+                       b.Kind = BlockAMD64ULT
+                       v0 := b.NewValue0(v.Pos, OpAMD64BTQ, TypeFlags)
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       return true
+               }
                // match: (NE (TESTLconst [c] x))
                // cond: isPowerOfTwo(c) && log2(c) < 32 && !config.nacl
                // result: (ULT (BTLconst [log2(c)] x))
@@ -23865,7 +36939,59 @@ func rewriteBlockAMD64(b *Block) bool {
                        b.SetControl(v0)
                        return true
                }
-               // match: (NE (TESTB (SETGF  cmp) (SETGF  cmp)) yes no)
+               // match: (NE (TESTQ x (MOVQconst [c])))
+               // cond: isPowerOfTwo(c) && log2(c) < 64 && !config.nacl
+               // result: (ULT (BTQconst [log2(c)] x))
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTQ {
+                               break
+                       }
+                       x := v.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64MOVQconst {
+                               break
+                       }
+                       c := v_1.AuxInt
+                       if !(isPowerOfTwo(c) && log2(c) < 64 && !config.nacl) {
+                               break
+                       }
+                       b.Kind = BlockAMD64ULT
+                       v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, TypeFlags)
+                       v0.AuxInt = log2(c)
+                       v0.AddArg(x)
+                       b.SetControl(v0)
+                       return true
+               }
+               // match: (NE (TESTB (SETGF cmp) (SETGF cmp)) yes no)
+               // cond:
+               // result: (UGT  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETGF {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETGF {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64UGT
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (NE (TESTB (SETGF cmp) (SETGF cmp)) yes no)
                // cond:
                // result: (UGT  cmp yes no)
                for {
@@ -23921,6 +37047,62 @@ func rewriteBlockAMD64(b *Block) bool {
                        _ = no
                        return true
                }
+               // match: (NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no)
+               // cond:
+               // result: (UGE  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETGEF {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETGEF {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64UGE
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
+               // match: (NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no)
+               // cond:
+               // result: (EQF  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETEQF {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETEQF {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64EQF
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
                // match: (NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no)
                // cond:
                // result: (EQF  cmp yes no)
@@ -23977,6 +37159,34 @@ func rewriteBlockAMD64(b *Block) bool {
                        _ = no
                        return true
                }
+               // match: (NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no)
+               // cond:
+               // result: (NEF  cmp yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               break
+                       }
+                       v_0 := v.Args[0]
+                       if v_0.Op != OpAMD64SETNEF {
+                               break
+                       }
+                       cmp := v_0.Args[0]
+                       v_1 := v.Args[1]
+                       if v_1.Op != OpAMD64SETNEF {
+                               break
+                       }
+                       if cmp != v_1.Args[0] {
+                               break
+                       }
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64NEF
+                       b.SetControl(cmp)
+                       _ = yes
+                       _ = no
+                       return true
+               }
                // match: (NE (InvertFlags cmp) yes no)
                // cond:
                // result: (NE cmp yes no)
index 7cb6f19897d8f1b1939364ae671577e3cc59ff2c..623f3475fd6a824562c59e8cabcf0011c249ed4d 100644 (file)
@@ -760,6 +760,40 @@ func rewriteValueARM_OpARMADC(v *Value) bool {
                v.AddArg(flags)
                return true
        }
+       // match: (ADC x (MOVWconst [c]) flags)
+       // cond:
+       // result: (ADCconst [c] x flags)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               flags := v.Args[2]
+               v.reset(OpARMADCconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(flags)
+               return true
+       }
+       // match: (ADC (MOVWconst [c]) x flags)
+       // cond:
+       // result: (ADCconst [c] x flags)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMMOVWconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               flags := v.Args[2]
+               v.reset(OpARMADCconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(flags)
+               return true
+       }
        // match: (ADC x (SLLconst [c] y) flags)
        // cond:
        // result: (ADCshiftLL x y [c] flags)
@@ -798,6 +832,44 @@ func rewriteValueARM_OpARMADC(v *Value) bool {
                v.AddArg(flags)
                return true
        }
+       // match: (ADC (SLLconst [c] y) x flags)
+       // cond:
+       // result: (ADCshiftLL x y [c] flags)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMSLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftLL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(flags)
+               return true
+       }
+       // match: (ADC x (SLLconst [c] y) flags)
+       // cond:
+       // result: (ADCshiftLL x y [c] flags)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMSLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftLL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(flags)
+               return true
+       }
        // match: (ADC x (SRLconst [c] y) flags)
        // cond:
        // result: (ADCshiftRL x y [c] flags)
@@ -836,6 +908,44 @@ func rewriteValueARM_OpARMADC(v *Value) bool {
                v.AddArg(flags)
                return true
        }
+       // match: (ADC (SRLconst [c] y) x flags)
+       // cond:
+       // result: (ADCshiftRL x y [c] flags)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMSRLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftRL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(flags)
+               return true
+       }
+       // match: (ADC x (SRLconst [c] y) flags)
+       // cond:
+       // result: (ADCshiftRL x y [c] flags)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMSRLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftRL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(flags)
+               return true
+       }
        // match: (ADC x (SRAconst [c] y) flags)
        // cond:
        // result: (ADCshiftRA x y [c] flags)
@@ -874,6 +984,44 @@ func rewriteValueARM_OpARMADC(v *Value) bool {
                v.AddArg(flags)
                return true
        }
+       // match: (ADC (SRAconst [c] y) x flags)
+       // cond:
+       // result: (ADCshiftRA x y [c] flags)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMSRAconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftRA)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(flags)
+               return true
+       }
+       // match: (ADC x (SRAconst [c] y) flags)
+       // cond:
+       // result: (ADCshiftRA x y [c] flags)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMSRAconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftRA)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(flags)
+               return true
+       }
        // match: (ADC x (SLL y z) flags)
        // cond:
        // result: (ADCshiftLLreg x y z flags)
@@ -912,6 +1060,44 @@ func rewriteValueARM_OpARMADC(v *Value) bool {
                v.AddArg(flags)
                return true
        }
+       // match: (ADC (SLL y z) x flags)
+       // cond:
+       // result: (ADCshiftLLreg x y z flags)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMSLL {
+                       break
+               }
+               y := v_0.Args[0]
+               z := v_0.Args[1]
+               x := v.Args[1]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftLLreg)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(z)
+               v.AddArg(flags)
+               return true
+       }
+       // match: (ADC x (SLL y z) flags)
+       // cond:
+       // result: (ADCshiftLLreg x y z flags)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMSLL {
+                       break
+               }
+               y := v_1.Args[0]
+               z := v_1.Args[1]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftLLreg)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(z)
+               v.AddArg(flags)
+               return true
+       }
        // match: (ADC x (SRL y z) flags)
        // cond:
        // result: (ADCshiftRLreg x y z flags)
@@ -950,6 +1136,44 @@ func rewriteValueARM_OpARMADC(v *Value) bool {
                v.AddArg(flags)
                return true
        }
+       // match: (ADC (SRL y z) x flags)
+       // cond:
+       // result: (ADCshiftRLreg x y z flags)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMSRL {
+                       break
+               }
+               y := v_0.Args[0]
+               z := v_0.Args[1]
+               x := v.Args[1]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftRLreg)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(z)
+               v.AddArg(flags)
+               return true
+       }
+       // match: (ADC x (SRL y z) flags)
+       // cond:
+       // result: (ADCshiftRLreg x y z flags)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMSRL {
+                       break
+               }
+               y := v_1.Args[0]
+               z := v_1.Args[1]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftRLreg)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(z)
+               v.AddArg(flags)
+               return true
+       }
        // match: (ADC x (SRA y z) flags)
        // cond:
        // result: (ADCshiftRAreg x y z flags)
@@ -988,6 +1212,44 @@ func rewriteValueARM_OpARMADC(v *Value) bool {
                v.AddArg(flags)
                return true
        }
+       // match: (ADC (SRA y z) x flags)
+       // cond:
+       // result: (ADCshiftRAreg x y z flags)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMSRA {
+                       break
+               }
+               y := v_0.Args[0]
+               z := v_0.Args[1]
+               x := v.Args[1]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftRAreg)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(z)
+               v.AddArg(flags)
+               return true
+       }
+       // match: (ADC x (SRA y z) flags)
+       // cond:
+       // result: (ADCshiftRAreg x y z flags)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMSRA {
+                       break
+               }
+               y := v_1.Args[0]
+               z := v_1.Args[1]
+               flags := v.Args[2]
+               v.reset(OpARMADCshiftRAreg)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(z)
+               v.AddArg(flags)
+               return true
+       }
        return false
 }
 func rewriteValueARM_OpARMADCconst(v *Value) bool {
@@ -1299,31 +1561,31 @@ func rewriteValueARM_OpARMADCshiftRLreg(v *Value) bool {
 func rewriteValueARM_OpARMADD(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (ADD (MOVWconst [c]) x)
+       // match: (ADD x (MOVWconst [c]))
        // cond:
        // result: (ADDconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARMMOVWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpARMADDconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (ADD x (MOVWconst [c]))
+       // match: (ADD (MOVWconst [c]) x)
        // cond:
        // result: (ADDconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMMOVWconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpARMADDconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -1594,6 +1856,31 @@ func rewriteValueARM_OpARMADD(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (ADD <t> (RSBconst [d] y) (RSBconst [c] x))
+       // cond:
+       // result: (RSBconst [c+d] (ADD <t> x y))
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMRSBconst {
+                       break
+               }
+               d := v_0.AuxInt
+               y := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMRSBconst {
+                       break
+               }
+               c := v_1.AuxInt
+               x := v_1.Args[0]
+               v.reset(OpARMRSBconst)
+               v.AuxInt = c + d
+               v0 := b.NewValue0(v.Pos, OpARMADD, t)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
        // match: (ADD (MUL x y) a)
        // cond:
        // result: (MULA x y a)
@@ -1631,31 +1918,31 @@ func rewriteValueARM_OpARMADD(v *Value) bool {
        return false
 }
 func rewriteValueARM_OpARMADDS(v *Value) bool {
-       // match: (ADDS (MOVWconst [c]) x)
+       // match: (ADDS x (MOVWconst [c]))
        // cond:
        // result: (ADDSconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARMMOVWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpARMADDSconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (ADDS x (MOVWconst [c]))
+       // match: (ADDS (MOVWconst [c]) x)
        // cond:
        // result: (ADDSconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMMOVWconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpARMADDSconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -2492,31 +2779,31 @@ func rewriteValueARM_OpARMADDshiftRLreg(v *Value) bool {
        return false
 }
 func rewriteValueARM_OpARMAND(v *Value) bool {
-       // match: (AND (MOVWconst [c]) x)
+       // match: (AND x (MOVWconst [c]))
        // cond:
        // result: (ANDconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARMMOVWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpARMANDconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (AND x (MOVWconst [c]))
+       // match: (AND (MOVWconst [c]) x)
        // cond:
        // result: (ANDconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMMOVWconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpARMANDconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -7124,185 +7411,9 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                if !(int32(c) == -1) {
                        break
                }
-               v.reset(OpARMRSBconst)
-               v.AuxInt = 0
-               v.AddArg(x)
-               return true
-       }
-       // match: (MUL _ (MOVWconst [0]))
-       // cond:
-       // result: (MOVWconst [0])
-       for {
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
-                       break
-               }
-               if v_1.AuxInt != 0 {
-                       break
-               }
-               v.reset(OpARMMOVWconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (MUL x (MOVWconst [1]))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
-                       break
-               }
-               if v_1.AuxInt != 1 {
-                       break
-               }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MUL x (MOVWconst [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c)) {
-                       break
-               }
-               v.reset(OpARMSLLconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MUL x (MOVWconst [c]))
-       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
-       // result: (ADDshiftLL x x [log2(c-1)])
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
-                       break
-               }
-               v.reset(OpARMADDshiftLL)
-               v.AuxInt = log2(c - 1)
-               v.AddArg(x)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MUL x (MOVWconst [c]))
-       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
-       // result: (RSBshiftLL x x [log2(c+1)])
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
-                       break
-               }
-               v.reset(OpARMRSBshiftLL)
-               v.AuxInt = log2(c + 1)
-               v.AddArg(x)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MUL x (MOVWconst [c]))
-       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
-       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARMSLLconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARMADDshiftLL, x.Type)
-               v0.AuxInt = 1
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MUL x (MOVWconst [c]))
-       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
-       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARMSLLconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, OpARMADDshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MUL x (MOVWconst [c]))
-       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
-       // result: (SLLconst [log2(c/7)] (RSBshiftLL <x.Type> x x [3]))
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARMSLLconst)
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARMRSBshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MUL x (MOVWconst [c]))
-       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
-       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARMSLLconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, OpARMADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpARMRSBconst)
+               v.AuxInt = 0
+               v.AddArg(x)
                return true
        }
        // match: (MUL (MOVWconst [c]) x)
@@ -7323,6 +7434,21 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL _ (MOVWconst [0]))
+       // cond:
+       // result: (MOVWconst [0])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpARMMOVWconst)
+               v.AuxInt = 0
+               return true
+       }
        // match: (MUL (MOVWconst [0]) _)
        // cond:
        // result: (MOVWconst [0])
@@ -7338,6 +7464,23 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                v.AuxInt = 0
                return true
        }
+       // match: (MUL x (MOVWconst [1]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (MUL (MOVWconst [1]) x)
        // cond:
        // result: x
@@ -7355,6 +7498,24 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL x (MOVWconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpARMSLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
        // match: (MUL (MOVWconst [c]) x)
        // cond: isPowerOfTwo(c)
        // result: (SLLconst [log2(c)] x)
@@ -7373,6 +7534,25 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL x (MOVWconst [c]))
+       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
+       // result: (ADDshiftLL x x [log2(c-1)])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+                       break
+               }
+               v.reset(OpARMADDshiftLL)
+               v.AuxInt = log2(c - 1)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
        // match: (MUL (MOVWconst [c]) x)
        // cond: isPowerOfTwo(c-1) && int32(c) >= 3
        // result: (ADDshiftLL x x [log2(c-1)])
@@ -7392,6 +7572,25 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL x (MOVWconst [c]))
+       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
+       // result: (RSBshiftLL x x [log2(c+1)])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
+                       break
+               }
+               v.reset(OpARMRSBshiftLL)
+               v.AuxInt = log2(c + 1)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
        // match: (MUL (MOVWconst [c]) x)
        // cond: isPowerOfTwo(c+1) && int32(c) >= 7
        // result: (RSBshiftLL x x [log2(c+1)])
@@ -7411,6 +7610,28 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL x (MOVWconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARMSLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARMADDshiftLL, x.Type)
+               v0.AuxInt = 1
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (MUL (MOVWconst [c]) x)
        // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
        // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
@@ -7433,6 +7654,28 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (MUL x (MOVWconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARMSLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, OpARMADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (MUL (MOVWconst [c]) x)
        // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
        // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
@@ -7455,6 +7698,28 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (MUL x (MOVWconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+       // result: (SLLconst [log2(c/7)] (RSBshiftLL <x.Type> x x [3]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARMSLLconst)
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARMRSBshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (MUL (MOVWconst [c]) x)
        // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
        // result: (SLLconst [log2(c/7)] (RSBshiftLL <x.Type> x x [3]))
@@ -7477,6 +7742,28 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (MUL x (MOVWconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARMSLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpARMADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (MUL (MOVWconst [c]) x)
        // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
        // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
@@ -7517,6 +7804,24 @@ func rewriteValueARM_OpARMMUL(v *Value) bool {
                v.AuxInt = int64(int32(c * d))
                return true
        }
+       // match: (MUL (MOVWconst [d]) (MOVWconst [c]))
+       // cond:
+       // result: (MOVWconst [int64(int32(c*d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMMOVWconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARMMOVWconst)
+               v.AuxInt = int64(int32(c * d))
+               return true
+       }
        return false
 }
 func rewriteValueARM_OpARMMULA(v *Value) bool {
@@ -8284,31 +8589,31 @@ func rewriteValueARM_OpARMNotEqual(v *Value) bool {
        return false
 }
 func rewriteValueARM_OpARMOR(v *Value) bool {
-       // match: (OR (MOVWconst [c]) x)
+       // match: (OR x (MOVWconst [c]))
        // cond:
        // result: (ORconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARMMOVWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpARMORconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (OR x (MOVWconst [c]))
+       // match: (OR (MOVWconst [c]) x)
        // cond:
        // result: (ORconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMMOVWconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpARMORconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -8629,7 +8934,7 @@ func rewriteValueARM_OpARMORshiftLL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: ( ORshiftLL [c] (SRLconst x [32-c]) x)
+       // match: (ORshiftLL [c] (SRLconst x [32-c]) x)
        // cond:
        // result: (SRRconst [32-c] x)
        for {
@@ -8856,7 +9161,7 @@ func rewriteValueARM_OpARMORshiftRL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: ( ORshiftRL [c] (SLLconst x [32-c]) x)
+       // match: (ORshiftRL [c] (SLLconst x [32-c]) x)
        // cond:
        // result: (SRRconst [   c] x)
        for {
@@ -11112,21 +11417,6 @@ func rewriteValueARM_OpARMSUB(v *Value) bool {
        return false
 }
 func rewriteValueARM_OpARMSUBS(v *Value) bool {
-       // match: (SUBS (MOVWconst [c]) x)
-       // cond:
-       // result: (RSBSconst [c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARMMOVWconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARMRSBSconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
        // match: (SUBS x (MOVWconst [c]))
        // cond:
        // result: (SUBSconst [c] x)
@@ -11976,31 +12266,31 @@ func rewriteValueARM_OpARMSUBshiftRLreg(v *Value) bool {
        return false
 }
 func rewriteValueARM_OpARMXOR(v *Value) bool {
-       // match: (XOR (MOVWconst [c]) x)
+       // match: (XOR x (MOVWconst [c]))
        // cond:
        // result: (XORconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARMMOVWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMOVWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpARMXORconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (XOR x (MOVWconst [c]))
+       // match: (XOR (MOVWconst [c]) x)
        // cond:
        // result: (XORconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARMMOVWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMMOVWconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpARMXORconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -14524,7 +14814,7 @@ func rewriteValueARM_OpLsh16x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh16x8  x y)
+       // match: (Lsh16x8 x y)
        // cond:
        // result: (SLL x (ZeroExt8to32 y))
        for {
@@ -14630,7 +14920,7 @@ func rewriteValueARM_OpLsh32x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh32x8  x y)
+       // match: (Lsh32x8 x y)
        // cond:
        // result: (SLL x (ZeroExt8to32 y))
        for {
@@ -14736,7 +15026,7 @@ func rewriteValueARM_OpLsh8x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh8x8  x y)
+       // match: (Lsh8x8 x y)
        // cond:
        // result: (SLL x (ZeroExt8to32 y))
        for {
@@ -15677,7 +15967,7 @@ func rewriteValueARM_OpRsh16Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16Ux8  x y)
+       // match: (Rsh16Ux8 x y)
        // cond:
        // result: (SRL (ZeroExt16to32 x) (ZeroExt8to32 y))
        for {
@@ -15797,7 +16087,7 @@ func rewriteValueARM_OpRsh16x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16x8  x y)
+       // match: (Rsh16x8 x y)
        // cond:
        // result: (SRA (SignExt16to32 x) (ZeroExt8to32 y))
        for {
@@ -15905,7 +16195,7 @@ func rewriteValueARM_OpRsh32Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32Ux8  x y)
+       // match: (Rsh32Ux8 x y)
        // cond:
        // result: (SRL x (ZeroExt8to32 y))
        for {
@@ -16007,7 +16297,7 @@ func rewriteValueARM_OpRsh32x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32x8  x y)
+       // match: (Rsh32x8 x y)
        // cond:
        // result: (SRA x (ZeroExt8to32 y))
        for {
@@ -16126,7 +16416,7 @@ func rewriteValueARM_OpRsh8Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8Ux8  x y)
+       // match: (Rsh8Ux8 x y)
        // cond:
        // result: (SRL (ZeroExt8to32 x) (ZeroExt8to32 y))
        for {
@@ -16246,7 +16536,7 @@ func rewriteValueARM_OpRsh8x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8x8  x y)
+       // match: (Rsh8x8 x y)
        // cond:
        // result: (SRA (SignExt8to32 x) (ZeroExt8to32 y))
        for {
index 10f2f882c167fec1a1ea14ff431af55a3aa08845..c32bdf7a9ebf962e2bd07264d82d0bbbb4393908 100644 (file)
@@ -716,31 +716,31 @@ func rewriteValueARM64(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64ADD(v *Value) bool {
-       // match: (ADD (MOVDconst [c]) x)
+       // match: (ADD x (MOVDconst [c]))
        // cond:
        // result: (ADDconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpARM64ADDconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (ADD x (MOVDconst [c]))
+       // match: (ADD (MOVDconst [c]) x)
        // cond:
        // result: (ADDconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpARM64ADDconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -899,7 +899,7 @@ func rewriteValueARM64_OpARM64ADDconst(v *Value) bool {
                v.AddArg(ptr)
                return true
        }
-       // match: (ADDconst [0]  x)
+       // match: (ADDconst [0] x)
        // cond:
        // result: x
        for {
@@ -1181,31 +1181,31 @@ func rewriteValueARM64_OpARM64ADDshiftRL(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64AND(v *Value) bool {
-       // match: (AND (MOVDconst [c]) x)
+       // match: (AND x (MOVDconst [c]))
        // cond:
        // result: (ANDconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpARM64ANDconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (AND x (MOVDconst [c]))
+       // match: (AND (MOVDconst [c]) x)
        // cond:
        // result: (ANDconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpARM64ANDconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -1239,6 +1239,21 @@ func rewriteValueARM64_OpARM64AND(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (AND (MVN y) x)
+       // cond:
+       // result: (BIC x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MVN {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64BIC)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (AND x (SLLconst [c] y))
        // cond:
        // result: (ANDshiftLL x y [c])
@@ -1344,7 +1359,7 @@ func rewriteValueARM64_OpARM64AND(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64ANDconst(v *Value) bool {
-       // match: (ANDconst [0]  _)
+       // match: (ANDconst [0] _)
        // cond:
        // result: (MOVDconst [0])
        for {
@@ -1668,7 +1683,7 @@ func rewriteValueARM64_OpARM64BIC(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64BICconst(v *Value) bool {
-       // match: (BICconst [0]  x)
+       // match: (BICconst [0] x)
        // cond:
        // result: x
        for {
@@ -2124,7 +2139,7 @@ func rewriteValueARM64_OpARM64CMPWconst(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64CMPconst(v *Value) bool {
-       // match: (CMPconst  (MOVDconst [x]) [y])
+       // match: (CMPconst (MOVDconst [x]) [y])
        // cond: x==y
        // result: (FlagEQ)
        for {
@@ -2140,7 +2155,7 @@ func rewriteValueARM64_OpARM64CMPconst(v *Value) bool {
                v.reset(OpARM64FlagEQ)
                return true
        }
-       // match: (CMPconst  (MOVDconst [x]) [y])
+       // match: (CMPconst (MOVDconst [x]) [y])
        // cond: int64(x)<int64(y) && uint64(x)<uint64(y)
        // result: (FlagLT_ULT)
        for {
@@ -2156,7 +2171,7 @@ func rewriteValueARM64_OpARM64CMPconst(v *Value) bool {
                v.reset(OpARM64FlagLT_ULT)
                return true
        }
-       // match: (CMPconst  (MOVDconst [x]) [y])
+       // match: (CMPconst (MOVDconst [x]) [y])
        // cond: int64(x)<int64(y) && uint64(x)>uint64(y)
        // result: (FlagLT_UGT)
        for {
@@ -2172,7 +2187,7 @@ func rewriteValueARM64_OpARM64CMPconst(v *Value) bool {
                v.reset(OpARM64FlagLT_UGT)
                return true
        }
-       // match: (CMPconst  (MOVDconst [x]) [y])
+       // match: (CMPconst (MOVDconst [x]) [y])
        // cond: int64(x)>int64(y) && uint64(x)<uint64(y)
        // result: (FlagGT_ULT)
        for {
@@ -2188,7 +2203,7 @@ func rewriteValueARM64_OpARM64CMPconst(v *Value) bool {
                v.reset(OpARM64FlagGT_ULT)
                return true
        }
-       // match: (CMPconst  (MOVDconst [x]) [y])
+       // match: (CMPconst (MOVDconst [x]) [y])
        // cond: int64(x)>int64(y) && uint64(x)>uint64(y)
        // result: (FlagGT_UGT)
        for {
@@ -2568,7 +2583,7 @@ func rewriteValueARM64_OpARM64CSELULT0(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64DIV(v *Value) bool {
-       // match: (DIV   (MOVDconst [c]) (MOVDconst [d]))
+       // match: (DIV (MOVDconst [c]) (MOVDconst [d]))
        // cond:
        // result: (MOVDconst [int64(c)/int64(d)])
        for {
@@ -2589,7 +2604,7 @@ func rewriteValueARM64_OpARM64DIV(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64DIVW(v *Value) bool {
-       // match: (DIVW  (MOVDconst [c]) (MOVDconst [d]))
+       // match: (DIVW (MOVDconst [c]) (MOVDconst [d]))
        // cond:
        // result: (MOVDconst [int64(int32(c)/int32(d))])
        for {
@@ -3502,7 +3517,7 @@ func rewriteValueARM64_OpARM64LessThanU(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64MOD(v *Value) bool {
-       // match: (MOD   (MOVDconst [c]) (MOVDconst [d]))
+       // match: (MOD (MOVDconst [c]) (MOVDconst [d]))
        // cond:
        // result: (MOVDconst [int64(c)%int64(d)])
        for {
@@ -3523,7 +3538,7 @@ func rewriteValueARM64_OpARM64MOD(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64MODW(v *Value) bool {
-       // match: (MODW  (MOVDconst [c]) (MOVDconst [d]))
+       // match: (MODW (MOVDconst [c]) (MOVDconst [d]))
        // cond:
        // result: (MOVDconst [int64(int32(c)%int32(d))])
        for {
@@ -3750,7 +3765,7 @@ func rewriteValueARM64_OpARM64MOVBreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVBreg  (MOVDconst [c]))
+       // match: (MOVBreg (MOVDconst [c]))
        // cond:
        // result: (MOVDconst [int64(int8(c))])
        for {
@@ -4101,7 +4116,7 @@ func rewriteValueARM64_OpARM64MOVDreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVDreg  (MOVDconst [c]))
+       // match: (MOVDreg (MOVDconst [c]))
        // cond:
        // result: (MOVDconst [c])
        for {
@@ -4521,7 +4536,7 @@ func rewriteValueARM64_OpARM64MOVHreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVHreg  (MOVDconst [c]))
+       // match: (MOVHreg (MOVDconst [c]))
        // cond:
        // result: (MOVDconst [int64(int16(c))])
        for {
@@ -5097,7 +5112,7 @@ func rewriteValueARM64_OpARM64MOVWreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVWreg  (MOVDconst [c]))
+       // match: (MOVWreg (MOVDconst [c]))
        // cond:
        // result: (MOVDconst [int64(int32(c))])
        for {
@@ -5299,6 +5314,22 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL (MOVDconst [-1]) x)
+       // cond:
+       // result: (NEG x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0.AuxInt != -1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
+               return true
+       }
        // match: (MUL _ (MOVDconst [0]))
        // cond:
        // result: (MOVDconst [0])
@@ -5314,6 +5345,21 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AuxInt = 0
                return true
        }
+       // match: (MUL (MOVDconst [0]) _)
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
        // match: (MUL x (MOVDconst [1]))
        // cond:
        // result: x
@@ -5331,6 +5377,23 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL (MOVDconst [1]) x)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (MUL x (MOVDconst [c]))
        // cond: isPowerOfTwo(c)
        // result: (SLLconst [log2(c)] x)
@@ -5349,6 +5412,24 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
        // match: (MUL x (MOVDconst [c]))
        // cond: isPowerOfTwo(c-1) && c >= 3
        // result: (ADDshiftLL x x [log2(c-1)])
@@ -5368,6 +5449,25 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c-1) && c >= 3
+       // result: (ADDshiftLL x x [log2(c-1)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c-1) && c >= 3) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c - 1)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
        // match: (MUL x (MOVDconst [c]))
        // cond: isPowerOfTwo(c+1) && c >= 7
        // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
@@ -5389,6 +5489,27 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c+1) && c >= 7
+       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c+1) && c >= 7) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
        // match: (MUL x (MOVDconst [c]))
        // cond: c%3 == 0 && isPowerOfTwo(c/3)
        // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
@@ -5411,6 +5532,28 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 1
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (MUL x (MOVDconst [c]))
        // cond: c%5 == 0 && isPowerOfTwo(c/5)
        // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
@@ -5433,6 +5576,28 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (MUL x (MOVDconst [c]))
        // cond: c%7 == 0 && isPowerOfTwo(c/7)
        // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
@@ -5457,6 +5622,30 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%7 == 0 && isPowerOfTwo(c/7)
+       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (MUL x (MOVDconst [c]))
        // cond: c%9 == 0 && isPowerOfTwo(c/9)
        // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
@@ -5479,57 +5668,156 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (MUL (MOVDconst [-1]) x)
-       // cond:
-       // result: (NEG x)
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != -1 {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MUL (MOVDconst [0]) _)
+       // match: (MUL (MOVDconst [c]) (MOVDconst [d]))
        // cond:
-       // result: (MOVDconst [0])
+       // result: (MOVDconst [c*d])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != 0 {
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
+               d := v_1.AuxInt
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               v.AuxInt = c * d
                return true
        }
-       // match: (MUL (MOVDconst [1]) x)
+       // match: (MUL (MOVDconst [d]) (MOVDconst [c]))
        // cond:
-       // result: x
+       // result: (MOVDconst [c*d])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != 1 {
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = c * d
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MULW(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MULW x (MOVDconst [c]))
+       // cond: int32(c)==-1
+       // result: (NEG x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(int32(c) == -1) {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: int32(c)==-1
+       // result: (NEG x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
+               c := v_0.AuxInt
                x := v.Args[1]
+               if !(int32(c) == -1) {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW _ (MOVDconst [c]))
+       // cond: int32(c)==0
+       // result: (MOVDconst [0])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(int32(c) == 0) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) _)
+       // cond: int32(c)==0
+       // result: (MOVDconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               if !(int32(c) == 0) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MULW x (MOVDconst [c]))
+       // cond: int32(c)==1
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(int32(c) == 1) {
+                       break
+               }
                v.reset(OpCopy)
                v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: int32(c)==1
+       // result: x
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
@@ -5537,6 +5825,24 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                }
                c := v_0.AuxInt
                x := v.Args[1]
+               if !(int32(c) == 1) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
                if !(isPowerOfTwo(c)) {
                        break
                }
@@ -5545,7 +5851,7 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
+       // match: (MULW (MOVDconst [c]) x)
        // cond: isPowerOfTwo(c)
        // result: (SLLconst [log2(c)] x)
        for {
@@ -5563,8 +5869,27 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c-1) && c >= 3
+       // match: (MULW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
+       // result: (ADDshiftLL x x [log2(c-1)])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c - 1)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
        // result: (ADDshiftLL x x [log2(c-1)])
        for {
                v_0 := v.Args[0]
@@ -5573,7 +5898,7 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               if !(isPowerOfTwo(c-1) && c >= 3) {
+               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
                        break
                }
                v.reset(OpARM64ADDshiftLL)
@@ -5582,8 +5907,29 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c+1) && c >= 7
+       // match: (MULW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
+       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
        // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
        for {
                v_0 := v.Args[0]
@@ -5592,7 +5938,7 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               if !(isPowerOfTwo(c+1) && c >= 7) {
+               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
                        break
                }
                v.reset(OpARM64ADDshiftLL)
@@ -5603,8 +5949,30 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 1
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
        // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
        for {
                v_0 := v.Args[0]
@@ -5613,7 +5981,7 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
                        break
                }
                v.reset(OpARM64SLLconst)
@@ -5625,8 +5993,30 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
        // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
        for {
                v_0 := v.Args[0]
@@ -5635,7 +6025,7 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
                        break
                }
                v.reset(OpARM64SLLconst)
@@ -5647,17 +6037,17 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: c%7 == 0 && isPowerOfTwo(c/7)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
        // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+               c := v_1.AuxInt
+               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
                        break
                }
                v.reset(OpARM64SLLconst)
@@ -5671,9 +6061,9 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: c%9 == 0 && isPowerOfTwo(c/9)
-       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
@@ -5681,21 +6071,67 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
                        break
                }
                v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 9)
+               v.AuxInt = log2(c / 7)
                v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
                v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (MUL   (MOVDconst [c]) (MOVDconst [d]))
+       // match: (MULW (MOVDconst [c]) (MOVDconst [d]))
        // cond:
-       // result: (MOVDconst [c*d])
+       // result: (MOVDconst [int64(int32(c)*int32(d))])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
@@ -5708,105 +6144,143 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
                }
                d := v_1.AuxInt
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = c * d
+               v.AuxInt = int64(int32(c) * int32(d))
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MULW(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MULW x (MOVDconst [c]))
-       // cond: int32(c)==-1
-       // result: (NEG x)
+       // match: (MULW (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(int32(c)*int32(d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               if !(int32(c) == -1) {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(int32(c) * int32(d))
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MVN(v *Value) bool {
+       // match: (MVN (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [^c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = ^c
                return true
        }
-       // match: (MULW _ (MOVDconst [c]))
-       // cond: int32(c)==0
-       // result: (MOVDconst [0])
+       return false
+}
+func rewriteValueARM64_OpARM64NEG(v *Value) bool {
+       // match: (NEG (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [-c])
        for {
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(int32(c) == 0) {
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -c
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64NotEqual(v *Value) bool {
+       // match: (NotEqual (FlagEQ))
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagEQ {
                        break
                }
                v.reset(OpARM64MOVDconst)
                v.AuxInt = 0
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: int32(c)==1
-       // result: x
+       // match: (NotEqual (FlagLT_ULT))
+       // cond:
+       // result: (MOVDconst [1])
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(int32(c) == 1) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_ULT {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
+       // match: (NotEqual (FlagLT_UGT))
+       // cond:
+       // result: (MOVDconst [1])
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_UGT {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c)) {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (NotEqual (FlagGT_ULT))
+       // cond:
+       // result: (MOVDconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_ULT {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
-       // result: (ADDshiftLL x x [log2(c-1)])
+       // match: (NotEqual (FlagGT_UGT))
+       // cond:
+       // result: (MOVDconst [1])
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_UGT {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (NotEqual (InvertFlags x))
+       // cond:
+       // result: (NotEqual x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64InvertFlags {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c - 1)
-               v.AddArg(x)
+               x := v_0.Args[0]
+               v.reset(OpARM64NotEqual)
                v.AddArg(x)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
-       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       return false
+}
+func rewriteValueARM64_OpARM64OR(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (OR x (MOVDconst [c]))
+       // cond:
+       // result: (ORconst  [c] x)
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
@@ -5814,628 +6288,900 @@ func rewriteValueARM64_OpARM64MULW(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
+               v.reset(OpARM64ORconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR (MOVDconst [c]) x)
+       // cond:
+       // result: (ORconst  [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c + 1)
-               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ORconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
-       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       // match: (OR x x)
+       // cond:
+       // result: x
        for {
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+               if x != v.Args[1] {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 1
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
-       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       // match: (OR x (SLLconst [c] y))
+       // cond:
+       // result: (ORshiftLL  x y [c])
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
                c := v_1.AuxInt
-               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               y := v_1.Args[0]
+               v.reset(OpARM64ORshiftLL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
-       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+       // match: (OR (SLLconst [c] y) x)
+       // cond:
+       // result: (ORshiftLL  x y [c])
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64ORshiftLL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
-       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       // match: (OR x (SRLconst [c] y))
+       // cond:
+       // result: (ORshiftRL  x y [c])
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
                c := v_1.AuxInt
-               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               y := v_1.Args[0]
+               v.reset(OpARM64ORshiftRL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: int32(c)==-1
-       // result: (NEG x)
+       // match: (OR (SRLconst [c] y) x)
+       // cond:
+       // result: (ORshiftRL  x y [c])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64SRLconst {
                        break
                }
                c := v_0.AuxInt
+               y := v_0.Args[0]
                x := v.Args[1]
-               if !(int32(c) == -1) {
-                       break
-               }
-               v.reset(OpARM64NEG)
+               v.reset(OpARM64ORshiftRL)
+               v.AuxInt = c
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MULW (MOVDconst [c]) _)
-       // cond: int32(c)==0
-       // result: (MOVDconst [0])
+       // match: (OR x (SRAconst [c] y))
+       // cond:
+       // result: (ORshiftRA  x y [c])
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               if !(int32(c) == 0) {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRAconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64ORshiftRA)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: int32(c)==1
-       // result: x
+       // match: (OR (SRAconst [c] y) x)
+       // cond:
+       // result: (ORshiftRA  x y [c])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64SRAconst {
                        break
                }
                c := v_0.AuxInt
+               y := v_0.Args[0]
                x := v.Args[1]
-               if !(int32(c) == 1) {
-                       break
-               }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v.reset(OpARM64ORshiftRA)
+               v.AuxInt = c
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)))
+       // cond: i1 == i0+1     && i2 == i0+2   && i3 == i0+3   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3) != nil     && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               t := v.Type
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c)) {
+               if o0.AuxInt != 8 {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
-       // result: (ADDshiftLL x x [log2(c-1)])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+               if o1.AuxInt != 16 {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c - 1)
-               v.AddArg(x)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
-       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
+               if s0.AuxInt != 24 {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c + 1)
-               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
-       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 1
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
-       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               i3 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
-       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
-       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if mem != x1.Args[1] {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULW  (MOVDconst [c]) (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [int64(int32(c)*int32(d))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               i1 := x2.AuxInt
+               if x2.Aux != s {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(int32(c) * int32(d))
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MVN(v *Value) bool {
-       // match: (MVN (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [^c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if p != x2.Args[0] {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = ^c
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64NEG(v *Value) bool {
-       // match: (NEG (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [-c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if mem != x2.Args[1] {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -c
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64NotEqual(v *Value) bool {
-       // match: (NotEqual (FlagEQ))
-       // cond:
-       // result: (MOVDconst [0])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagEQ {
+               y3 := v.Args[1]
+               if y3.Op != OpARM64MOVDnop {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (NotEqual (FlagLT_ULT))
-       // cond:
-       // result: (MOVDconst [1])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_ULT {
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
-               return true
-       }
-       // match: (NotEqual (FlagLT_UGT))
-       // cond:
-       // result: (MOVDconst [1])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_UGT {
+               i0 := x3.AuxInt
+               if x3.Aux != s {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
-               return true
-       }
-       // match: (NotEqual (FlagGT_ULT))
-       // cond:
-       // result: (MOVDconst [1])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_ULT {
+               if p != x3.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
-               return true
-       }
-       // match: (NotEqual (FlagGT_UGT))
-       // cond:
-       // result: (MOVDconst [1])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_UGT {
+               if mem != x3.Args[1] {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
-               return true
-       }
-       // match: (NotEqual (InvertFlags x))
-       // cond:
-       // result: (NotEqual x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64InvertFlags {
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpARM64NotEqual)
-               v.AddArg(x)
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.Aux = s
+               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v1.AuxInt = i0
+               v1.AddArg(p)
+               v0.AddArg(v1)
+               v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64OR(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (OR  (MOVDconst [c]) x)
-       // cond:
-       // result: (ORconst  [c] x)
+       // match: (OR <t> y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem))))
+       // cond: i1 == i0+1     && i2 == i0+2   && i3 == i0+3   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3) != nil     && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               t := v.Type
+               y3 := v.Args[0]
+               if y3.Op != OpARM64MOVDnop {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64ORconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (OR  x (MOVDconst [c]))
-       // cond:
-       // result: (ORconst  [c] x)
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ORconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (OR  x x)
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x != v.Args[1] {
+               i0 := x3.AuxInt
+               s := x3.Aux
+               p := x3.Args[0]
+               mem := x3.Args[1]
+               o0 := v.Args[1]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (OR  x s:(SLLconst [c] y))
-       // cond: s.Uses == 1 && clobber(s)
-       // result: (ORshiftLL  x y [c])
-       for {
-               x := v.Args[0]
-               s := v.Args[1]
-               if s.Op != OpARM64SLLconst {
+               if o0.AuxInt != 8 {
                        break
                }
-               c := s.AuxInt
-               y := s.Args[0]
-               if !(s.Uses == 1 && clobber(s)) {
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
                        break
                }
-               v.reset(OpARM64ORshiftLL)
-               v.AuxInt = c
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR  s:(SLLconst [c] y) x)
-       // cond: s.Uses == 1 && clobber(s)
-       // result: (ORshiftLL  x y [c])
-       for {
-               s := v.Args[0]
-               if s.Op != OpARM64SLLconst {
+               if o1.AuxInt != 16 {
                        break
                }
-               c := s.AuxInt
-               y := s.Args[0]
-               x := v.Args[1]
-               if !(s.Uses == 1 && clobber(s)) {
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
                        break
                }
-               v.reset(OpARM64ORshiftLL)
-               v.AuxInt = c
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR  x (SLLconst [c] y))
-       // cond:
-       // result: (ORshiftLL  x y [c])
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SLLconst {
+               if s0.AuxInt != 24 {
                        break
                }
-               c := v_1.AuxInt
-               y := v_1.Args[0]
-               v.reset(OpARM64ORshiftLL)
-               v.AuxInt = c
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR  (SLLconst [c] y) x)
-       // cond:
-       // result: (ORshiftLL  x y [c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               c := v_0.AuxInt
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpARM64ORshiftLL)
-               v.AuxInt = c
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR  x (SRLconst [c] y))
-       // cond:
-       // result: (ORshiftRL  x y [c])
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
                        break
                }
-               c := v_1.AuxInt
-               y := v_1.Args[0]
-               v.reset(OpARM64ORshiftRL)
-               v.AuxInt = c
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR  (SRLconst [c] y) x)
-       // cond:
-       // result: (ORshiftRL  x y [c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
+               i3 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               c := v_0.AuxInt
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpARM64ORshiftRL)
-               v.AuxInt = c
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR  x (SRAconst [c] y))
-       // cond:
-       // result: (ORshiftRA  x y [c])
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               i1 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.Aux = s
+               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v1.AuxInt = i0
+               v1.AddArg(p)
+               v0.AddArg(v1)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)))
+       // cond: i1 == i0+1     && i2 == i0+2   && i3 == i0+3   && i4 == i0+4   && i5 == i0+5   && i6 == i0+6   && i7 == i0+7   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1         && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1         && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil         && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)     && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)     && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRAconst {
+               t := v.Type
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 24 {
+                       break
+               }
+               o3 := o2.Args[0]
+               if o3.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o3.AuxInt != 32 {
+                       break
+               }
+               o4 := o3.Args[0]
+               if o4.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o4.AuxInt != 40 {
+                       break
+               }
+               o5 := o4.Args[0]
+               if o5.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o5.AuxInt != 48 {
+                       break
+               }
+               s0 := o5.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 56 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               i7 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o5.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               i6 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o4.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               i5 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := o3.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i4 := x3.AuxInt
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
+                       break
+               }
+               y4 := o2.Args[1]
+               if y4.Op != OpARM64MOVDnop {
+                       break
+               }
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
+                       break
+               }
+               i3 := x4.AuxInt
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if mem != x4.Args[1] {
+                       break
+               }
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
+                       break
+               }
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUload {
+                       break
+               }
+               i2 := x5.AuxInt
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if mem != x5.Args[1] {
+                       break
+               }
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
+                       break
+               }
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUload {
+                       break
+               }
+               i1 := x6.AuxInt
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if mem != x6.Args[1] {
+                       break
+               }
+               y7 := v.Args[1]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUload {
+                       break
+               }
+               i0 := x7.AuxInt
+               if x7.Aux != s {
+                       break
+               }
+               if p != x7.Args[0] {
+                       break
+               }
+               if mem != x7.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
+               v1.Aux = s
+               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v2.AuxInt = i0
+               v2.AddArg(p)
+               v1.AddArg(v2)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem))))
+       // cond: i1 == i0+1     && i2 == i0+2   && i3 == i0+3   && i4 == i0+4   && i5 == i0+5   && i6 == i0+6   && i7 == i0+7   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1         && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1         && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil         && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)     && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)     && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+       for {
+               t := v.Type
+               y7 := v.Args[0]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUload {
+                       break
+               }
+               i0 := x7.AuxInt
+               s := x7.Aux
+               p := x7.Args[0]
+               mem := x7.Args[1]
+               o0 := v.Args[1]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 24 {
+                       break
+               }
+               o3 := o2.Args[0]
+               if o3.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o3.AuxInt != 32 {
+                       break
+               }
+               o4 := o3.Args[0]
+               if o4.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o4.AuxInt != 40 {
+                       break
+               }
+               o5 := o4.Args[0]
+               if o5.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o5.AuxInt != 48 {
                        break
                }
-               c := v_1.AuxInt
-               y := v_1.Args[0]
-               v.reset(OpARM64ORshiftRA)
-               v.AuxInt = c
-               v.AddArg(x)
-               v.AddArg(y)
+               s0 := o5.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 56 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               i7 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               y1 := o5.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               i6 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o4.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               i5 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := o3.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i4 := x3.AuxInt
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
+                       break
+               }
+               y4 := o2.Args[1]
+               if y4.Op != OpARM64MOVDnop {
+                       break
+               }
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
+                       break
+               }
+               i3 := x4.AuxInt
+               if x4.Aux != s {
+                       break
+               }
+               if p != x4.Args[0] {
+                       break
+               }
+               if mem != x4.Args[1] {
+                       break
+               }
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
+                       break
+               }
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUload {
+                       break
+               }
+               i2 := x5.AuxInt
+               if x5.Aux != s {
+                       break
+               }
+               if p != x5.Args[0] {
+                       break
+               }
+               if mem != x5.Args[1] {
+                       break
+               }
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
+                       break
+               }
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUload {
+                       break
+               }
+               i1 := x6.AuxInt
+               if x6.Aux != s {
+                       break
+               }
+               if p != x6.Args[0] {
+                       break
+               }
+               if mem != x6.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
+               v1.Aux = s
+               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v2.AuxInt = i0
+               v2.AddArg(p)
+               v1.AddArg(v2)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (OR  (SRAconst [c] y) x)
-       // cond:
-       // result: (ORshiftRA  x y [c])
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
+       // cond: i1 == i0+1     && i2 == i0+2   && i3 == i0+3   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3) != nil     && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRAconst {
+               t := v.Type
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               c := v_0.AuxInt
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpARM64ORshiftRA)
-               v.AuxInt = c
-               v.AddArg(x)
-               v.AddArg(y)
+               if o0.AuxInt != 8 {
+                       break
+               }
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 24 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               i2 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := v.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i3 := x3.AuxInt
+               if x3.Aux != s {
+                       break
+               }
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v1.Aux = s
+               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v2.AuxInt = i0
+               v2.AddArg(p)
+               v1.AddArg(v2)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]        y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem)))    y1:(MOVDnop x1:(MOVBUload [i-1] {s} p mem)))    y2:(MOVDnop x2:(MOVBUload [i-2] {s} p mem)))    y3:(MOVDnop x3:(MOVBUload [i-3] {s} p mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1   && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3) != nil     && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i-3] p) mem)
+       // match: (OR <t> y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))))
+       // cond: i1 == i0+1     && i2 == i0+2   && i3 == i0+3   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3) != nil     && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
-               o0 := v.Args[0]
+               y3 := v.Args[0]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i3 := x3.AuxInt
+               s := x3.Aux
+               p := x3.Args[0]
+               mem := x3.Args[1]
+               o0 := v.Args[1]
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
@@ -6464,10 +7210,16 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x0.Op != OpARM64MOVBUload {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
                y1 := o1.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
@@ -6476,9 +7228,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != i-1 {
-                       break
-               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -6496,9 +7246,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != i-2 {
-                       break
-               }
+               i2 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
@@ -6508,44 +7256,26 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if mem != x2.Args[1] {
                        break
                }
-               y3 := v.Args[1]
-               if y3.Op != OpARM64MOVDnop {
-                       break
-               }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
-                       break
-               }
-               if x3.AuxInt != i-3 {
-                       break
-               }
-               if x3.Aux != s {
-                       break
-               }
-               if p != x3.Args[0] {
-                       break
-               }
-               if mem != x3.Args[1] {
-                       break
-               }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.Aux = s
-               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i - 3
-               v1.AddArg(p)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v1.Aux = s
+               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v2.AuxInt = i0
+               v2.AddArg(p)
+               v1.AddArg(v2)
+               v1.AddArg(mem)
                v0.AddArg(v1)
-               v0.AddArg(mem)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]    y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem)))    y1:(MOVDnop x1:(MOVBUload [i-1] {s} p mem)))    y2:(MOVDnop x2:(MOVBUload [i-2] {s} p mem)))    y3:(MOVDnop x3:(MOVBUload [i-3] {s} p mem)))    y4:(MOVDnop x4:(MOVBUload [i-4] {s} p mem)))    y5:(MOVDnop x5:(MOVBUload [i-5] {s} p mem)))    y6:(MOVDnop x6:(MOVBUload [i-6] {s} p mem)))    y7:(MOVDnop x7:(MOVBUload [i-7] {s} p mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1   && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1         && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil         && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)     && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)     && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i-7] p) mem))
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)))
+       // cond: i1 == i0+1     && i2 == i0+2   && i3 == i0+3   && i4 == i0+4   && i5 == i0+5   && i6 == i0+6   && i7 == i0+7   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1         && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1         && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil         && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)     && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)     && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
                o0 := v.Args[0]
@@ -6605,7 +7335,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x0.Op != OpARM64MOVBUload {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                mem := x0.Args[1]
@@ -6617,9 +7347,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != i-1 {
-                       break
-               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -6637,9 +7365,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != i-2 {
-                       break
-               }
+               i2 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
@@ -6657,9 +7383,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               if x3.AuxInt != i-3 {
-                       break
-               }
+               i3 := x3.AuxInt
                if x3.Aux != s {
                        break
                }
@@ -6677,9 +7401,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x4.Op != OpARM64MOVBUload {
                        break
                }
-               if x4.AuxInt != i-4 {
-                       break
-               }
+               i4 := x4.AuxInt
                if x4.Aux != s {
                        break
                }
@@ -6697,9 +7419,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x5.Op != OpARM64MOVBUload {
                        break
                }
-               if x5.AuxInt != i-5 {
-                       break
-               }
+               i5 := x5.AuxInt
                if x5.Aux != s {
                        break
                }
@@ -6717,9 +7437,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x6.Op != OpARM64MOVBUload {
                        break
                }
-               if x6.AuxInt != i-6 {
-                       break
-               }
+               i6 := x6.AuxInt
                if x6.Aux != s {
                        break
                }
@@ -6737,9 +7455,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x7.Op != OpARM64MOVBUload {
                        break
                }
-               if x7.AuxInt != i-7 {
-                       break
-               }
+               i7 := x7.AuxInt
                if x7.Aux != s {
                        break
                }
@@ -6749,7 +7465,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if mem != x7.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
@@ -6759,134 +7475,31 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
                v1.Aux = s
                v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i - 7
+               v2.AuxInt = i0
                v2.AddArg(p)
                v1.AddArg(v2)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]        y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem)))    y1:(MOVDnop x1:(MOVBUload [i+1] {s} p mem)))    y2:(MOVDnop x2:(MOVBUload [i+2] {s} p mem)))    y3:(MOVDnop x3:(MOVBUload [i+3] {s} p mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1   && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3) != nil     && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i] p) mem))
+       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem))))
+       // cond: i1 == i0+1     && i2 == i0+2   && i3 == i0+3   && i4 == i0+4   && i5 == i0+5   && i6 == i0+6   && i7 == i0+7   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1         && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1         && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil         && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)     && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)     && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
-               o0 := v.Args[0]
-               if o0.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o0.AuxInt != 8 {
-                       break
-               }
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o1.AuxInt != 16 {
-                       break
-               }
-               s0 := o1.Args[0]
-               if s0.Op != OpARM64SLLconst {
-                       break
-               }
-               if s0.AuxInt != 24 {
-                       break
-               }
-               y0 := s0.Args[0]
-               if y0.Op != OpARM64MOVDnop {
-                       break
-               }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
-                       break
-               }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               y1 := o1.Args[1]
-               if y1.Op != OpARM64MOVDnop {
-                       break
-               }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
-                       break
-               }
-               if x1.AuxInt != i+1 {
-                       break
-               }
-               if x1.Aux != s {
-                       break
-               }
-               if p != x1.Args[0] {
-                       break
-               }
-               if mem != x1.Args[1] {
-                       break
-               }
-               y2 := o0.Args[1]
-               if y2.Op != OpARM64MOVDnop {
-                       break
-               }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
-                       break
-               }
-               if x2.AuxInt != i+2 {
-                       break
-               }
-               if x2.Aux != s {
-                       break
-               }
-               if p != x2.Args[0] {
-                       break
-               }
-               if mem != x2.Args[1] {
-                       break
-               }
-               y3 := v.Args[1]
-               if y3.Op != OpARM64MOVDnop {
-                       break
-               }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
-                       break
-               }
-               if x3.AuxInt != i+3 {
-                       break
-               }
-               if x3.Aux != s {
-                       break
-               }
-               if p != x3.Args[0] {
-                       break
-               }
-               if mem != x3.Args[1] {
+               y7 := v.Args[0]
+               if y7.Op != OpARM64MOVDnop {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUload {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
-               v1.Aux = s
-               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i
-               v2.AddArg(p)
-               v1.AddArg(v2)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
-               return true
-       }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]    y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem)))    y1:(MOVDnop x1:(MOVBUload [i+1] {s} p mem)))    y2:(MOVDnop x2:(MOVBUload [i+2] {s} p mem)))    y3:(MOVDnop x3:(MOVBUload [i+3] {s} p mem)))    y4:(MOVDnop x4:(MOVBUload [i+4] {s} p mem)))    y5:(MOVDnop x5:(MOVBUload [i+5] {s} p mem)))    y6:(MOVDnop x6:(MOVBUload [i+6] {s} p mem)))    y7:(MOVDnop x7:(MOVBUload [i+7] {s} p mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1   && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1         && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1         && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1         && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil         && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)     && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)     && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)     && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)     && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)     && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i] p) mem))
-       for {
-               t := v.Type
-               o0 := v.Args[0]
+               i7 := x7.AuxInt
+               s := x7.Aux
+               p := x7.Args[0]
+               mem := x7.Args[1]
+               o0 := v.Args[1]
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
@@ -6943,10 +7556,16 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x0.Op != OpARM64MOVBUload {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
                y1 := o5.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
@@ -6955,9 +7574,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != i+1 {
-                       break
-               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -6975,9 +7592,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != i+2 {
-                       break
-               }
+               i2 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
@@ -6995,9 +7610,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               if x3.AuxInt != i+3 {
-                       break
-               }
+               i3 := x3.AuxInt
                if x3.Aux != s {
                        break
                }
@@ -7015,9 +7628,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x4.Op != OpARM64MOVBUload {
                        break
                }
-               if x4.AuxInt != i+4 {
-                       break
-               }
+               i4 := x4.AuxInt
                if x4.Aux != s {
                        break
                }
@@ -7035,9 +7646,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x5.Op != OpARM64MOVBUload {
                        break
                }
-               if x5.AuxInt != i+5 {
-                       break
-               }
+               i5 := x5.AuxInt
                if x5.Aux != s {
                        break
                }
@@ -7055,9 +7664,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if x6.Op != OpARM64MOVBUload {
                        break
                }
-               if x6.AuxInt != i+6 {
-                       break
-               }
+               i6 := x6.AuxInt
                if x6.Aux != s {
                        break
                }
@@ -7067,27 +7674,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                if mem != x6.Args[1] {
                        break
                }
-               y7 := v.Args[1]
-               if y7.Op != OpARM64MOVDnop {
-                       break
-               }
-               x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUload {
-                       break
-               }
-               if x7.AuxInt != i+7 {
-                       break
-               }
-               if x7.Aux != s {
-                       break
-               }
-               if p != x7.Args[0] {
-                       break
-               }
-               if mem != x7.Args[1] {
-                       break
-               }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
@@ -7097,7 +7684,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
                v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
                v1.Aux = s
                v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i
+               v2.AuxInt = i0
                v2.AddArg(p)
                v1.AddArg(v2)
                v1.AddArg(mem)
@@ -7107,7 +7694,7 @@ func rewriteValueARM64_OpARM64OR(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64ORconst(v *Value) bool {
-       // match: (ORconst  [0]  x)
+       // match: (ORconst [0] x)
        // cond:
        // result: x
        for {
@@ -7120,7 +7707,7 @@ func rewriteValueARM64_OpARM64ORconst(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ORconst  [-1] _)
+       // match: (ORconst [-1] _)
        // cond:
        // result: (MOVDconst [-1])
        for {
@@ -7131,7 +7718,7 @@ func rewriteValueARM64_OpARM64ORconst(v *Value) bool {
                v.AuxInt = -1
                return true
        }
-       // match: (ORconst  [c] (MOVDconst [d]))
+       // match: (ORconst [c] (MOVDconst [d]))
        // cond:
        // result: (MOVDconst [c|d])
        for {
@@ -7145,7 +7732,7 @@ func rewriteValueARM64_OpARM64ORconst(v *Value) bool {
                v.AuxInt = c | d
                return true
        }
-       // match: (ORconst  [c] (ORconst [d] x))
+       // match: (ORconst [c] (ORconst [d] x))
        // cond:
        // result: (ORconst [c|d] x)
        for {
@@ -7166,7 +7753,7 @@ func rewriteValueARM64_OpARM64ORconst(v *Value) bool {
 func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (ORshiftLL  (MOVDconst [c]) x [d])
+       // match: (ORshiftLL (MOVDconst [c]) x [d])
        // cond:
        // result: (ORconst  [c] (SLLconst <x.Type> x [d]))
        for {
@@ -7185,7 +7772,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (ORshiftLL  x (MOVDconst [c]) [d])
+       // match: (ORshiftLL x (MOVDconst [c]) [d])
        // cond:
        // result: (ORconst  x [int64(uint64(c)<<uint64(d))])
        for {
@@ -7201,7 +7788,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ORshiftLL  x y:(SLLconst x [c]) [d])
+       // match: (ORshiftLL x y:(SLLconst x [c]) [d])
        // cond: c==d
        // result: y
        for {
@@ -7223,7 +7810,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: ( ORshiftLL [c] (SRLconst x [64-c]) x)
+       // match: (ORshiftLL [c] (SRLconst x [64-c]) x)
        // cond:
        // result: (RORconst [64-c] x)
        for {
@@ -7244,7 +7831,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: ( ORshiftLL <t> [c] (SRLconst (MOVWUreg x) [32-c]) x)
+       // match: (ORshiftLL <t> [c] (SRLconst (MOVWUreg x) [32-c]) x)
        // cond: c < 32 && t.Size() == 4
        // result: (RORWconst [32-c] x)
        for {
@@ -7273,9 +7860,9 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ORshiftLL <t> [8]    y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem))     y1:(MOVDnop x1:(MOVBUload [i+1] {s} p mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1   && y0.Uses == 1 && y1.Uses == 1         && mergePoint(b,x0,x1) != nil   && clobber(x0) && clobber(x1)   && clobber(y0) && clobber(y1)
-       // result: @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [i] p) mem)
+       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
+       // cond: i1 == i0+1     && x0.Uses == 1 && x1.Uses == 1         && y0.Uses == 1 && y1.Uses == 1         && mergePoint(b,x0,x1) != nil   && clobber(x0) && clobber(x1)   && clobber(y0) && clobber(y1)
+       // result: @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        for {
                t := v.Type
                if v.AuxInt != 8 {
@@ -7289,7 +7876,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x0.Op != OpARM64MOVBUload {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                mem := x0.Args[1]
@@ -7301,9 +7888,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != i+1 {
-                       break
-               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -7313,7 +7898,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if mem != x1.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -7322,15 +7907,15 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v.AddArg(v0)
                v0.Aux = s
                v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i
+               v1.AuxInt = i0
                v1.AddArg(p)
                v0.AddArg(v1)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16]                    x0:(MOVHUload [i]   {s} p mem)      y1:(MOVDnop x1:(MOVBUload [i+2] {s} p mem)))    y2:(MOVDnop x2:(MOVBUload [i+3] {s} p mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1   && y1.Uses == 1 && y2.Uses == 1         && o0.Uses == 1         && mergePoint(b,x0,x1,x2) != nil        && clobber(x0) && clobber(x1) && clobber(x2)    && clobber(y1) && clobber(y2)   && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [i] p) mem)
+       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUload [i0] {s} p mem) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i3] {s} p mem)))
+       // cond: i2 == i0+2     && i3 == i0+3   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1         && y1.Uses == 1 && y2.Uses == 1         && o0.Uses == 1         && mergePoint(b,x0,x1,x2) != nil        && clobber(x0) && clobber(x1) && clobber(x2)    && clobber(y1) && clobber(y2)   && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        for {
                t := v.Type
                if v.AuxInt != 24 {
@@ -7347,7 +7932,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x0.Op != OpARM64MOVHUload {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                mem := x0.Args[1]
@@ -7359,9 +7944,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != i+2 {
-                       break
-               }
+               i2 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -7379,9 +7962,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != i+3 {
-                       break
-               }
+               i3 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
@@ -7391,7 +7972,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if mem != x2.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) {
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2)
@@ -7400,15 +7981,15 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v.AddArg(v0)
                v0.Aux = s
                v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i
+               v1.AuxInt = i0
                v1.AddArg(p)
                v0.AddArg(v1)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]              x0:(MOVWUload [i]   {s} p mem)      y1:(MOVDnop x1:(MOVBUload [i+4] {s} p mem)))    y2:(MOVDnop x2:(MOVBUload [i+5] {s} p mem)))    y3:(MOVDnop x3:(MOVBUload [i+6] {s} p mem)))    y4:(MOVDnop x4:(MOVBUload [i+7] {s} p mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1   && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1         && mergePoint(b,x0,x1,x2,x3,x4) != nil  && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)      && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)     && clobber(o0) && clobber(o1) && clobber(o2)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [i] p) mem)
+       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUload [i0] {s} p mem) y1:(MOVDnop x1:(MOVBUload [i4] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i7] {s} p mem)))
+       // cond: i4 == i0+4     && i5 == i0+5   && i6 == i0+6   && i7 == i0+7   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1         && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1         && mergePoint(b,x0,x1,x2,x3,x4) != nil  && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)      && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)     && clobber(o0) && clobber(o1) && clobber(o2)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        for {
                t := v.Type
                if v.AuxInt != 56 {
@@ -7439,7 +8020,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x0.Op != OpARM64MOVWUload {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                mem := x0.Args[1]
@@ -7451,9 +8032,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != i+4 {
-                       break
-               }
+               i4 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -7471,9 +8050,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != i+5 {
-                       break
-               }
+               i5 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
@@ -7491,9 +8068,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               if x3.AuxInt != i+6 {
-                       break
-               }
+               i6 := x3.AuxInt
                if x3.Aux != s {
                        break
                }
@@ -7511,9 +8086,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x4.Op != OpARM64MOVBUload {
                        break
                }
-               if x4.AuxInt != i+7 {
-                       break
-               }
+               i7 := x4.AuxInt
                if x4.Aux != s {
                        break
                }
@@ -7523,7 +8096,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if mem != x4.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+               if !(i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4)
@@ -7532,15 +8105,15 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v.AddArg(v0)
                v0.Aux = s
                v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i
+               v1.AuxInt = i0
                v1.AddArg(p)
                v0.AddArg(v1)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORshiftLL <t> [8]    y0:(MOVDnop x0:(MOVBUload [i]   {s} p mem))     y1:(MOVDnop x1:(MOVBUload [i-1] {s} p mem)))
-       // cond: ((i-1)%2 == 0 || i-1<256 && i-1>-256 && !isArg(s) && !isAuto(s))       && x0.Uses == 1 && x1.Uses == 1         && y0.Uses == 1 && y1.Uses == 1         && mergePoint(b,x0,x1) != nil   && clobber(x0) && clobber(x1)   && clobber(y0) && clobber(y1)
-       // result: @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i-1] {s} p mem))
+       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [i1] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i0] {s} p mem)))
+       // cond: i1 == i0+1     && (i0%2 == 0 || i0<256 && i0>-256 && !isArg(s) && !isAuto(s))  && x0.Uses == 1 && x1.Uses == 1         && y0.Uses == 1 && y1.Uses == 1         && mergePoint(b,x0,x1) != nil   && clobber(x0) && clobber(x1)   && clobber(y0) && clobber(y1)
+       // result: @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i0] {s} p mem))
        for {
                t := v.Type
                if v.AuxInt != 8 {
@@ -7554,7 +8127,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x0.Op != OpARM64MOVBUload {
                        break
                }
-               i := x0.AuxInt
+               i1 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                mem := x0.Args[1]
@@ -7566,9 +8139,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != i-1 {
-                       break
-               }
+               i0 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -7578,7 +8149,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if mem != x1.Args[1] {
                        break
                }
-               if !(((i-1)%2 == 0 || i-1 < 256 && i-1 > -256 && !isArg(s) && !isAuto(s)) && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
+               if !(i1 == i0+1 && (i0%2 == 0 || i0 < 256 && i0 > -256 && !isArg(s) && !isAuto(s)) && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -7586,16 +8157,16 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpARM64MOVHUload, t)
-               v1.AuxInt = i - 1
+               v1.AuxInt = i0
                v1.Aux = s
                v1.AddArg(p)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
        }
-       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16]        y0:(REV16W  x0:(MOVHUload [i]   {s} p mem))     y1:(MOVDnop x1:(MOVBUload [i-1] {s} p mem)))    y2:(MOVDnop x2:(MOVBUload [i-2] {s} p mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1   && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1         && o0.Uses == 1         && mergePoint(b,x0,x1,x2) != nil        && clobber(x0) && clobber(x1) && clobber(x2)    && clobber(y0) && clobber(y1) && clobber(y2)    && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i-2] p) mem))
+       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] y0:(REV16W x0:(MOVHUload [i2] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i0] {s} p mem)))
+       // cond: i1 == i0+1     && i2 == i0+2   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1         && o0.Uses == 1         && mergePoint(b,x0,x1,x2) != nil        && clobber(x0) && clobber(x1) && clobber(x2)    && clobber(y0) && clobber(y1) && clobber(y2)    && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
                if v.AuxInt != 24 {
@@ -7616,7 +8187,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x0.Op != OpARM64MOVHUload {
                        break
                }
-               i := x0.AuxInt
+               i2 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                mem := x0.Args[1]
@@ -7628,9 +8199,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != i-1 {
-                       break
-               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -7648,9 +8217,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != i-2 {
-                       break
-               }
+               i0 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
@@ -7660,7 +8227,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if mem != x2.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)) {
+               if !(i1 == i0+1 && i2 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2)
@@ -7670,16 +8237,16 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
                v1.Aux = s
                v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i - 2
+               v2.AuxInt = i0
                v2.AddArg(p)
                v1.AddArg(v2)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
        }
-       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]  y0:(REVW    x0:(MOVWUload [i]   {s} p mem))     y1:(MOVDnop x1:(MOVBUload [i-1] {s} p mem)))    y2:(MOVDnop x2:(MOVBUload [i-2] {s} p mem)))    y3:(MOVDnop x3:(MOVBUload [i-3] {s} p mem)))    y4:(MOVDnop x4:(MOVBUload [i-4] {s} p mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1   && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1         && mergePoint(b,x0,x1,x2,x3,x4) != nil  && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)      && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)      && clobber(o0) && clobber(o1) && clobber(o2)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i-4] p) mem))
+       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] y0:(REVW x0:(MOVWUload [i4] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i3] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i1] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i0] {s} p mem)))
+       // cond: i1 == i0+1     && i2 == i0+2   && i3 == i0+3   && i4 == i0+4   && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1         && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1         && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1         && mergePoint(b,x0,x1,x2,x3,x4) != nil  && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)      && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)      && clobber(o0) && clobber(o1) && clobber(o2)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
                if v.AuxInt != 56 {
@@ -7714,7 +8281,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x0.Op != OpARM64MOVWUload {
                        break
                }
-               i := x0.AuxInt
+               i4 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                mem := x0.Args[1]
@@ -7726,9 +8293,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != i-1 {
-                       break
-               }
+               i3 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -7746,9 +8311,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != i-2 {
-                       break
-               }
+               i2 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
@@ -7766,9 +8329,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               if x3.AuxInt != i-3 {
-                       break
-               }
+               i1 := x3.AuxInt
                if x3.Aux != s {
                        break
                }
@@ -7786,9 +8347,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if x4.Op != OpARM64MOVBUload {
                        break
                }
-               if x4.AuxInt != i-4 {
-                       break
-               }
+               i0 := x4.AuxInt
                if x4.Aux != s {
                        break
                }
@@ -7798,7 +8357,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                if mem != x4.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4)
@@ -7808,7 +8367,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
                v1.Aux = s
                v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i - 4
+               v2.AuxInt = i0
                v2.AddArg(p)
                v1.AddArg(v2)
                v1.AddArg(mem)
@@ -7820,7 +8379,7 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
 func rewriteValueARM64_OpARM64ORshiftRA(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (ORshiftRA  (MOVDconst [c]) x [d])
+       // match: (ORshiftRA (MOVDconst [c]) x [d])
        // cond:
        // result: (ORconst  [c] (SRAconst <x.Type> x [d]))
        for {
@@ -7839,7 +8398,7 @@ func rewriteValueARM64_OpARM64ORshiftRA(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (ORshiftRA  x (MOVDconst [c]) [d])
+       // match: (ORshiftRA x (MOVDconst [c]) [d])
        // cond:
        // result: (ORconst  x [int64(int64(c)>>uint64(d))])
        for {
@@ -7855,7 +8414,7 @@ func rewriteValueARM64_OpARM64ORshiftRA(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ORshiftRA  x y:(SRAconst x [c]) [d])
+       // match: (ORshiftRA x y:(SRAconst x [c]) [d])
        // cond: c==d
        // result: y
        for {
@@ -7882,7 +8441,7 @@ func rewriteValueARM64_OpARM64ORshiftRA(v *Value) bool {
 func rewriteValueARM64_OpARM64ORshiftRL(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (ORshiftRL  (MOVDconst [c]) x [d])
+       // match: (ORshiftRL (MOVDconst [c]) x [d])
        // cond:
        // result: (ORconst  [c] (SRLconst <x.Type> x [d]))
        for {
@@ -7901,7 +8460,7 @@ func rewriteValueARM64_OpARM64ORshiftRL(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (ORshiftRL  x (MOVDconst [c]) [d])
+       // match: (ORshiftRL x (MOVDconst [c]) [d])
        // cond:
        // result: (ORconst  x [int64(uint64(c)>>uint64(d))])
        for {
@@ -7917,7 +8476,7 @@ func rewriteValueARM64_OpARM64ORshiftRL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ORshiftRL  x y:(SRLconst x [c]) [d])
+       // match: (ORshiftRL x y:(SRLconst x [c]) [d])
        // cond: c==d
        // result: y
        for {
@@ -7939,7 +8498,7 @@ func rewriteValueARM64_OpARM64ORshiftRL(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: ( ORshiftRL [c] (SLLconst x [64-c]) x)
+       // match: (ORshiftRL [c] (SLLconst x [64-c]) x)
        // cond:
        // result: (RORconst [   c] x)
        for {
@@ -7960,7 +8519,7 @@ func rewriteValueARM64_OpARM64ORshiftRL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: ( ORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x))
+       // match: (ORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x))
        // cond: c < 32 && t.Size() == 4
        // result: (RORWconst [   c] x)
        for {
@@ -8238,7 +8797,7 @@ func rewriteValueARM64_OpARM64SUB(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64SUBconst(v *Value) bool {
-       // match: (SUBconst [0]  x)
+       // match: (SUBconst [0] x)
        // cond:
        // result: x
        for {
@@ -8455,7 +9014,7 @@ func rewriteValueARM64_OpARM64UDIV(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (UDIV  (MOVDconst [c]) (MOVDconst [d]))
+       // match: (UDIV (MOVDconst [c]) (MOVDconst [d]))
        // cond:
        // result: (MOVDconst [int64(uint64(c)/uint64(d))])
        for {
@@ -8566,7 +9125,7 @@ func rewriteValueARM64_OpARM64UMOD(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (UMOD  (MOVDconst [c]) (MOVDconst [d]))
+       // match: (UMOD (MOVDconst [c]) (MOVDconst [d]))
        // cond:
        // result: (MOVDconst [int64(uint64(c)%uint64(d))])
        for {
@@ -8642,31 +9201,31 @@ func rewriteValueARM64_OpARM64UMODW(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64XOR(v *Value) bool {
-       // match: (XOR (MOVDconst [c]) x)
+       // match: (XOR x (MOVDconst [c]))
        // cond:
        // result: (XORconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpARM64XORconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (XOR x (MOVDconst [c]))
+       // match: (XOR (MOVDconst [c]) x)
        // cond:
        // result: (XORconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpARM64XORconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -8789,7 +9348,7 @@ func rewriteValueARM64_OpARM64XOR(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64XORconst(v *Value) bool {
-       // match: (XORconst [0]  x)
+       // match: (XORconst [0] x)
        // cond:
        // result: x
        for {
@@ -9408,7 +9967,7 @@ func rewriteValueARM64_OpAtomicExchange64(v *Value) bool {
        }
 }
 func rewriteValueARM64_OpAtomicLoad32(v *Value) bool {
-       // match: (AtomicLoad32  ptr mem)
+       // match: (AtomicLoad32 ptr mem)
        // cond:
        // result: (LDARW ptr mem)
        for {
@@ -9421,7 +9980,7 @@ func rewriteValueARM64_OpAtomicLoad32(v *Value) bool {
        }
 }
 func rewriteValueARM64_OpAtomicLoad64(v *Value) bool {
-       // match: (AtomicLoad64  ptr mem)
+       // match: (AtomicLoad64 ptr mem)
        // cond:
        // result: (LDAR  ptr mem)
        for {
@@ -9447,7 +10006,7 @@ func rewriteValueARM64_OpAtomicLoadPtr(v *Value) bool {
        }
 }
 func rewriteValueARM64_OpAtomicOr8(v *Value) bool {
-       // match: (AtomicOr8  ptr val mem)
+       // match: (AtomicOr8 ptr val mem)
        // cond:
        // result: (LoweredAtomicOr8  ptr val mem)
        for {
@@ -9462,7 +10021,7 @@ func rewriteValueARM64_OpAtomicOr8(v *Value) bool {
        }
 }
 func rewriteValueARM64_OpAtomicStore32(v *Value) bool {
-       // match: (AtomicStore32      ptr val mem)
+       // match: (AtomicStore32 ptr val mem)
        // cond:
        // result: (STLRW ptr val mem)
        for {
@@ -9477,7 +10036,7 @@ func rewriteValueARM64_OpAtomicStore32(v *Value) bool {
        }
 }
 func rewriteValueARM64_OpAtomicStore64(v *Value) bool {
-       // match: (AtomicStore64      ptr val mem)
+       // match: (AtomicStore64 ptr val mem)
        // cond:
        // result: (STLR  ptr val mem)
        for {
@@ -11468,7 +12027,7 @@ func rewriteValueARM64_OpLsh16x32(v *Value) bool {
 func rewriteValueARM64_OpLsh16x64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh16x64  x (MOVDconst [c]))
+       // match: (Lsh16x64 x (MOVDconst [c]))
        // cond: uint64(c) < 16
        // result: (SLLconst x [c])
        for {
@@ -11486,7 +12045,7 @@ func rewriteValueARM64_OpLsh16x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh16x64  _ (MOVDconst [c]))
+       // match: (Lsh16x64 _ (MOVDconst [c]))
        // cond: uint64(c) >= 16
        // result: (MOVDconst [0])
        for {
@@ -11529,7 +12088,7 @@ func rewriteValueARM64_OpLsh16x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh16x8  <t> x y)
+       // match: (Lsh16x8 <t> x y)
        // cond:
        // result: (CSELULT (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
        for {
@@ -11620,7 +12179,7 @@ func rewriteValueARM64_OpLsh32x32(v *Value) bool {
 func rewriteValueARM64_OpLsh32x64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh32x64  x (MOVDconst [c]))
+       // match: (Lsh32x64 x (MOVDconst [c]))
        // cond: uint64(c) < 32
        // result: (SLLconst x [c])
        for {
@@ -11638,7 +12197,7 @@ func rewriteValueARM64_OpLsh32x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh32x64  _ (MOVDconst [c]))
+       // match: (Lsh32x64 _ (MOVDconst [c]))
        // cond: uint64(c) >= 32
        // result: (MOVDconst [0])
        for {
@@ -11681,7 +12240,7 @@ func rewriteValueARM64_OpLsh32x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh32x8  <t> x y)
+       // match: (Lsh32x8 <t> x y)
        // cond:
        // result: (CSELULT (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
        for {
@@ -11772,7 +12331,7 @@ func rewriteValueARM64_OpLsh64x32(v *Value) bool {
 func rewriteValueARM64_OpLsh64x64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh64x64  x (MOVDconst [c]))
+       // match: (Lsh64x64 x (MOVDconst [c]))
        // cond: uint64(c) < 64
        // result: (SLLconst x [c])
        for {
@@ -11790,7 +12349,7 @@ func rewriteValueARM64_OpLsh64x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh64x64  _ (MOVDconst [c]))
+       // match: (Lsh64x64 _ (MOVDconst [c]))
        // cond: uint64(c) >= 64
        // result: (MOVDconst [0])
        for {
@@ -11833,7 +12392,7 @@ func rewriteValueARM64_OpLsh64x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh64x8  <t> x y)
+       // match: (Lsh64x8 <t> x y)
        // cond:
        // result: (CSELULT (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
        for {
@@ -11924,7 +12483,7 @@ func rewriteValueARM64_OpLsh8x32(v *Value) bool {
 func rewriteValueARM64_OpLsh8x64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh8x64   x (MOVDconst [c]))
+       // match: (Lsh8x64 x (MOVDconst [c]))
        // cond: uint64(c) < 8
        // result: (SLLconst x [c])
        for {
@@ -11942,7 +12501,7 @@ func rewriteValueARM64_OpLsh8x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh8x64   _ (MOVDconst [c]))
+       // match: (Lsh8x64 _ (MOVDconst [c]))
        // cond: uint64(c) >= 8
        // result: (MOVDconst [0])
        for {
@@ -11985,7 +12544,7 @@ func rewriteValueARM64_OpLsh8x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh8x8  <t> x y)
+       // match: (Lsh8x8 <t> x y)
        // cond:
        // result: (CSELULT (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
        for {
@@ -13098,7 +13657,7 @@ func rewriteValueARM64_OpRsh16Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16Ux8  <t> x y)
+       // match: (Rsh16Ux8 <t> x y)
        // cond:
        // result: (CSELULT (SRL <t> (ZeroExt16to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
        for {
@@ -13195,7 +13754,7 @@ func rewriteValueARM64_OpRsh16x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16x64  x (MOVDconst [c]))
+       // match: (Rsh16x64 x (MOVDconst [c]))
        // cond: uint64(c) < 16
        // result: (SRAconst (SignExt16to64 x) [c])
        for {
@@ -13263,7 +13822,7 @@ func rewriteValueARM64_OpRsh16x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16x8  x y)
+       // match: (Rsh16x8 x y)
        // cond:
        // result: (SRA (SignExt16to64 x) (CSELULT <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
        for {
@@ -13426,7 +13985,7 @@ func rewriteValueARM64_OpRsh32Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32Ux8  <t> x y)
+       // match: (Rsh32Ux8 <t> x y)
        // cond:
        // result: (CSELULT (SRL <t> (ZeroExt32to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
        for {
@@ -13523,7 +14082,7 @@ func rewriteValueARM64_OpRsh32x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32x64  x (MOVDconst [c]))
+       // match: (Rsh32x64 x (MOVDconst [c]))
        // cond: uint64(c) < 32
        // result: (SRAconst (SignExt32to64 x) [c])
        for {
@@ -13591,7 +14150,7 @@ func rewriteValueARM64_OpRsh32x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32x8  x y)
+       // match: (Rsh32x8 x y)
        // cond:
        // result: (SRA (SignExt32to64 x) (CSELULT <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
        for {
@@ -13744,7 +14303,7 @@ func rewriteValueARM64_OpRsh64Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh64Ux8  <t> x y)
+       // match: (Rsh64Ux8 <t> x y)
        // cond:
        // result: (CSELULT (SRL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
        for {
@@ -13833,7 +14392,7 @@ func rewriteValueARM64_OpRsh64x32(v *Value) bool {
 func rewriteValueARM64_OpRsh64x64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh64x64  x (MOVDconst [c]))
+       // match: (Rsh64x64 x (MOVDconst [c]))
        // cond: uint64(c) < 64
        // result: (SRAconst x [c])
        for {
@@ -13895,7 +14454,7 @@ func rewriteValueARM64_OpRsh64x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh64x8  x y)
+       // match: (Rsh64x8 x y)
        // cond:
        // result: (SRA x (CSELULT <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
        for {
@@ -13991,7 +14550,7 @@ func rewriteValueARM64_OpRsh8Ux64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8Ux64  x (MOVDconst [c]))
+       // match: (Rsh8Ux64 x (MOVDconst [c]))
        // cond: uint64(c) < 8
        // result: (SRLconst (ZeroExt8to64  x) [c])
        for {
@@ -14011,7 +14570,7 @@ func rewriteValueARM64_OpRsh8Ux64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8Ux64  _ (MOVDconst [c]))
+       // match: (Rsh8Ux64 _ (MOVDconst [c]))
        // cond: uint64(c) >= 8
        // result: (MOVDconst [0])
        for {
@@ -14056,7 +14615,7 @@ func rewriteValueARM64_OpRsh8Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8Ux8  <t> x y)
+       // match: (Rsh8Ux8 <t> x y)
        // cond:
        // result: (CSELULT (SRL <t> (ZeroExt8to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
        for {
@@ -14153,7 +14712,7 @@ func rewriteValueARM64_OpRsh8x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8x64   x (MOVDconst [c]))
+       // match: (Rsh8x64 x (MOVDconst [c]))
        // cond: uint64(c) < 8
        // result: (SRAconst (SignExt8to64  x) [c])
        for {
@@ -14173,7 +14732,7 @@ func rewriteValueARM64_OpRsh8x64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x64  x (MOVDconst [c]))
+       // match: (Rsh8x64 x (MOVDconst [c]))
        // cond: uint64(c) >= 8
        // result: (SRAconst (SignExt8to64  x) [63])
        for {
@@ -14221,7 +14780,7 @@ func rewriteValueARM64_OpRsh8x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8x8  x y)
+       // match: (Rsh8x8 x y)
        // cond:
        // result: (SRA (SignExt8to64 x) (CSELULT <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
        for {
index 3dd97ad0afa313e5be3688b2e2e7dc135884c7ca..082c67ad7828d883bfab74c3011db7d51e9dd452 100644 (file)
@@ -688,7 +688,7 @@ func rewriteValueMIPS_OpAtomicAnd8(v *Value) bool {
        _ = config
        types := &b.Func.Config.Types
        _ = types
-       // match: (AtomicAnd8  ptr val mem)
+       // match: (AtomicAnd8 ptr val mem)
        // cond: !config.BigEndian
        // result: (LoweredAtomicAnd (AND <types.UInt32Ptr> (MOVWconst [^3]) ptr)               (OR <types.UInt32> (SLL <types.UInt32> (ZeroExt8to32 val)                       (SLLconst <types.UInt32> [3]                            (ANDconst  <types.UInt32> [3] ptr)))            (NORconst [0] <types.UInt32> (SLL <types.UInt32>                        (MOVWconst [0xff]) (SLLconst <types.UInt32> [3]                                 (ANDconst <types.UInt32> [3] ptr))))) mem)
        for {
@@ -737,7 +737,7 @@ func rewriteValueMIPS_OpAtomicAnd8(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (AtomicAnd8  ptr val mem)
+       // match: (AtomicAnd8 ptr val mem)
        // cond: config.BigEndian
        // result: (LoweredAtomicAnd (AND <types.UInt32Ptr> (MOVWconst [^3]) ptr)               (OR <types.UInt32> (SLL <types.UInt32> (ZeroExt8to32 val)                       (SLLconst <types.UInt32> [3]                            (ANDconst  <types.UInt32> [3]                                   (XORconst <types.UInt32> [3] ptr))))            (NORconst [0] <types.UInt32> (SLL <types.UInt32>                        (MOVWconst [0xff]) (SLLconst <types.UInt32> [3]                                 (ANDconst <types.UInt32> [3]                                    (XORconst <types.UInt32> [3] ptr)))))) mem)
        for {
@@ -827,7 +827,7 @@ func rewriteValueMIPS_OpAtomicExchange32(v *Value) bool {
        }
 }
 func rewriteValueMIPS_OpAtomicLoad32(v *Value) bool {
-       // match: (AtomicLoad32  ptr mem)
+       // match: (AtomicLoad32 ptr mem)
        // cond:
        // result: (LoweredAtomicLoad ptr mem)
        for {
@@ -929,7 +929,7 @@ func rewriteValueMIPS_OpAtomicOr8(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpAtomicStore32(v *Value) bool {
-       // match: (AtomicStore32      ptr val mem)
+       // match: (AtomicStore32 ptr val mem)
        // cond:
        // result: (LoweredAtomicStore ptr val mem)
        for {
@@ -2779,31 +2779,31 @@ func rewriteValueMIPS_OpLsh8x8(v *Value) bool {
        }
 }
 func rewriteValueMIPS_OpMIPSADD(v *Value) bool {
-       // match: (ADD (MOVWconst [c]) x)
+       // match: (ADD x (MOVWconst [c]))
        // cond:
        // result: (ADDconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpMIPSMOVWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSMOVWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpMIPSADDconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (ADD x (MOVWconst [c]))
+       // match: (ADD (MOVWconst [c]) x)
        // cond:
        // result: (ADDconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpMIPSMOVWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMOVWconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpMIPSADDconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -2860,7 +2860,7 @@ func rewriteValueMIPS_OpMIPSADDconst(v *Value) bool {
                v.AddArg(ptr)
                return true
        }
-       // match: (ADDconst [0]  x)
+       // match: (ADDconst [0] x)
        // cond:
        // result: x
        for {
@@ -2924,31 +2924,31 @@ func rewriteValueMIPS_OpMIPSADDconst(v *Value) bool {
 func rewriteValueMIPS_OpMIPSAND(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (AND (MOVWconst [c]) x)
+       // match: (AND x (MOVWconst [c]))
        // cond:
        // result: (ANDconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpMIPSMOVWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSMOVWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpMIPSANDconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (AND x (MOVWconst [c]))
+       // match: (AND (MOVWconst [c]) x)
        // cond:
        // result: (ANDconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpMIPSMOVWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMOVWconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpMIPSANDconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -2995,10 +2995,38 @@ func rewriteValueMIPS_OpMIPSAND(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (AND (SGTUconst [1] y) (SGTUconst [1] x))
+       // cond:
+       // result: (SGTUconst [1] (OR <x.Type> x y))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSSGTUconst {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               y := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSSGTUconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               x := v_1.Args[0]
+               v.reset(OpMIPSSGTUconst)
+               v.AuxInt = 1
+               v0 := b.NewValue0(v.Pos, OpMIPSOR, x.Type)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
        return false
 }
 func rewriteValueMIPS_OpMIPSANDconst(v *Value) bool {
-       // match: (ANDconst [0]  _)
+       // match: (ANDconst [0] _)
        // cond:
        // result: (MOVWconst [0])
        for {
@@ -3349,7 +3377,7 @@ func rewriteValueMIPS_OpMIPSMOVBUreg(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSMOVBload(v *Value) bool {
-       // match: (MOVBload  [off1] {sym} x:(ADDconst [off2] ptr) mem)
+       // match: (MOVBload [off1] {sym} x:(ADDconst [off2] ptr) mem)
        // cond: (is16Bit(off1+off2) || x.Uses == 1)
        // result: (MOVBload  [off1+off2] {sym} ptr mem)
        for {
@@ -3492,7 +3520,7 @@ func rewriteValueMIPS_OpMIPSMOVBreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVBreg  (MOVWconst [c]))
+       // match: (MOVBreg (MOVWconst [c]))
        // cond:
        // result: (MOVWconst [int64(int8(c))])
        for {
@@ -3739,7 +3767,7 @@ func rewriteValueMIPS_OpMIPSMOVBstorezero(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSMOVDload(v *Value) bool {
-       // match: (MOVDload  [off1] {sym} x:(ADDconst [off2] ptr) mem)
+       // match: (MOVDload [off1] {sym} x:(ADDconst [off2] ptr) mem)
        // cond: (is16Bit(off1+off2) || x.Uses == 1)
        // result: (MOVDload  [off1+off2] {sym} ptr mem)
        for {
@@ -3866,7 +3894,7 @@ func rewriteValueMIPS_OpMIPSMOVDstore(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSMOVFload(v *Value) bool {
-       // match: (MOVFload  [off1] {sym} x:(ADDconst [off2] ptr) mem)
+       // match: (MOVFload [off1] {sym} x:(ADDconst [off2] ptr) mem)
        // cond: (is16Bit(off1+off2) || x.Uses == 1)
        // result: (MOVFload  [off1+off2] {sym} ptr mem)
        for {
@@ -4173,7 +4201,7 @@ func rewriteValueMIPS_OpMIPSMOVHUreg(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSMOVHload(v *Value) bool {
-       // match: (MOVHload  [off1] {sym} x:(ADDconst [off2] ptr) mem)
+       // match: (MOVHload [off1] {sym} x:(ADDconst [off2] ptr) mem)
        // cond: (is16Bit(off1+off2) || x.Uses == 1)
        // result: (MOVHload  [off1+off2] {sym} ptr mem)
        for {
@@ -4364,7 +4392,7 @@ func rewriteValueMIPS_OpMIPSMOVHreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVHreg  (MOVWconst [c]))
+       // match: (MOVHreg (MOVWconst [c]))
        // cond:
        // result: (MOVWconst [int64(int16(c))])
        for {
@@ -4569,7 +4597,7 @@ func rewriteValueMIPS_OpMIPSMOVHstorezero(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSMOVWload(v *Value) bool {
-       // match: (MOVWload  [off1] {sym} x:(ADDconst [off2] ptr) mem)
+       // match: (MOVWload [off1] {sym} x:(ADDconst [off2] ptr) mem)
        // cond: (is16Bit(off1+off2) || x.Uses == 1)
        // result: (MOVWload  [off1+off2] {sym} ptr mem)
        for {
@@ -4654,7 +4682,7 @@ func rewriteValueMIPS_OpMIPSMOVWreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVWreg  (MOVWconst [c]))
+       // match: (MOVWreg (MOVWconst [c]))
        // cond:
        // result: (MOVWconst [c])
        for {
@@ -4817,7 +4845,7 @@ func rewriteValueMIPS_OpMIPSMOVWstorezero(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSMUL(v *Value) bool {
-       // match: (MUL (MOVWconst [0]) _ )
+       // match: (MUL (MOVWconst [0]) _)
        // cond:
        // result: (MOVWconst [0])
        for {
@@ -4832,7 +4860,22 @@ func rewriteValueMIPS_OpMIPSMUL(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (MUL (MOVWconst [1]) x )
+       // match: (MUL _ (MOVWconst [0]))
+       // cond:
+       // result: (MOVWconst [0])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpMIPSMOVWconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MUL (MOVWconst [1]) x)
        // cond:
        // result: x
        for {
@@ -4849,7 +4892,24 @@ func rewriteValueMIPS_OpMIPSMUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVWconst [-1]) x )
+       // match: (MUL x (MOVWconst [1]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVWconst [-1]) x)
        // cond:
        // result: (NEG x)
        for {
@@ -4865,7 +4925,23 @@ func rewriteValueMIPS_OpMIPSMUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVWconst [c]) x )
+       // match: (MUL x (MOVWconst [-1]))
+       // cond:
+       // result: (NEG x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpMIPSNEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVWconst [c]) x)
        // cond: isPowerOfTwo(int64(uint32(c)))
        // result: (SLLconst [log2(int64(uint32(c)))] x)
        for {
@@ -4883,6 +4959,24 @@ func rewriteValueMIPS_OpMIPSMUL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MUL x (MOVWconst [c]))
+       // cond: isPowerOfTwo(int64(uint32(c)))
+       // result: (SLLconst [log2(int64(uint32(c)))] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(int64(uint32(c)))) {
+                       break
+               }
+               v.reset(OpMIPSSLLconst)
+               v.AuxInt = log2(int64(uint32(c)))
+               v.AddArg(x)
+               return true
+       }
        // match: (MUL (MOVWconst [c]) (MOVWconst [d]))
        // cond:
        // result: (MOVWconst [int64(int32(c)*int32(d))])
@@ -4901,6 +4995,24 @@ func rewriteValueMIPS_OpMIPSMUL(v *Value) bool {
                v.AuxInt = int64(int32(c) * int32(d))
                return true
        }
+       // match: (MUL (MOVWconst [d]) (MOVWconst [c]))
+       // cond:
+       // result: (MOVWconst [int64(int32(c)*int32(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMOVWconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpMIPSMOVWconst)
+               v.AuxInt = int64(int32(c) * int32(d))
+               return true
+       }
        return false
 }
 func rewriteValueMIPS_OpMIPSNEG(v *Value) bool {
@@ -4920,31 +5032,31 @@ func rewriteValueMIPS_OpMIPSNEG(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSNOR(v *Value) bool {
-       // match: (NOR (MOVWconst [c]) x)
+       // match: (NOR x (MOVWconst [c]))
        // cond:
        // result: (NORconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpMIPSMOVWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSMOVWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpMIPSNORconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (NOR x (MOVWconst [c]))
+       // match: (NOR (MOVWconst [c]) x)
        // cond:
        // result: (NORconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpMIPSMOVWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMOVWconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpMIPSNORconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -4972,37 +5084,37 @@ func rewriteValueMIPS_OpMIPSNORconst(v *Value) bool {
 func rewriteValueMIPS_OpMIPSOR(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (OR  (MOVWconst [c]) x)
+       // match: (OR x (MOVWconst [c]))
        // cond:
        // result: (ORconst  [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpMIPSMOVWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSMOVWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpMIPSORconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (OR  x (MOVWconst [c]))
+       // match: (OR (MOVWconst [c]) x)
        // cond:
        // result: (ORconst  [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpMIPSMOVWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMOVWconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpMIPSORconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (OR  x x)
+       // match: (OR x x)
        // cond:
        // result: x
        for {
@@ -5036,10 +5148,31 @@ func rewriteValueMIPS_OpMIPSOR(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (OR (SGTUzero y) (SGTUzero x))
+       // cond:
+       // result: (SGTUzero (OR <x.Type> x y))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSSGTUzero {
+                       break
+               }
+               y := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSSGTUzero {
+                       break
+               }
+               x := v_1.Args[0]
+               v.reset(OpMIPSSGTUzero)
+               v0 := b.NewValue0(v.Pos, OpMIPSOR, x.Type)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
        return false
 }
 func rewriteValueMIPS_OpMIPSORconst(v *Value) bool {
-       // match: (ORconst  [0]  x)
+       // match: (ORconst [0] x)
        // cond:
        // result: x
        for {
@@ -5052,7 +5185,7 @@ func rewriteValueMIPS_OpMIPSORconst(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ORconst  [-1] _)
+       // match: (ORconst [-1] _)
        // cond:
        // result: (MOVWconst [-1])
        for {
@@ -5096,7 +5229,7 @@ func rewriteValueMIPS_OpMIPSORconst(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSSGT(v *Value) bool {
-       // match: (SGT  (MOVWconst [c]) x)
+       // match: (SGT (MOVWconst [c]) x)
        // cond:
        // result: (SGTconst  [c] x)
        for {
@@ -5737,7 +5870,7 @@ func rewriteValueMIPS_OpMIPSSUB(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSSUBconst(v *Value) bool {
-       // match: (SUBconst [0]  x)
+       // match: (SUBconst [0] x)
        // cond:
        // result: x
        for {
@@ -5799,31 +5932,31 @@ func rewriteValueMIPS_OpMIPSSUBconst(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSXOR(v *Value) bool {
-       // match: (XOR (MOVWconst [c]) x)
+       // match: (XOR x (MOVWconst [c]))
        // cond:
        // result: (XORconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpMIPSMOVWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPSMOVWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                v.reset(OpMIPSXORconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (XOR x (MOVWconst [c]))
+       // match: (XOR (MOVWconst [c]) x)
        // cond:
        // result: (XORconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpMIPSMOVWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMOVWconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                v.reset(OpMIPSXORconst)
                v.AuxInt = c
                v.AddArg(x)
@@ -5844,7 +5977,7 @@ func rewriteValueMIPS_OpMIPSXOR(v *Value) bool {
        return false
 }
 func rewriteValueMIPS_OpMIPSXORconst(v *Value) bool {
-       // match: (XORconst [0]  x)
+       // match: (XORconst [0] x)
        // cond:
        // result: x
        for {
@@ -7779,7 +7912,33 @@ func rewriteValueMIPS_OpSelect0(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Select0 (MULTU (MOVWconst [0]) _ ))
+       // match: (Select0 (MULTU (MOVWconst [c]) x))
+       // cond: x.Op != OpMIPSMOVWconst
+       // result: (Select0 (MULTU (MOVWconst [c]) x ))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpMIPSMOVWconst {
+                       break
+               }
+               c := v_0_0.AuxInt
+               x := v_0.Args[1]
+               if !(x.Op != OpMIPSMOVWconst) {
+                       break
+               }
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpMIPSMULTU, MakeTuple(types.UInt32, types.UInt32))
+               v1 := b.NewValue0(v.Pos, OpMIPSMOVWconst, types.UInt32)
+               v1.AuxInt = c
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Select0 (MULTU (MOVWconst [0]) _))
        // cond:
        // result: (MOVWconst [0])
        for {
@@ -7798,7 +7957,26 @@ func rewriteValueMIPS_OpSelect0(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Select0 (MULTU (MOVWconst [1]) _ ))
+       // match: (Select0 (MULTU _ (MOVWconst [0])))
+       // cond:
+       // result: (MOVWconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               if v_0_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpMIPSMOVWconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (Select0 (MULTU (MOVWconst [1]) _))
        // cond:
        // result: (MOVWconst [0])
        for {
@@ -7817,7 +7995,26 @@ func rewriteValueMIPS_OpSelect0(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Select0 (MULTU (MOVWconst [-1]) x ))
+       // match: (Select0 (MULTU _ (MOVWconst [1])))
+       // cond:
+       // result: (MOVWconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               if v_0_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpMIPSMOVWconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (Select0 (MULTU (MOVWconst [-1]) x))
        // cond:
        // result: (CMOVZ (ADDconst <x.Type> [-1] x) (MOVWconst [0]) x)
        for {
@@ -7844,7 +8041,34 @@ func rewriteValueMIPS_OpSelect0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Select0 (MULTU (MOVWconst [c]) x ))
+       // match: (Select0 (MULTU x (MOVWconst [-1])))
+       // cond:
+       // result: (CMOVZ (ADDconst <x.Type> [-1] x) (MOVWconst [0]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               if v_0_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpMIPSCMOVZ)
+               v0 := b.NewValue0(v.Pos, OpMIPSADDconst, x.Type)
+               v0.AuxInt = -1
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpMIPSMOVWconst, types.UInt32)
+               v1.AuxInt = 0
+               v.AddArg(v1)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Select0 (MULTU (MOVWconst [c]) x))
        // cond: isPowerOfTwo(int64(uint32(c)))
        // result: (SRLconst [32-log2(int64(uint32(c)))] x)
        for {
@@ -7866,7 +8090,29 @@ func rewriteValueMIPS_OpSelect0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Select0 (MULTU  (MOVWconst [c]) (MOVWconst [d])))
+       // match: (Select0 (MULTU x (MOVWconst [c])))
+       // cond: isPowerOfTwo(int64(uint32(c)))
+       // result: (SRLconst [32-log2(int64(uint32(c)))] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               c := v_0_1.AuxInt
+               if !(isPowerOfTwo(int64(uint32(c)))) {
+                       break
+               }
+               v.reset(OpMIPSSRLconst)
+               v.AuxInt = 32 - log2(int64(uint32(c)))
+               v.AddArg(x)
+               return true
+       }
+       // match: (Select0 (MULTU (MOVWconst [c]) (MOVWconst [d])))
        // cond:
        // result: (MOVWconst [(c*d)>>32])
        for {
@@ -7888,7 +8134,29 @@ func rewriteValueMIPS_OpSelect0(v *Value) bool {
                v.AuxInt = (c * d) >> 32
                return true
        }
-       // match: (Select0 (DIV  (MOVWconst [c]) (MOVWconst [d])))
+       // match: (Select0 (MULTU (MOVWconst [d]) (MOVWconst [c])))
+       // cond:
+       // result: (MOVWconst [(c*d)>>32])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpMIPSMOVWconst {
+                       break
+               }
+               d := v_0_0.AuxInt
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               c := v_0_1.AuxInt
+               v.reset(OpMIPSMOVWconst)
+               v.AuxInt = (c * d) >> 32
+               return true
+       }
+       // match: (Select0 (DIV (MOVWconst [c]) (MOVWconst [d])))
        // cond:
        // result: (MOVWconst [int64(int32(c)%int32(d))])
        for {
@@ -8005,7 +8273,33 @@ func rewriteValueMIPS_OpSelect1(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Select1 (MULTU (MOVWconst [0]) _ ))
+       // match: (Select1 (MULTU (MOVWconst [c]) x))
+       // cond: x.Op != OpMIPSMOVWconst
+       // result: (Select1 (MULTU (MOVWconst [c]) x ))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpMIPSMOVWconst {
+                       break
+               }
+               c := v_0_0.AuxInt
+               x := v_0.Args[1]
+               if !(x.Op != OpMIPSMOVWconst) {
+                       break
+               }
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpMIPSMULTU, MakeTuple(types.UInt32, types.UInt32))
+               v1 := b.NewValue0(v.Pos, OpMIPSMOVWconst, types.UInt32)
+               v1.AuxInt = c
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Select1 (MULTU (MOVWconst [0]) _))
        // cond:
        // result: (MOVWconst [0])
        for {
@@ -8024,7 +8318,26 @@ func rewriteValueMIPS_OpSelect1(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Select1 (MULTU (MOVWconst [1]) x ))
+       // match: (Select1 (MULTU _ (MOVWconst [0])))
+       // cond:
+       // result: (MOVWconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               if v_0_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpMIPSMOVWconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (Select1 (MULTU (MOVWconst [1]) x))
        // cond:
        // result: x
        for {
@@ -8045,7 +8358,28 @@ func rewriteValueMIPS_OpSelect1(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Select1 (MULTU (MOVWconst [-1]) x ))
+       // match: (Select1 (MULTU x (MOVWconst [1])))
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               if v_0_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Select1 (MULTU (MOVWconst [-1]) x))
        // cond:
        // result: (NEG <x.Type> x)
        for {
@@ -8066,7 +8400,28 @@ func rewriteValueMIPS_OpSelect1(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Select1 (MULTU (MOVWconst [c]) x ))
+       // match: (Select1 (MULTU x (MOVWconst [-1])))
+       // cond:
+       // result: (NEG <x.Type> x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               if v_0_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpMIPSNEG)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Select1 (MULTU (MOVWconst [c]) x))
        // cond: isPowerOfTwo(int64(uint32(c)))
        // result: (SLLconst [log2(int64(uint32(c)))] x)
        for {
@@ -8088,7 +8443,29 @@ func rewriteValueMIPS_OpSelect1(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Select1 (MULTU  (MOVWconst [c]) (MOVWconst [d])))
+       // match: (Select1 (MULTU x (MOVWconst [c])))
+       // cond: isPowerOfTwo(int64(uint32(c)))
+       // result: (SLLconst [log2(int64(uint32(c)))] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               c := v_0_1.AuxInt
+               if !(isPowerOfTwo(int64(uint32(c)))) {
+                       break
+               }
+               v.reset(OpMIPSSLLconst)
+               v.AuxInt = log2(int64(uint32(c)))
+               v.AddArg(x)
+               return true
+       }
+       // match: (Select1 (MULTU (MOVWconst [c]) (MOVWconst [d])))
        // cond:
        // result: (MOVWconst [int64(int32(uint32(c)*uint32(d)))])
        for {
@@ -8110,7 +8487,29 @@ func rewriteValueMIPS_OpSelect1(v *Value) bool {
                v.AuxInt = int64(int32(uint32(c) * uint32(d)))
                return true
        }
-       // match: (Select1 (DIV  (MOVWconst [c]) (MOVWconst [d])))
+       // match: (Select1 (MULTU (MOVWconst [d]) (MOVWconst [c])))
+       // cond:
+       // result: (MOVWconst [int64(int32(uint32(c)*uint32(d)))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPSMULTU {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpMIPSMOVWconst {
+                       break
+               }
+               d := v_0_0.AuxInt
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPSMOVWconst {
+                       break
+               }
+               c := v_0_1.AuxInt
+               v.reset(OpMIPSMOVWconst)
+               v.AuxInt = int64(int32(uint32(c) * uint32(d)))
+               return true
+       }
+       // match: (Select1 (DIV (MOVWconst [c]) (MOVWconst [d])))
        // cond:
        // result: (MOVWconst [int64(int32(c)/int32(d))])
        for {
@@ -9184,7 +9583,7 @@ func rewriteBlockMIPS(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (EQ  (MOVWconst [0]) yes no)
+               // match: (EQ (MOVWconst [0]) yes no)
                // cond:
                // result: (First nil yes no)
                for {
@@ -9203,7 +9602,7 @@ func rewriteBlockMIPS(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (EQ  (MOVWconst [c]) yes no)
+               // match: (EQ (MOVWconst [c]) yes no)
                // cond: c != 0
                // result: (First nil no yes)
                for {
@@ -9655,7 +10054,7 @@ func rewriteBlockMIPS(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (NE  (MOVWconst [0]) yes no)
+               // match: (NE (MOVWconst [0]) yes no)
                // cond:
                // result: (First nil no yes)
                for {
@@ -9675,7 +10074,7 @@ func rewriteBlockMIPS(b *Block) bool {
                        _ = yes
                        return true
                }
-               // match: (NE  (MOVWconst [c]) yes no)
+               // match: (NE (MOVWconst [c]) yes no)
                // cond: c != 0
                // result: (First nil yes no)
                for {
index e2f8a22b2e349014b37e434985f443ff00c154f5..8b4ff379a488a9b8b0e05276e02e7dd52a430a19 100644 (file)
@@ -2694,7 +2694,7 @@ func rewriteValueMIPS64_OpLsh16x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh16x8  <t> x y)
+       // match: (Lsh16x8 <t> x y)
        // cond:
        // result: (AND (NEGV <t> (SGTU (Const64 <types.UInt64> [64]) (ZeroExt8to64  y))) (SLLV <t> x (ZeroExt8to64  y)))
        for {
@@ -2818,7 +2818,7 @@ func rewriteValueMIPS64_OpLsh32x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh32x8  <t> x y)
+       // match: (Lsh32x8 <t> x y)
        // cond:
        // result: (AND (NEGV <t> (SGTU (Const64 <types.UInt64> [64]) (ZeroExt8to64  y))) (SLLV <t> x (ZeroExt8to64  y)))
        for {
@@ -2942,7 +2942,7 @@ func rewriteValueMIPS64_OpLsh64x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh64x8  <t> x y)
+       // match: (Lsh64x8 <t> x y)
        // cond:
        // result: (AND (NEGV <t> (SGTU (Const64 <types.UInt64> [64]) (ZeroExt8to64  y))) (SLLV <t> x (ZeroExt8to64  y)))
        for {
@@ -3066,7 +3066,7 @@ func rewriteValueMIPS64_OpLsh8x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh8x8  <t> x y)
+       // match: (Lsh8x8 <t> x y)
        // cond:
        // result: (AND (NEGV <t> (SGTU (Const64 <types.UInt64> [64]) (ZeroExt8to64  y))) (SLLV <t> x (ZeroExt8to64  y)))
        for {
@@ -3094,16 +3094,16 @@ func rewriteValueMIPS64_OpLsh8x8(v *Value) bool {
        }
 }
 func rewriteValueMIPS64_OpMIPS64ADDV(v *Value) bool {
-       // match: (ADDV (MOVVconst [c]) x)
+       // match: (ADDV x (MOVVconst [c]))
        // cond: is32Bit(c)
        // result: (ADDVconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpMIPS64MOVVconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPS64MOVVconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                if !(is32Bit(c)) {
                        break
                }
@@ -3112,16 +3112,16 @@ func rewriteValueMIPS64_OpMIPS64ADDV(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDV x (MOVVconst [c]))
+       // match: (ADDV (MOVVconst [c]) x)
        // cond: is32Bit(c)
        // result: (ADDVconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpMIPS64MOVVconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MOVVconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                if !(is32Bit(c)) {
                        break
                }
@@ -3181,7 +3181,7 @@ func rewriteValueMIPS64_OpMIPS64ADDVconst(v *Value) bool {
                v.AddArg(ptr)
                return true
        }
-       // match: (ADDVconst [0]  x)
+       // match: (ADDVconst [0] x)
        // cond:
        // result: x
        for {
@@ -3249,16 +3249,16 @@ func rewriteValueMIPS64_OpMIPS64ADDVconst(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64AND(v *Value) bool {
-       // match: (AND (MOVVconst [c]) x)
+       // match: (AND x (MOVVconst [c]))
        // cond: is32Bit(c)
        // result: (ANDconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpMIPS64MOVVconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPS64MOVVconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                if !(is32Bit(c)) {
                        break
                }
@@ -3267,16 +3267,16 @@ func rewriteValueMIPS64_OpMIPS64AND(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (AND x (MOVVconst [c]))
+       // match: (AND (MOVVconst [c]) x)
        // cond: is32Bit(c)
        // result: (ANDconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpMIPS64MOVVconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MOVVconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                if !(is32Bit(c)) {
                        break
                }
@@ -3301,7 +3301,7 @@ func rewriteValueMIPS64_OpMIPS64AND(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64ANDconst(v *Value) bool {
-       // match: (ANDconst [0]  _)
+       // match: (ANDconst [0] _)
        // cond:
        // result: (MOVVconst [0])
        for {
@@ -3448,7 +3448,7 @@ func rewriteValueMIPS64_OpMIPS64MOVBUreg(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64MOVBload(v *Value) bool {
-       // match: (MOVBload  [off1] {sym} (ADDVconst [off2] ptr) mem)
+       // match: (MOVBload [off1] {sym} (ADDVconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVBload  [off1+off2] {sym} ptr mem)
        for {
@@ -3522,7 +3522,7 @@ func rewriteValueMIPS64_OpMIPS64MOVBreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVBreg  (MOVVconst [c]))
+       // match: (MOVBreg (MOVVconst [c]))
        // cond:
        // result: (MOVVconst [int64(int8(c))])
        for {
@@ -3790,7 +3790,7 @@ func rewriteValueMIPS64_OpMIPS64MOVBstorezero(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64MOVDload(v *Value) bool {
-       // match: (MOVDload  [off1] {sym} (ADDVconst [off2] ptr) mem)
+       // match: (MOVDload [off1] {sym} (ADDVconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVDload  [off1+off2] {sym} ptr mem)
        for {
@@ -3894,7 +3894,7 @@ func rewriteValueMIPS64_OpMIPS64MOVDstore(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64MOVFload(v *Value) bool {
-       // match: (MOVFload  [off1] {sym} (ADDVconst [off2] ptr) mem)
+       // match: (MOVFload [off1] {sym} (ADDVconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVFload  [off1+off2] {sym} ptr mem)
        for {
@@ -4112,7 +4112,7 @@ func rewriteValueMIPS64_OpMIPS64MOVHUreg(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64MOVHload(v *Value) bool {
-       // match: (MOVHload  [off1] {sym} (ADDVconst [off2] ptr) mem)
+       // match: (MOVHload [off1] {sym} (ADDVconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVHload  [off1+off2] {sym} ptr mem)
        for {
@@ -4234,7 +4234,7 @@ func rewriteValueMIPS64_OpMIPS64MOVHreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVHreg  (MOVVconst [c]))
+       // match: (MOVHreg (MOVVconst [c]))
        // cond:
        // result: (MOVVconst [int64(int16(c))])
        for {
@@ -4460,7 +4460,7 @@ func rewriteValueMIPS64_OpMIPS64MOVHstorezero(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64MOVVload(v *Value) bool {
-       // match: (MOVVload  [off1] {sym} (ADDVconst [off2] ptr) mem)
+       // match: (MOVVload [off1] {sym} (ADDVconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVVload  [off1+off2] {sym} ptr mem)
        for {
@@ -4522,7 +4522,7 @@ func rewriteValueMIPS64_OpMIPS64MOVVreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVVreg  (MOVVconst [c]))
+       // match: (MOVVreg (MOVVconst [c]))
        // cond:
        // result: (MOVVconst [c])
        for {
@@ -4802,7 +4802,7 @@ func rewriteValueMIPS64_OpMIPS64MOVWUreg(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64MOVWload(v *Value) bool {
-       // match: (MOVWload  [off1] {sym} (ADDVconst [off2] ptr) mem)
+       // match: (MOVWload [off1] {sym} (ADDVconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
        // result: (MOVWload  [off1+off2] {sym} ptr mem)
        for {
@@ -4972,7 +4972,7 @@ func rewriteValueMIPS64_OpMIPS64MOVWreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVWreg  (MOVVconst [c]))
+       // match: (MOVWreg (MOVVconst [c]))
        // cond:
        // result: (MOVVconst [int64(int32(c))])
        for {
@@ -5172,16 +5172,16 @@ func rewriteValueMIPS64_OpMIPS64NEGV(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64NOR(v *Value) bool {
-       // match: (NOR (MOVVconst [c]) x)
+       // match: (NOR x (MOVVconst [c]))
        // cond: is32Bit(c)
        // result: (NORconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpMIPS64MOVVconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPS64MOVVconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                if !(is32Bit(c)) {
                        break
                }
@@ -5190,16 +5190,16 @@ func rewriteValueMIPS64_OpMIPS64NOR(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (NOR x (MOVVconst [c]))
+       // match: (NOR (MOVVconst [c]) x)
        // cond: is32Bit(c)
        // result: (NORconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpMIPS64MOVVconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MOVVconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                if !(is32Bit(c)) {
                        break
                }
@@ -5228,16 +5228,16 @@ func rewriteValueMIPS64_OpMIPS64NORconst(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64OR(v *Value) bool {
-       // match: (OR  (MOVVconst [c]) x)
+       // match: (OR x (MOVVconst [c]))
        // cond: is32Bit(c)
        // result: (ORconst  [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpMIPS64MOVVconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPS64MOVVconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                if !(is32Bit(c)) {
                        break
                }
@@ -5246,16 +5246,16 @@ func rewriteValueMIPS64_OpMIPS64OR(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (OR  x (MOVVconst [c]))
+       // match: (OR (MOVVconst [c]) x)
        // cond: is32Bit(c)
        // result: (ORconst  [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpMIPS64MOVVconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MOVVconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                if !(is32Bit(c)) {
                        break
                }
@@ -5264,7 +5264,7 @@ func rewriteValueMIPS64_OpMIPS64OR(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (OR  x x)
+       // match: (OR x x)
        // cond:
        // result: x
        for {
@@ -5280,7 +5280,7 @@ func rewriteValueMIPS64_OpMIPS64OR(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64ORconst(v *Value) bool {
-       // match: (ORconst  [0]  x)
+       // match: (ORconst [0] x)
        // cond:
        // result: x
        for {
@@ -5293,7 +5293,7 @@ func rewriteValueMIPS64_OpMIPS64ORconst(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ORconst  [-1] _)
+       // match: (ORconst [-1] _)
        // cond:
        // result: (MOVVconst [-1])
        for {
@@ -5340,7 +5340,7 @@ func rewriteValueMIPS64_OpMIPS64ORconst(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64SGT(v *Value) bool {
-       // match: (SGT  (MOVVconst [c]) x)
+       // match: (SGT (MOVVconst [c]) x)
        // cond: is32Bit(c)
        // result: (SGTconst  [c] x)
        for {
@@ -5904,7 +5904,7 @@ func rewriteValueMIPS64_OpMIPS64SUBV(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64SUBVconst(v *Value) bool {
-       // match: (SUBVconst [0]  x)
+       // match: (SUBVconst [0] x)
        // cond:
        // result: x
        for {
@@ -5972,16 +5972,16 @@ func rewriteValueMIPS64_OpMIPS64SUBVconst(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64XOR(v *Value) bool {
-       // match: (XOR (MOVVconst [c]) x)
+       // match: (XOR x (MOVVconst [c]))
        // cond: is32Bit(c)
        // result: (XORconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpMIPS64MOVVconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMIPS64MOVVconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                if !(is32Bit(c)) {
                        break
                }
@@ -5990,16 +5990,16 @@ func rewriteValueMIPS64_OpMIPS64XOR(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XOR x (MOVVconst [c]))
+       // match: (XOR (MOVVconst [c]) x)
        // cond: is32Bit(c)
        // result: (XORconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpMIPS64MOVVconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MOVVconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                if !(is32Bit(c)) {
                        break
                }
@@ -6023,7 +6023,7 @@ func rewriteValueMIPS64_OpMIPS64XOR(v *Value) bool {
        return false
 }
 func rewriteValueMIPS64_OpMIPS64XORconst(v *Value) bool {
-       // match: (XORconst [0]  x)
+       // match: (XORconst [0] x)
        // cond:
        // result: x
        for {
@@ -7354,7 +7354,7 @@ func rewriteValueMIPS64_OpRsh16Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16Ux8  <t> x y)
+       // match: (Rsh16Ux8 <t> x y)
        // cond:
        // result: (AND (NEGV <t> (SGTU (Const64 <types.UInt64> [64]) (ZeroExt8to64  y))) (SRLV <t> (ZeroExt16to64 x) (ZeroExt8to64  y)))
        for {
@@ -7486,7 +7486,7 @@ func rewriteValueMIPS64_OpRsh16x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16x8  <t> x y)
+       // match: (Rsh16x8 <t> x y)
        // cond:
        // result: (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64  y) (Const64 <types.UInt64> [63]))) (ZeroExt8to64  y)))
        for {
@@ -7618,7 +7618,7 @@ func rewriteValueMIPS64_OpRsh32Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32Ux8  <t> x y)
+       // match: (Rsh32Ux8 <t> x y)
        // cond:
        // result: (AND (NEGV <t> (SGTU (Const64 <types.UInt64> [64]) (ZeroExt8to64  y))) (SRLV <t> (ZeroExt32to64 x) (ZeroExt8to64  y)))
        for {
@@ -7750,7 +7750,7 @@ func rewriteValueMIPS64_OpRsh32x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32x8  <t> x y)
+       // match: (Rsh32x8 <t> x y)
        // cond:
        // result: (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64  y) (Const64 <types.UInt64> [63]))) (ZeroExt8to64  y)))
        for {
@@ -7876,7 +7876,7 @@ func rewriteValueMIPS64_OpRsh64Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh64Ux8  <t> x y)
+       // match: (Rsh64Ux8 <t> x y)
        // cond:
        // result: (AND (NEGV <t> (SGTU (Const64 <types.UInt64> [64]) (ZeroExt8to64  y))) (SRLV <t> x (ZeroExt8to64  y)))
        for {
@@ -8000,7 +8000,7 @@ func rewriteValueMIPS64_OpRsh64x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh64x8  <t> x y)
+       // match: (Rsh64x8 <t> x y)
        // cond:
        // result: (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt8to64  y) (Const64 <types.UInt64> [63]))) (ZeroExt8to64  y)))
        for {
@@ -8130,7 +8130,7 @@ func rewriteValueMIPS64_OpRsh8Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8Ux8  <t> x y)
+       // match: (Rsh8Ux8 <t> x y)
        // cond:
        // result: (AND (NEGV <t> (SGTU (Const64 <types.UInt64> [64]) (ZeroExt8to64  y))) (SRLV <t> (ZeroExt8to64 x) (ZeroExt8to64  y)))
        for {
@@ -8262,7 +8262,7 @@ func rewriteValueMIPS64_OpRsh8x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8x8  <t> x y)
+       // match: (Rsh8x8 <t> x y)
        // cond:
        // result: (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64  y) (Const64 <types.UInt64> [63]))) (ZeroExt8to64  y)))
        for {
@@ -8333,7 +8333,7 @@ func rewriteValueMIPS64_OpSelect0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Select0 (DIVV  (MOVVconst [c]) (MOVVconst [d])))
+       // match: (Select0 (DIVV (MOVVconst [c]) (MOVVconst [d])))
        // cond:
        // result: (MOVVconst [int64(c)%int64(d)])
        for {
@@ -8400,6 +8400,26 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Select1 (MULVU (MOVVconst [-1]) x))
+       // cond:
+       // result: (NEGV x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MULVU {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpMIPS64MOVVconst {
+                       break
+               }
+               if v_0_0.AuxInt != -1 {
+                       break
+               }
+               x := v_0.Args[1]
+               v.reset(OpMIPS64NEGV)
+               v.AddArg(x)
+               return true
+       }
        // match: (Select1 (MULVU _ (MOVVconst [0])))
        // cond:
        // result: (MOVVconst [0])
@@ -8419,6 +8439,25 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool {
                v.AuxInt = 0
                return true
        }
+       // match: (Select1 (MULVU (MOVVconst [0]) _))
+       // cond:
+       // result: (MOVVconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MULVU {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpMIPS64MOVVconst {
+                       break
+               }
+               if v_0_0.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpMIPS64MOVVconst)
+               v.AuxInt = 0
+               return true
+       }
        // match: (Select1 (MULVU x (MOVVconst [1])))
        // cond:
        // result: x
@@ -8440,6 +8479,27 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Select1 (MULVU (MOVVconst [1]) x))
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MULVU {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpMIPS64MOVVconst {
+                       break
+               }
+               if v_0_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Select1 (MULVU x (MOVVconst [c])))
        // cond: isPowerOfTwo(c)
        // result: (SLLVconst [log2(c)] x)
@@ -8462,6 +8522,28 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Select1 (MULVU (MOVVconst [c]) x))
+       // cond: isPowerOfTwo(c)
+       // result: (SLLVconst [log2(c)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MULVU {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpMIPS64MOVVconst {
+                       break
+               }
+               c := v_0_0.AuxInt
+               x := v_0.Args[1]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpMIPS64SLLVconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
        // match: (Select1 (MULVU (MOVVconst [-1]) x))
        // cond:
        // result: (NEGV x)
@@ -8482,6 +8564,26 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Select1 (MULVU x (MOVVconst [-1])))
+       // cond:
+       // result: (NEGV x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MULVU {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPS64MOVVconst {
+                       break
+               }
+               if v_0_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpMIPS64NEGV)
+               v.AddArg(x)
+               return true
+       }
        // match: (Select1 (MULVU (MOVVconst [0]) _))
        // cond:
        // result: (MOVVconst [0])
@@ -8501,6 +8603,25 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool {
                v.AuxInt = 0
                return true
        }
+       // match: (Select1 (MULVU _ (MOVVconst [0])))
+       // cond:
+       // result: (MOVVconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MULVU {
+                       break
+               }
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPS64MOVVconst {
+                       break
+               }
+               if v_0_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpMIPS64MOVVconst)
+               v.AuxInt = 0
+               return true
+       }
        // match: (Select1 (MULVU (MOVVconst [1]) x))
        // cond:
        // result: x
@@ -8522,6 +8643,27 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Select1 (MULVU x (MOVVconst [1])))
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MULVU {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPS64MOVVconst {
+                       break
+               }
+               if v_0_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Select1 (MULVU (MOVVconst [c]) x))
        // cond: isPowerOfTwo(c)
        // result: (SLLVconst [log2(c)] x)
@@ -8544,6 +8686,28 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Select1 (MULVU x (MOVVconst [c])))
+       // cond: isPowerOfTwo(c)
+       // result: (SLLVconst [log2(c)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MULVU {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPS64MOVVconst {
+                       break
+               }
+               c := v_0_1.AuxInt
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpMIPS64SLLVconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
        // match: (Select1 (DIVVU x (MOVVconst [1])))
        // cond:
        // result: x
@@ -8609,7 +8773,29 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool {
                v.AuxInt = c * d
                return true
        }
-       // match: (Select1 (DIVV  (MOVVconst [c]) (MOVVconst [d])))
+       // match: (Select1 (MULVU (MOVVconst [d]) (MOVVconst [c])))
+       // cond:
+       // result: (MOVVconst [c*d])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMIPS64MULVU {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpMIPS64MOVVconst {
+                       break
+               }
+               d := v_0_0.AuxInt
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpMIPS64MOVVconst {
+                       break
+               }
+               c := v_0_1.AuxInt
+               v.reset(OpMIPS64MOVVconst)
+               v.AuxInt = c * d
+               return true
+       }
+       // match: (Select1 (DIVV (MOVVconst [c]) (MOVVconst [d])))
        // cond:
        // result: (MOVVconst [int64(c)/int64(d)])
        for {
@@ -9836,7 +10022,7 @@ func rewriteBlockMIPS64(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (EQ  (MOVVconst [0]) yes no)
+               // match: (EQ (MOVVconst [0]) yes no)
                // cond:
                // result: (First nil yes no)
                for {
@@ -9855,7 +10041,7 @@ func rewriteBlockMIPS64(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (EQ  (MOVVconst [c]) yes no)
+               // match: (EQ (MOVVconst [c]) yes no)
                // cond: c != 0
                // result: (First nil no yes)
                for {
@@ -10275,7 +10461,7 @@ func rewriteBlockMIPS64(b *Block) bool {
                        _ = no
                        return true
                }
-               // match: (NE  (MOVVconst [0]) yes no)
+               // match: (NE (MOVVconst [0]) yes no)
                // cond:
                // result: (First nil no yes)
                for {
@@ -10295,7 +10481,7 @@ func rewriteBlockMIPS64(b *Block) bool {
                        _ = yes
                        return true
                }
-               // match: (NE  (MOVVconst [c]) yes no)
+               // match: (NE (MOVVconst [c]) yes no)
                // cond: c != 0
                // result: (First nil yes no)
                for {
index 090519bd32645b7a2ef185ac5a92dc07c6786fa9..6c9e1e54e0893684f774c43821bcae0afeaf0f25 100644 (file)
@@ -600,7 +600,7 @@ func rewriteValuePPC64(v *Value) bool {
        return false
 }
 func rewriteValuePPC64_OpAdd16(v *Value) bool {
-       // match: (Add16  x y)
+       // match: (Add16 x y)
        // cond:
        // result: (ADD x y)
        for {
@@ -613,7 +613,7 @@ func rewriteValuePPC64_OpAdd16(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpAdd32(v *Value) bool {
-       // match: (Add32  x y)
+       // match: (Add32 x y)
        // cond:
        // result: (ADD x y)
        for {
@@ -639,7 +639,7 @@ func rewriteValuePPC64_OpAdd32F(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpAdd64(v *Value) bool {
-       // match: (Add64  x y)
+       // match: (Add64 x y)
        // cond:
        // result: (ADD  x y)
        for {
@@ -665,7 +665,7 @@ func rewriteValuePPC64_OpAdd64F(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpAdd8(v *Value) bool {
-       // match: (Add8   x y)
+       // match: (Add8 x y)
        // cond:
        // result: (ADD x y)
        for {
@@ -743,7 +743,7 @@ func rewriteValuePPC64_OpAnd64(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpAnd8(v *Value) bool {
-       // match: (And8  x y)
+       // match: (And8 x y)
        // cond:
        // result: (AND x y)
        for {
@@ -878,7 +878,7 @@ func rewriteValuePPC64_OpAtomicExchange64(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpAtomicLoad32(v *Value) bool {
-       // match: (AtomicLoad32  ptr mem)
+       // match: (AtomicLoad32 ptr mem)
        // cond:
        // result: (LoweredAtomicLoad32 ptr mem)
        for {
@@ -891,7 +891,7 @@ func rewriteValuePPC64_OpAtomicLoad32(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpAtomicLoad64(v *Value) bool {
-       // match: (AtomicLoad64  ptr mem)
+       // match: (AtomicLoad64 ptr mem)
        // cond:
        // result: (LoweredAtomicLoad64 ptr mem)
        for {
@@ -917,7 +917,7 @@ func rewriteValuePPC64_OpAtomicLoadPtr(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpAtomicOr8(v *Value) bool {
-       // match: (AtomicOr8  ptr val mem)
+       // match: (AtomicOr8 ptr val mem)
        // cond:
        // result: (LoweredAtomicOr8  ptr val mem)
        for {
@@ -932,7 +932,7 @@ func rewriteValuePPC64_OpAtomicOr8(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpAtomicStore32(v *Value) bool {
-       // match: (AtomicStore32      ptr val mem)
+       // match: (AtomicStore32 ptr val mem)
        // cond:
        // result: (LoweredAtomicStore32 ptr val mem)
        for {
@@ -947,7 +947,7 @@ func rewriteValuePPC64_OpAtomicStore32(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpAtomicStore64(v *Value) bool {
-       // match: (AtomicStore64      ptr val mem)
+       // match: (AtomicStore64 ptr val mem)
        // cond:
        // result: (LoweredAtomicStore64 ptr val mem)
        for {
@@ -1037,7 +1037,7 @@ func rewriteValuePPC64_OpCom64(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpCom8(v *Value) bool {
-       // match: (Com8  x)
+       // match: (Com8 x)
        // cond:
        // result: (NOR x x)
        for {
@@ -1049,7 +1049,7 @@ func rewriteValuePPC64_OpCom8(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpConst16(v *Value) bool {
-       // match: (Const16  [val])
+       // match: (Const16 [val])
        // cond:
        // result: (MOVDconst [val])
        for {
@@ -1060,7 +1060,7 @@ func rewriteValuePPC64_OpConst16(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpConst32(v *Value) bool {
-       // match: (Const32  [val])
+       // match: (Const32 [val])
        // cond:
        // result: (MOVDconst [val])
        for {
@@ -1082,7 +1082,7 @@ func rewriteValuePPC64_OpConst32F(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpConst64(v *Value) bool {
-       // match: (Const64  [val])
+       // match: (Const64 [val])
        // cond:
        // result: (MOVDconst [val])
        for {
@@ -1104,7 +1104,7 @@ func rewriteValuePPC64_OpConst64F(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpConst8(v *Value) bool {
-       // match: (Const8   [val])
+       // match: (Const8 [val])
        // cond:
        // result: (MOVDconst [val])
        for {
@@ -1322,7 +1322,7 @@ func rewriteValuePPC64_OpDiv16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div16  x y)
+       // match: (Div16 x y)
        // cond:
        // result: (DIVW  (SignExt16to32 x) (SignExt16to32 y))
        for {
@@ -1360,7 +1360,7 @@ func rewriteValuePPC64_OpDiv16u(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpDiv32(v *Value) bool {
-       // match: (Div32  x y)
+       // match: (Div32 x y)
        // cond:
        // result: (DIVW  x y)
        for {
@@ -1399,7 +1399,7 @@ func rewriteValuePPC64_OpDiv32u(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpDiv64(v *Value) bool {
-       // match: (Div64  x y)
+       // match: (Div64 x y)
        // cond:
        // result: (DIVD  x y)
        for {
@@ -1442,7 +1442,7 @@ func rewriteValuePPC64_OpDiv8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div8   x y)
+       // match: (Div8 x y)
        // cond:
        // result: (DIVW  (SignExt8to32 x) (SignExt8to32 y))
        for {
@@ -1463,7 +1463,7 @@ func rewriteValuePPC64_OpDiv8u(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div8u  x y)
+       // match: (Div8u x y)
        // cond:
        // result: (DIVWU (ZeroExt8to32 x) (ZeroExt8to32 y))
        for {
@@ -2068,7 +2068,7 @@ func rewriteValuePPC64_OpGreater8U(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpHmul32(v *Value) bool {
-       // match: (Hmul32  x y)
+       // match: (Hmul32 x y)
        // cond:
        // result: (MULHW  x y)
        for {
@@ -2081,7 +2081,7 @@ func rewriteValuePPC64_OpHmul32(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpHmul32u(v *Value) bool {
-       // match: (Hmul32u  x y)
+       // match: (Hmul32u x y)
        // cond:
        // result: (MULHWU x y)
        for {
@@ -2094,7 +2094,7 @@ func rewriteValuePPC64_OpHmul32u(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpHmul64(v *Value) bool {
-       // match: (Hmul64  x y)
+       // match: (Hmul64 x y)
        // cond:
        // result: (MULHD  x y)
        for {
@@ -2107,7 +2107,7 @@ func rewriteValuePPC64_OpHmul64(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpHmul64u(v *Value) bool {
-       // match: (Hmul64u  x y)
+       // match: (Hmul64u x y)
        // cond:
        // result: (MULHDU x y)
        for {
@@ -2763,7 +2763,7 @@ func rewriteValuePPC64_OpLsh16x32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh16x32  x (Const64 [c]))
+       // match: (Lsh16x32 x (Const64 [c]))
        // cond: uint32(c) < 16
        // result: (SLWconst x [c])
        for {
@@ -2781,7 +2781,7 @@ func rewriteValuePPC64_OpLsh16x32(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh16x32  x (MOVDconst [c]))
+       // match: (Lsh16x32 x (MOVDconst [c]))
        // cond: uint32(c) < 16
        // result: (SLWconst x [c])
        for {
@@ -2826,7 +2826,7 @@ func rewriteValuePPC64_OpLsh16x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh16x64  x (Const64 [c]))
+       // match: (Lsh16x64 x (Const64 [c]))
        // cond: uint64(c) < 16
        // result: (SLWconst x [c])
        for {
@@ -2844,7 +2844,7 @@ func rewriteValuePPC64_OpLsh16x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh16x64  _ (Const64 [c]))
+       // match: (Lsh16x64 _ (Const64 [c]))
        // cond: uint64(c) >= 16
        // result: (MOVDconst [0])
        for {
@@ -2860,7 +2860,7 @@ func rewriteValuePPC64_OpLsh16x64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Lsh16x64  x (MOVDconst [c]))
+       // match: (Lsh16x64 x (MOVDconst [c]))
        // cond: uint64(c) < 16
        // result: (SLWconst x [c])
        for {
@@ -2957,7 +2957,7 @@ func rewriteValuePPC64_OpLsh32x32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh32x32  x (Const64 [c]))
+       // match: (Lsh32x32 x (Const64 [c]))
        // cond: uint32(c) < 32
        // result: (SLWconst x [c])
        for {
@@ -2975,7 +2975,7 @@ func rewriteValuePPC64_OpLsh32x32(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh32x32  x (MOVDconst [c]))
+       // match: (Lsh32x32 x (MOVDconst [c]))
        // cond: uint32(c) < 32
        // result: (SLWconst x [c])
        for {
@@ -3020,7 +3020,7 @@ func rewriteValuePPC64_OpLsh32x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh32x64  x (Const64 [c]))
+       // match: (Lsh32x64 x (Const64 [c]))
        // cond: uint64(c) < 32
        // result: (SLWconst x [c])
        for {
@@ -3038,7 +3038,7 @@ func rewriteValuePPC64_OpLsh32x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh32x64  _ (Const64 [c]))
+       // match: (Lsh32x64 _ (Const64 [c]))
        // cond: uint64(c) >= 32
        // result: (MOVDconst [0])
        for {
@@ -3054,7 +3054,7 @@ func rewriteValuePPC64_OpLsh32x64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Lsh32x64  x (MOVDconst [c]))
+       // match: (Lsh32x64 x (MOVDconst [c]))
        // cond: uint64(c) < 32
        // result: (SLWconst x [c])
        for {
@@ -3151,7 +3151,7 @@ func rewriteValuePPC64_OpLsh64x32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh64x32  x (Const64 [c]))
+       // match: (Lsh64x32 x (Const64 [c]))
        // cond: uint32(c) < 64
        // result: (SLDconst x [c])
        for {
@@ -3169,7 +3169,7 @@ func rewriteValuePPC64_OpLsh64x32(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh64x32  x (MOVDconst [c]))
+       // match: (Lsh64x32 x (MOVDconst [c]))
        // cond: uint32(c) < 64
        // result: (SLDconst x [c])
        for {
@@ -3214,7 +3214,7 @@ func rewriteValuePPC64_OpLsh64x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh64x64  x (Const64 [c]))
+       // match: (Lsh64x64 x (Const64 [c]))
        // cond: uint64(c) < 64
        // result: (SLDconst x [c])
        for {
@@ -3232,7 +3232,7 @@ func rewriteValuePPC64_OpLsh64x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh64x64  _ (Const64 [c]))
+       // match: (Lsh64x64 _ (Const64 [c]))
        // cond: uint64(c) >= 64
        // result: (MOVDconst [0])
        for {
@@ -3248,7 +3248,7 @@ func rewriteValuePPC64_OpLsh64x64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Lsh64x64  x (MOVDconst [c]))
+       // match: (Lsh64x64 x (MOVDconst [c]))
        // cond: uint64(c) < 64
        // result: (SLDconst x [c])
        for {
@@ -3345,7 +3345,7 @@ func rewriteValuePPC64_OpLsh8x32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh8x32   x (Const64 [c]))
+       // match: (Lsh8x32 x (Const64 [c]))
        // cond: uint32(c) < 8
        // result: (SLWconst x [c])
        for {
@@ -3363,7 +3363,7 @@ func rewriteValuePPC64_OpLsh8x32(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh8x32   x (MOVDconst [c]))
+       // match: (Lsh8x32 x (MOVDconst [c]))
        // cond: uint32(c) < 8
        // result: (SLWconst x [c])
        for {
@@ -3408,7 +3408,7 @@ func rewriteValuePPC64_OpLsh8x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh8x64   x (Const64 [c]))
+       // match: (Lsh8x64 x (Const64 [c]))
        // cond: uint64(c) < 8
        // result: (SLWconst x [c])
        for {
@@ -3426,7 +3426,7 @@ func rewriteValuePPC64_OpLsh8x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh8x64   _ (Const64 [c]))
+       // match: (Lsh8x64 _ (Const64 [c]))
        // cond: uint64(c) >= 8
        // result: (MOVDconst [0])
        for {
@@ -3442,7 +3442,7 @@ func rewriteValuePPC64_OpLsh8x64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Lsh8x64   x (MOVDconst [c]))
+       // match: (Lsh8x64 x (MOVDconst [c]))
        // cond: uint64(c) < 8
        // result: (SLWconst x [c])
        for {
@@ -3739,6 +3739,212 @@ func rewriteValuePPC64_OpMove(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (Move [4] {t} dst src mem)
+       // cond: t.(Type).Alignment()%4 == 0
+       // result: (MOVWstore dst (MOVWload src mem) mem)
+       for {
+               if v.AuxInt != 4 {
+                       break
+               }
+               t := v.Aux
+               dst := v.Args[0]
+               src := v.Args[1]
+               mem := v.Args[2]
+               if !(t.(Type).Alignment()%4 == 0) {
+                       break
+               }
+               v.reset(OpPPC64MOVWstore)
+               v.AddArg(dst)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVWload, types.Int32)
+               v0.AddArg(src)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (Move [4] {t} dst src mem)
+       // cond: t.(Type).Alignment()%2 == 0
+       // result: (MOVHstore [2] dst (MOVHZload [2] src mem)           (MOVHstore dst (MOVHZload src mem) mem))
+       for {
+               if v.AuxInt != 4 {
+                       break
+               }
+               t := v.Aux
+               dst := v.Args[0]
+               src := v.Args[1]
+               mem := v.Args[2]
+               if !(t.(Type).Alignment()%2 == 0) {
+                       break
+               }
+               v.reset(OpPPC64MOVHstore)
+               v.AuxInt = 2
+               v.AddArg(dst)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVHZload, types.UInt16)
+               v0.AuxInt = 2
+               v0.AddArg(src)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpPPC64MOVHstore, TypeMem)
+               v1.AddArg(dst)
+               v2 := b.NewValue0(v.Pos, OpPPC64MOVHZload, types.UInt16)
+               v2.AddArg(src)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v1.AddArg(mem)
+               v.AddArg(v1)
+               return true
+       }
+       // match: (Move [4] dst src mem)
+       // cond:
+       // result: (MOVBstore [3] dst (MOVBZload [3] src mem)           (MOVBstore [2] dst (MOVBZload [2] src mem)                      (MOVBstore [1] dst (MOVBZload [1] src mem)                              (MOVBstore dst (MOVBZload src mem) mem))))
+       for {
+               if v.AuxInt != 4 {
+                       break
+               }
+               dst := v.Args[0]
+               src := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpPPC64MOVBstore)
+               v.AuxInt = 3
+               v.AddArg(dst)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVBZload, types.UInt8)
+               v0.AuxInt = 3
+               v0.AddArg(src)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpPPC64MOVBstore, TypeMem)
+               v1.AuxInt = 2
+               v1.AddArg(dst)
+               v2 := b.NewValue0(v.Pos, OpPPC64MOVBZload, types.UInt8)
+               v2.AuxInt = 2
+               v2.AddArg(src)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpPPC64MOVBstore, TypeMem)
+               v3.AuxInt = 1
+               v3.AddArg(dst)
+               v4 := b.NewValue0(v.Pos, OpPPC64MOVBZload, types.UInt8)
+               v4.AuxInt = 1
+               v4.AddArg(src)
+               v4.AddArg(mem)
+               v3.AddArg(v4)
+               v5 := b.NewValue0(v.Pos, OpPPC64MOVBstore, TypeMem)
+               v5.AddArg(dst)
+               v6 := b.NewValue0(v.Pos, OpPPC64MOVBZload, types.UInt8)
+               v6.AddArg(src)
+               v6.AddArg(mem)
+               v5.AddArg(v6)
+               v5.AddArg(mem)
+               v3.AddArg(v5)
+               v1.AddArg(v3)
+               v.AddArg(v1)
+               return true
+       }
+       // match: (Move [8] {t} dst src mem)
+       // cond: t.(Type).Alignment()%8 == 0
+       // result: (MOVDstore dst (MOVDload src mem) mem)
+       for {
+               if v.AuxInt != 8 {
+                       break
+               }
+               t := v.Aux
+               dst := v.Args[0]
+               src := v.Args[1]
+               mem := v.Args[2]
+               if !(t.(Type).Alignment()%8 == 0) {
+                       break
+               }
+               v.reset(OpPPC64MOVDstore)
+               v.AddArg(dst)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVDload, types.Int64)
+               v0.AddArg(src)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (Move [8] {t} dst src mem)
+       // cond: t.(Type).Alignment()%4 == 0
+       // result: (MOVWstore [4] dst (MOVWZload [4] src mem)           (MOVWstore dst (MOVWZload src mem) mem))
+       for {
+               if v.AuxInt != 8 {
+                       break
+               }
+               t := v.Aux
+               dst := v.Args[0]
+               src := v.Args[1]
+               mem := v.Args[2]
+               if !(t.(Type).Alignment()%4 == 0) {
+                       break
+               }
+               v.reset(OpPPC64MOVWstore)
+               v.AuxInt = 4
+               v.AddArg(dst)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVWZload, types.UInt32)
+               v0.AuxInt = 4
+               v0.AddArg(src)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpPPC64MOVWstore, TypeMem)
+               v1.AddArg(dst)
+               v2 := b.NewValue0(v.Pos, OpPPC64MOVWZload, types.UInt32)
+               v2.AddArg(src)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v1.AddArg(mem)
+               v.AddArg(v1)
+               return true
+       }
+       // match: (Move [8] {t} dst src mem)
+       // cond: t.(Type).Alignment()%2 == 0
+       // result: (MOVHstore [6] dst (MOVHZload [6] src mem)           (MOVHstore [4] dst (MOVHZload [4] src mem)                      (MOVHstore [2] dst (MOVHZload [2] src mem)                              (MOVHstore dst (MOVHZload src mem) mem))))
+       for {
+               if v.AuxInt != 8 {
+                       break
+               }
+               t := v.Aux
+               dst := v.Args[0]
+               src := v.Args[1]
+               mem := v.Args[2]
+               if !(t.(Type).Alignment()%2 == 0) {
+                       break
+               }
+               v.reset(OpPPC64MOVHstore)
+               v.AuxInt = 6
+               v.AddArg(dst)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVHZload, types.UInt16)
+               v0.AuxInt = 6
+               v0.AddArg(src)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpPPC64MOVHstore, TypeMem)
+               v1.AuxInt = 4
+               v1.AddArg(dst)
+               v2 := b.NewValue0(v.Pos, OpPPC64MOVHZload, types.UInt16)
+               v2.AuxInt = 4
+               v2.AddArg(src)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpPPC64MOVHstore, TypeMem)
+               v3.AuxInt = 2
+               v3.AddArg(dst)
+               v4 := b.NewValue0(v.Pos, OpPPC64MOVHZload, types.UInt16)
+               v4.AuxInt = 2
+               v4.AddArg(src)
+               v4.AddArg(mem)
+               v3.AddArg(v4)
+               v5 := b.NewValue0(v.Pos, OpPPC64MOVHstore, TypeMem)
+               v5.AddArg(dst)
+               v6 := b.NewValue0(v.Pos, OpPPC64MOVHZload, types.UInt16)
+               v6.AddArg(src)
+               v6.AddArg(mem)
+               v5.AddArg(v6)
+               v5.AddArg(mem)
+               v3.AddArg(v5)
+               v1.AddArg(v3)
+               v.AddArg(v1)
+               return true
+       }
        // match: (Move [3] dst src mem)
        // cond:
        // result: (MOVBstore [2] dst (MOVBZload [2] src mem)                 (MOVHstore dst (MOVHload src mem) mem))
@@ -3919,7 +4125,7 @@ func rewriteValuePPC64_OpMove(v *Value) bool {
        return false
 }
 func rewriteValuePPC64_OpMul16(v *Value) bool {
-       // match: (Mul16  x y)
+       // match: (Mul16 x y)
        // cond:
        // result: (MULLW x y)
        for {
@@ -3932,7 +4138,7 @@ func rewriteValuePPC64_OpMul16(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpMul32(v *Value) bool {
-       // match: (Mul32  x y)
+       // match: (Mul32 x y)
        // cond:
        // result: (MULLW  x y)
        for {
@@ -3958,7 +4164,7 @@ func rewriteValuePPC64_OpMul32F(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpMul64(v *Value) bool {
-       // match: (Mul64  x y)
+       // match: (Mul64 x y)
        // cond:
        // result: (MULLD  x y)
        for {
@@ -3984,7 +4190,7 @@ func rewriteValuePPC64_OpMul64F(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpMul8(v *Value) bool {
-       // match: (Mul8   x y)
+       // match: (Mul8 x y)
        // cond:
        // result: (MULLW x y)
        for {
@@ -3997,7 +4203,7 @@ func rewriteValuePPC64_OpMul8(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpNeg16(v *Value) bool {
-       // match: (Neg16  x)
+       // match: (Neg16 x)
        // cond:
        // result: (NEG x)
        for {
@@ -4008,7 +4214,7 @@ func rewriteValuePPC64_OpNeg16(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpNeg32(v *Value) bool {
-       // match: (Neg32  x)
+       // match: (Neg32 x)
        // cond:
        // result: (NEG x)
        for {
@@ -4030,7 +4236,7 @@ func rewriteValuePPC64_OpNeg32F(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpNeg64(v *Value) bool {
-       // match: (Neg64  x)
+       // match: (Neg64 x)
        // cond:
        // result: (NEG x)
        for {
@@ -4052,7 +4258,7 @@ func rewriteValuePPC64_OpNeg64F(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpNeg8(v *Value) bool {
-       // match: (Neg8   x)
+       // match: (Neg8 x)
        // cond:
        // result: (NEG x)
        for {
@@ -4330,7 +4536,7 @@ func rewriteValuePPC64_OpOr64(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpOr8(v *Value) bool {
-       // match: (Or8  x y)
+       // match: (Or8 x y)
        // cond:
        // result: (OR x y)
        for {
@@ -4356,16 +4562,16 @@ func rewriteValuePPC64_OpOrB(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpPPC64ADD(v *Value) bool {
-       // match: (ADD (MOVDconst [c]) x)
+       // match: (ADD x (MOVDconst [c]))
        // cond: is32Bit(c)
        // result: (ADDconst [c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpPPC64MOVDconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                if !(is32Bit(c)) {
                        break
                }
@@ -4374,16 +4580,16 @@ func rewriteValuePPC64_OpPPC64ADD(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADD x (MOVDconst [c]))
+       // match: (ADD (MOVDconst [c]) x)
        // cond: is32Bit(c)
        // result: (ADDconst [c] x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpPPC64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                if !(is32Bit(c)) {
                        break
                }
@@ -4466,6 +4672,24 @@ func rewriteValuePPC64_OpPPC64AND(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (AND (NOR y y) x)
+       // cond:
+       // result: (ANDN x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64NOR {
+                       break
+               }
+               y := v_0.Args[0]
+               if y != v_0.Args[1] {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpPPC64ANDN)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (AND (MOVDconst [c]) (MOVDconst [d]))
        // cond:
        // result: (MOVDconst [c&d])
@@ -4484,6 +4708,24 @@ func rewriteValuePPC64_OpPPC64AND(v *Value) bool {
                v.AuxInt = c & d
                return true
        }
+       // match: (AND (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [c&d])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64MOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpPPC64MOVDconst)
+               v.AuxInt = c & d
+               return true
+       }
        // match: (AND x (MOVDconst [c]))
        // cond: isU16Bit(c)
        // result: (ANDconst [c] x)
@@ -4556,6 +4798,42 @@ func rewriteValuePPC64_OpPPC64AND(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (AND x:(MOVBZload _ _) (MOVDconst [c]))
+       // cond:
+       // result: (ANDconst [c&0xFF] x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpPPC64MOVBZload {
+                       break
+               }
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpPPC64ANDconst)
+               v.AuxInt = c & 0xFF
+               v.AddArg(x)
+               return true
+       }
+       // match: (AND (MOVDconst [c]) x:(MOVBZload _ _))
+       // cond:
+       // result: (ANDconst [c&0xFF] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if x.Op != OpPPC64MOVBZload {
+                       break
+               }
+               v.reset(OpPPC64ANDconst)
+               v.AuxInt = c & 0xFF
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValuePPC64_OpPPC64ANDconst(v *Value) bool {
@@ -5186,23 +5464,6 @@ func rewriteValuePPC64_OpPPC64Equal(v *Value) bool {
        return false
 }
 func rewriteValuePPC64_OpPPC64FADD(v *Value) bool {
-       // match: (FADD z (FMUL x y))
-       // cond:
-       // result: (FMADD x y z)
-       for {
-               z := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpPPC64FMUL {
-                       break
-               }
-               x := v_1.Args[0]
-               y := v_1.Args[1]
-               v.reset(OpPPC64FMADD)
-               v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(z)
-               return true
-       }
        // match: (FADD (FMUL x y) z)
        // cond:
        // result: (FMADD x y z)
@@ -5220,26 +5481,26 @@ func rewriteValuePPC64_OpPPC64FADD(v *Value) bool {
                v.AddArg(z)
                return true
        }
-       return false
-}
-func rewriteValuePPC64_OpPPC64FADDS(v *Value) bool {
-       // match: (FADDS z (FMULS x y))
+       // match: (FADD z (FMUL x y))
        // cond:
-       // result: (FMADDS x y z)
+       // result: (FMADD x y z)
        for {
                z := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpPPC64FMULS {
+               if v_1.Op != OpPPC64FMUL {
                        break
                }
                x := v_1.Args[0]
                y := v_1.Args[1]
-               v.reset(OpPPC64FMADDS)
+               v.reset(OpPPC64FMADD)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(z)
                return true
        }
+       return false
+}
+func rewriteValuePPC64_OpPPC64FADDS(v *Value) bool {
        // match: (FADDS (FMULS x y) z)
        // cond:
        // result: (FMADDS x y z)
@@ -5257,6 +5518,23 @@ func rewriteValuePPC64_OpPPC64FADDS(v *Value) bool {
                v.AddArg(z)
                return true
        }
+       // match: (FADDS z (FMULS x y))
+       // cond:
+       // result: (FMADDS x y z)
+       for {
+               z := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64FMULS {
+                       break
+               }
+               x := v_1.Args[0]
+               y := v_1.Args[1]
+               v.reset(OpPPC64FMADDS)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(z)
+               return true
+       }
        return false
 }
 func rewriteValuePPC64_OpPPC64FMOVDload(v *Value) bool {
@@ -6770,6 +7048,27 @@ func rewriteValuePPC64_OpPPC64MOVWZreg(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (MOVWZreg y:(AND _ (MOVDconst [c])))
+       // cond: uint64(c) <= 0xFFFFFFFF
+       // result: y
+       for {
+               y := v.Args[0]
+               if y.Op != OpPPC64AND {
+                       break
+               }
+               y_1 := y.Args[1]
+               if y_1.Op != OpPPC64MOVDconst {
+                       break
+               }
+               c := y_1.AuxInt
+               if !(uint64(c) <= 0xFFFFFFFF) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
        // match: (MOVWZreg y:(MOVWZreg _))
        // cond:
        // result: y
@@ -6913,6 +7212,27 @@ func rewriteValuePPC64_OpPPC64MOVWreg(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (MOVWreg y:(AND _ (MOVDconst [c])))
+       // cond: uint64(c) <= 0x7FFFFFFF
+       // result: y
+       for {
+               y := v.Args[0]
+               if y.Op != OpPPC64AND {
+                       break
+               }
+               y_1 := y.Args[1]
+               if y_1.Op != OpPPC64MOVDconst {
+                       break
+               }
+               c := y_1.AuxInt
+               if !(uint64(c) <= 0x7FFFFFFF) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
        // match: (MOVWreg y:(MOVWreg _))
        // cond:
        // result: y
@@ -7231,6 +7551,24 @@ func rewriteValuePPC64_OpPPC64OR(v *Value) bool {
                v.AuxInt = c | d
                return true
        }
+       // match: (OR (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [c|d])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64MOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpPPC64MOVDconst)
+               v.AuxInt = c | d
+               return true
+       }
        // match: (OR x (MOVDconst [c]))
        // cond: isU32Bit(c)
        // result: (ORconst [c] x)
@@ -7372,6 +7710,24 @@ func rewriteValuePPC64_OpPPC64XOR(v *Value) bool {
                v.AuxInt = c ^ d
                return true
        }
+       // match: (XOR (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [c^d])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64MOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpPPC64MOVDconst)
+               v.AuxInt = c ^ d
+               return true
+       }
        // match: (XOR x (MOVDconst [c]))
        // cond: isU32Bit(c)
        // result: (XORconst [c] x)
@@ -7708,7 +8064,7 @@ func rewriteValuePPC64_OpRsh16x32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16x32  x (Const64 [c]))
+       // match: (Rsh16x32 x (Const64 [c]))
        // cond: uint32(c) < 16
        // result: (SRAWconst (SignExt16to32 x) [c])
        for {
@@ -7728,7 +8084,7 @@ func rewriteValuePPC64_OpRsh16x32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh16x32  x (MOVDconst [c]))
+       // match: (Rsh16x32 x (MOVDconst [c]))
        // cond: uint32(c) < 16
        // result: (SRAWconst (SignExt16to32 x) [c])
        for {
@@ -7777,7 +8133,7 @@ func rewriteValuePPC64_OpRsh16x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16x64  x (Const64 [c]))
+       // match: (Rsh16x64 x (Const64 [c]))
        // cond: uint64(c) < 16
        // result: (SRAWconst (SignExt16to32 x) [c])
        for {
@@ -7817,7 +8173,7 @@ func rewriteValuePPC64_OpRsh16x64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh16x64  x (MOVDconst [c]))
+       // match: (Rsh16x64 x (MOVDconst [c]))
        // cond: uint64(c) < 16
        // result: (SRAWconst (SignExt16to32 x) [c])
        for {
@@ -8114,7 +8470,7 @@ func rewriteValuePPC64_OpRsh32x32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32x32  x (Const64 [c]))
+       // match: (Rsh32x32 x (Const64 [c]))
        // cond: uint32(c) < 32
        // result: (SRAWconst x [c])
        for {
@@ -8132,7 +8488,7 @@ func rewriteValuePPC64_OpRsh32x32(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Rsh32x32  x (MOVDconst [c]))
+       // match: (Rsh32x32 x (MOVDconst [c]))
        // cond: uint32(c) < 32
        // result: (SRAWconst x [c])
        for {
@@ -8177,7 +8533,7 @@ func rewriteValuePPC64_OpRsh32x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32x64  x (Const64 [c]))
+       // match: (Rsh32x64 x (Const64 [c]))
        // cond: uint64(c) < 32
        // result: (SRAWconst x [c])
        for {
@@ -8213,7 +8569,7 @@ func rewriteValuePPC64_OpRsh32x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Rsh32x64  x (MOVDconst [c]))
+       // match: (Rsh32x64 x (MOVDconst [c]))
        // cond: uint64(c) < 32
        // result: (SRAWconst x [c])
        for {
@@ -8504,7 +8860,7 @@ func rewriteValuePPC64_OpRsh64x32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh64x32  x (Const64 [c]))
+       // match: (Rsh64x32 x (Const64 [c]))
        // cond: uint32(c) < 64
        // result: (SRADconst x [c])
        for {
@@ -8522,7 +8878,7 @@ func rewriteValuePPC64_OpRsh64x32(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Rsh64x32  x (MOVDconst [c]))
+       // match: (Rsh64x32 x (MOVDconst [c]))
        // cond: uint32(c) < 64
        // result: (SRADconst x [c])
        for {
@@ -8567,7 +8923,7 @@ func rewriteValuePPC64_OpRsh64x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh64x64  x (Const64 [c]))
+       // match: (Rsh64x64 x (Const64 [c]))
        // cond: uint64(c) < 64
        // result: (SRADconst x [c])
        for {
@@ -8603,7 +8959,7 @@ func rewriteValuePPC64_OpRsh64x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Rsh64x64  x (MOVDconst [c]))
+       // match: (Rsh64x64 x (MOVDconst [c]))
        // cond: uint64(c) < 64
        // result: (SRADconst x [c])
        for {
@@ -8702,7 +9058,7 @@ func rewriteValuePPC64_OpRsh8Ux32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8Ux32  x (Const64 [c]))
+       // match: (Rsh8Ux32 x (Const64 [c]))
        // cond: uint32(c) < 8
        // result: (SRWconst (ZeroExt8to32  x) [c])
        for {
@@ -8722,7 +9078,7 @@ func rewriteValuePPC64_OpRsh8Ux32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8Ux32  x (MOVDconst [c]))
+       // match: (Rsh8Ux32 x (MOVDconst [c]))
        // cond: uint32(c) < 8
        // result: (SRWconst (ZeroExt8to32  x) [c])
        for {
@@ -8771,7 +9127,7 @@ func rewriteValuePPC64_OpRsh8Ux64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8Ux64  x (Const64 [c]))
+       // match: (Rsh8Ux64 x (Const64 [c]))
        // cond: uint64(c) < 8
        // result: (SRWconst (ZeroExt8to32  x) [c])
        for {
@@ -8791,7 +9147,7 @@ func rewriteValuePPC64_OpRsh8Ux64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8Ux64  _ (Const64 [c]))
+       // match: (Rsh8Ux64 _ (Const64 [c]))
        // cond: uint64(c) >= 8
        // result: (MOVDconst [0])
        for {
@@ -8807,7 +9163,7 @@ func rewriteValuePPC64_OpRsh8Ux64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Rsh8Ux64  x (MOVDconst [c]))
+       // match: (Rsh8Ux64 x (MOVDconst [c]))
        // cond: uint64(c) < 8
        // result: (SRWconst (ZeroExt8to32  x) [c])
        for {
@@ -8912,7 +9268,7 @@ func rewriteValuePPC64_OpRsh8x32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8x32   x (Const64 [c]))
+       // match: (Rsh8x32 x (Const64 [c]))
        // cond: uint32(c) < 8
        // result: (SRAWconst (SignExt8to32  x) [c])
        for {
@@ -8932,7 +9288,7 @@ func rewriteValuePPC64_OpRsh8x32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x32   x (MOVDconst [c]))
+       // match: (Rsh8x32 x (MOVDconst [c]))
        // cond: uint32(c) < 8
        // result: (SRAWconst (SignExt8to32  x) [c])
        for {
@@ -8981,7 +9337,7 @@ func rewriteValuePPC64_OpRsh8x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8x64   x (Const64 [c]))
+       // match: (Rsh8x64 x (Const64 [c]))
        // cond: uint64(c) < 8
        // result: (SRAWconst (SignExt8to32  x) [c])
        for {
@@ -9001,7 +9357,7 @@ func rewriteValuePPC64_OpRsh8x64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x64  x (Const64 [c]))
+       // match: (Rsh8x64 x (Const64 [c]))
        // cond: uint64(c) >= 8
        // result: (SRAWconst (SignExt8to32  x) [63])
        for {
@@ -9021,7 +9377,7 @@ func rewriteValuePPC64_OpRsh8x64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x64   x (MOVDconst [c]))
+       // match: (Rsh8x64 x (MOVDconst [c]))
        // cond: uint64(c) < 8
        // result: (SRAWconst (SignExt8to32  x) [c])
        for {
@@ -9126,7 +9482,7 @@ func rewriteValuePPC64_OpSignExt32to64(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpSignExt8to16(v *Value) bool {
-       // match: (SignExt8to16  x)
+       // match: (SignExt8to16 x)
        // cond:
        // result: (MOVBreg x)
        for {
@@ -9137,7 +9493,7 @@ func rewriteValuePPC64_OpSignExt8to16(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpSignExt8to32(v *Value) bool {
-       // match: (SignExt8to32  x)
+       // match: (SignExt8to32 x)
        // cond:
        // result: (MOVBreg x)
        for {
@@ -9148,7 +9504,7 @@ func rewriteValuePPC64_OpSignExt8to32(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpSignExt8to64(v *Value) bool {
-       // match: (SignExt8to64  x)
+       // match: (SignExt8to64 x)
        // cond:
        // result: (MOVBreg x)
        for {
@@ -9324,7 +9680,7 @@ func rewriteValuePPC64_OpStore(v *Value) bool {
        return false
 }
 func rewriteValuePPC64_OpSub16(v *Value) bool {
-       // match: (Sub16  x y)
+       // match: (Sub16 x y)
        // cond:
        // result: (SUB x y)
        for {
@@ -9337,7 +9693,7 @@ func rewriteValuePPC64_OpSub16(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpSub32(v *Value) bool {
-       // match: (Sub32  x y)
+       // match: (Sub32 x y)
        // cond:
        // result: (SUB x y)
        for {
@@ -9363,7 +9719,7 @@ func rewriteValuePPC64_OpSub32F(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpSub64(v *Value) bool {
-       // match: (Sub64  x y)
+       // match: (Sub64 x y)
        // cond:
        // result: (SUB  x y)
        for {
@@ -9389,7 +9745,7 @@ func rewriteValuePPC64_OpSub64F(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpSub8(v *Value) bool {
-       // match: (Sub8   x y)
+       // match: (Sub8 x y)
        // cond:
        // result: (SUB x y)
        for {
@@ -9415,7 +9771,7 @@ func rewriteValuePPC64_OpSubPtr(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpTrunc16to8(v *Value) bool {
-       // match: (Trunc16to8  x)
+       // match: (Trunc16to8 x)
        // cond:
        // result: (MOVBreg x)
        for {
@@ -9437,7 +9793,7 @@ func rewriteValuePPC64_OpTrunc32to16(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpTrunc32to8(v *Value) bool {
-       // match: (Trunc32to8  x)
+       // match: (Trunc32to8 x)
        // cond:
        // result: (MOVBreg x)
        for {
@@ -9470,7 +9826,7 @@ func rewriteValuePPC64_OpTrunc64to32(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpTrunc64to8(v *Value) bool {
-       // match: (Trunc64to8  x)
+       // match: (Trunc64to8 x)
        // cond:
        // result: (MOVBreg x)
        for {
@@ -9520,7 +9876,7 @@ func rewriteValuePPC64_OpXor64(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpXor8(v *Value) bool {
-       // match: (Xor8  x y)
+       // match: (Xor8 x y)
        // cond:
        // result: (XOR x y)
        for {
@@ -9921,7 +10277,7 @@ func rewriteValuePPC64_OpZeroExt32to64(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpZeroExt8to16(v *Value) bool {
-       // match: (ZeroExt8to16  x)
+       // match: (ZeroExt8to16 x)
        // cond:
        // result: (MOVBZreg x)
        for {
@@ -9932,7 +10288,7 @@ func rewriteValuePPC64_OpZeroExt8to16(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpZeroExt8to32(v *Value) bool {
-       // match: (ZeroExt8to32  x)
+       // match: (ZeroExt8to32 x)
        // cond:
        // result: (MOVBZreg x)
        for {
@@ -9943,7 +10299,7 @@ func rewriteValuePPC64_OpZeroExt8to32(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpZeroExt8to64(v *Value) bool {
-       // match: (ZeroExt8to64  x)
+       // match: (ZeroExt8to64 x)
        // cond:
        // result: (MOVBZreg x)
        for {
index 6274d5475ff60ef493192d768460b2b80be41818..85b75019e2524b292c80530b2e26fdd94b40af30 100644 (file)
@@ -718,7 +718,7 @@ func rewriteValueS390X(v *Value) bool {
        return false
 }
 func rewriteValueS390X_OpAdd16(v *Value) bool {
-       // match: (Add16  x y)
+       // match: (Add16 x y)
        // cond:
        // result: (ADDW  x y)
        for {
@@ -731,7 +731,7 @@ func rewriteValueS390X_OpAdd16(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpAdd32(v *Value) bool {
-       // match: (Add32  x y)
+       // match: (Add32 x y)
        // cond:
        // result: (ADDW  x y)
        for {
@@ -757,7 +757,7 @@ func rewriteValueS390X_OpAdd32F(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpAdd64(v *Value) bool {
-       // match: (Add64  x y)
+       // match: (Add64 x y)
        // cond:
        // result: (ADD  x y)
        for {
@@ -783,7 +783,7 @@ func rewriteValueS390X_OpAdd64F(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpAdd8(v *Value) bool {
-       // match: (Add8   x y)
+       // match: (Add8 x y)
        // cond:
        // result: (ADDW  x y)
        for {
@@ -861,7 +861,7 @@ func rewriteValueS390X_OpAnd64(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpAnd8(v *Value) bool {
-       // match: (And8  x y)
+       // match: (And8 x y)
        // cond:
        // result: (ANDW x y)
        for {
@@ -1193,7 +1193,7 @@ func rewriteValueS390X_OpCom64(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpCom8(v *Value) bool {
-       // match: (Com8  x)
+       // match: (Com8 x)
        // cond:
        // result: (NOTW x)
        for {
@@ -1204,7 +1204,7 @@ func rewriteValueS390X_OpCom8(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpConst16(v *Value) bool {
-       // match: (Const16  [val])
+       // match: (Const16 [val])
        // cond:
        // result: (MOVDconst [val])
        for {
@@ -1215,7 +1215,7 @@ func rewriteValueS390X_OpConst16(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpConst32(v *Value) bool {
-       // match: (Const32  [val])
+       // match: (Const32 [val])
        // cond:
        // result: (MOVDconst [val])
        for {
@@ -1237,7 +1237,7 @@ func rewriteValueS390X_OpConst32F(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpConst64(v *Value) bool {
-       // match: (Const64  [val])
+       // match: (Const64 [val])
        // cond:
        // result: (MOVDconst [val])
        for {
@@ -1259,7 +1259,7 @@ func rewriteValueS390X_OpConst64F(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpConst8(v *Value) bool {
-       // match: (Const8   [val])
+       // match: (Const8 [val])
        // cond:
        // result: (MOVDconst [val])
        for {
@@ -1480,7 +1480,7 @@ func rewriteValueS390X_OpDiv16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div16  x y)
+       // match: (Div16 x y)
        // cond:
        // result: (DIVW  (MOVHreg x) (MOVHreg y))
        for {
@@ -1522,7 +1522,7 @@ func rewriteValueS390X_OpDiv32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div32  x y)
+       // match: (Div32 x y)
        // cond:
        // result: (DIVW  (MOVWreg x) y)
        for {
@@ -1569,7 +1569,7 @@ func rewriteValueS390X_OpDiv32u(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpDiv64(v *Value) bool {
-       // match: (Div64  x y)
+       // match: (Div64 x y)
        // cond:
        // result: (DIVD  x y)
        for {
@@ -1612,7 +1612,7 @@ func rewriteValueS390X_OpDiv8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div8   x y)
+       // match: (Div8 x y)
        // cond:
        // result: (DIVW  (MOVBreg x) (MOVBreg y))
        for {
@@ -1633,7 +1633,7 @@ func rewriteValueS390X_OpDiv8u(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div8u  x y)
+       // match: (Div8u x y)
        // cond:
        // result: (DIVWU (MOVBZreg x) (MOVBZreg y))
        for {
@@ -1654,7 +1654,7 @@ func rewriteValueS390X_OpEq16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Eq16  x y)
+       // match: (Eq16 x y)
        // cond:
        // result: (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
        for {
@@ -1683,7 +1683,7 @@ func rewriteValueS390X_OpEq32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Eq32  x y)
+       // match: (Eq32 x y)
        // cond:
        // result: (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
        for {
@@ -1733,7 +1733,7 @@ func rewriteValueS390X_OpEq64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Eq64  x y)
+       // match: (Eq64 x y)
        // cond:
        // result: (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
        for {
@@ -1783,7 +1783,7 @@ func rewriteValueS390X_OpEq8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Eq8   x y)
+       // match: (Eq8 x y)
        // cond:
        // result: (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
        for {
@@ -1812,7 +1812,7 @@ func rewriteValueS390X_OpEqB(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (EqB   x y)
+       // match: (EqB x y)
        // cond:
        // result: (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
        for {
@@ -1866,7 +1866,7 @@ func rewriteValueS390X_OpGeq16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Geq16  x y)
+       // match: (Geq16 x y)
        // cond:
        // result: (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
        for {
@@ -1924,7 +1924,7 @@ func rewriteValueS390X_OpGeq32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Geq32  x y)
+       // match: (Geq32 x y)
        // cond:
        // result: (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
        for {
@@ -1999,7 +1999,7 @@ func rewriteValueS390X_OpGeq64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Geq64  x y)
+       // match: (Geq64 x y)
        // cond:
        // result: (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
        for {
@@ -2074,7 +2074,7 @@ func rewriteValueS390X_OpGeq8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Geq8   x y)
+       // match: (Geq8 x y)
        // cond:
        // result: (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
        for {
@@ -2103,7 +2103,7 @@ func rewriteValueS390X_OpGeq8U(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Geq8U  x y)
+       // match: (Geq8U x y)
        // cond:
        // result: (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVBZreg x) (MOVBZreg y)))
        for {
@@ -2152,7 +2152,7 @@ func rewriteValueS390X_OpGreater16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Greater16  x y)
+       // match: (Greater16 x y)
        // cond:
        // result: (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
        for {
@@ -2210,7 +2210,7 @@ func rewriteValueS390X_OpGreater32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Greater32  x y)
+       // match: (Greater32 x y)
        // cond:
        // result: (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
        for {
@@ -2285,7 +2285,7 @@ func rewriteValueS390X_OpGreater64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Greater64  x y)
+       // match: (Greater64 x y)
        // cond:
        // result: (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
        for {
@@ -2360,7 +2360,7 @@ func rewriteValueS390X_OpGreater8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Greater8   x y)
+       // match: (Greater8 x y)
        // cond:
        // result: (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
        for {
@@ -2389,7 +2389,7 @@ func rewriteValueS390X_OpGreater8U(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Greater8U  x y)
+       // match: (Greater8U x y)
        // cond:
        // result: (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVBZreg x) (MOVBZreg y)))
        for {
@@ -2418,7 +2418,7 @@ func rewriteValueS390X_OpHmul32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Hmul32  x y)
+       // match: (Hmul32 x y)
        // cond:
        // result: (SRDconst [32] (MULLD (MOVWreg x) (MOVWreg y)))
        for {
@@ -2462,7 +2462,7 @@ func rewriteValueS390X_OpHmul32u(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpHmul64(v *Value) bool {
-       // match: (Hmul64  x y)
+       // match: (Hmul64 x y)
        // cond:
        // result: (MULHD  x y)
        for {
@@ -2599,7 +2599,7 @@ func rewriteValueS390X_OpLeq16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Leq16  x y)
+       // match: (Leq16 x y)
        // cond:
        // result: (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
        for {
@@ -2657,7 +2657,7 @@ func rewriteValueS390X_OpLeq32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Leq32  x y)
+       // match: (Leq32 x y)
        // cond:
        // result: (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
        for {
@@ -2732,7 +2732,7 @@ func rewriteValueS390X_OpLeq64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Leq64  x y)
+       // match: (Leq64 x y)
        // cond:
        // result: (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
        for {
@@ -2807,7 +2807,7 @@ func rewriteValueS390X_OpLeq8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Leq8   x y)
+       // match: (Leq8 x y)
        // cond:
        // result: (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
        for {
@@ -2836,7 +2836,7 @@ func rewriteValueS390X_OpLeq8U(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Leq8U  x y)
+       // match: (Leq8U x y)
        // cond:
        // result: (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVBZreg x) (MOVBZreg y)))
        for {
@@ -2865,7 +2865,7 @@ func rewriteValueS390X_OpLess16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Less16  x y)
+       // match: (Less16 x y)
        // cond:
        // result: (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
        for {
@@ -2923,7 +2923,7 @@ func rewriteValueS390X_OpLess32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Less32  x y)
+       // match: (Less32 x y)
        // cond:
        // result: (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
        for {
@@ -2998,7 +2998,7 @@ func rewriteValueS390X_OpLess64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Less64  x y)
+       // match: (Less64 x y)
        // cond:
        // result: (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
        for {
@@ -3073,7 +3073,7 @@ func rewriteValueS390X_OpLess8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Less8   x y)
+       // match: (Less8 x y)
        // cond:
        // result: (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
        for {
@@ -3102,7 +3102,7 @@ func rewriteValueS390X_OpLess8U(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Less8U  x y)
+       // match: (Less8U x y)
        // cond:
        // result: (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVBZreg x) (MOVBZreg y)))
        for {
@@ -3345,7 +3345,7 @@ func rewriteValueS390X_OpLsh16x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh16x8  <t> x y)
+       // match: (Lsh16x8 <t> x y)
        // cond:
        // result: (ANDW (SLW <t> x y) (SUBEWcarrymask <t> (CMPWUconst (MOVBZreg y) [31])))
        for {
@@ -3449,7 +3449,7 @@ func rewriteValueS390X_OpLsh32x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh32x8  <t> x y)
+       // match: (Lsh32x8 <t> x y)
        // cond:
        // result: (ANDW (SLW <t> x y) (SUBEWcarrymask <t> (CMPWUconst (MOVBZreg y) [31])))
        for {
@@ -3553,7 +3553,7 @@ func rewriteValueS390X_OpLsh64x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh64x8  <t> x y)
+       // match: (Lsh64x8 <t> x y)
        // cond:
        // result: (AND (SLD <t> x y) (SUBEcarrymask <t> (CMPWUconst (MOVBZreg y) [63])))
        for {
@@ -3657,7 +3657,7 @@ func rewriteValueS390X_OpLsh8x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh8x8  <t> x y)
+       // match: (Lsh8x8 <t> x y)
        // cond:
        // result: (ANDW (SLW <t> x y) (SUBEWcarrymask <t> (CMPWUconst (MOVBZreg y) [31])))
        for {
@@ -3685,7 +3685,7 @@ func rewriteValueS390X_OpMod16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod16  x y)
+       // match: (Mod16 x y)
        // cond:
        // result: (MODW  (MOVHreg x) (MOVHreg y))
        for {
@@ -3727,7 +3727,7 @@ func rewriteValueS390X_OpMod32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod32  x y)
+       // match: (Mod32 x y)
        // cond:
        // result: (MODW  (MOVWreg x) y)
        for {
@@ -3761,7 +3761,7 @@ func rewriteValueS390X_OpMod32u(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpMod64(v *Value) bool {
-       // match: (Mod64  x y)
+       // match: (Mod64 x y)
        // cond:
        // result: (MODD  x y)
        for {
@@ -3791,7 +3791,7 @@ func rewriteValueS390X_OpMod8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod8   x y)
+       // match: (Mod8 x y)
        // cond:
        // result: (MODW  (MOVBreg x) (MOVBreg y))
        for {
@@ -3812,7 +3812,7 @@ func rewriteValueS390X_OpMod8u(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mod8u  x y)
+       // match: (Mod8u x y)
        // cond:
        // result: (MODWU (MOVBZreg x) (MOVBZreg y))
        for {
@@ -4235,7 +4235,7 @@ func rewriteValueS390X_OpMove(v *Value) bool {
        return false
 }
 func rewriteValueS390X_OpMul16(v *Value) bool {
-       // match: (Mul16  x y)
+       // match: (Mul16 x y)
        // cond:
        // result: (MULLW  x y)
        for {
@@ -4248,7 +4248,7 @@ func rewriteValueS390X_OpMul16(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpMul32(v *Value) bool {
-       // match: (Mul32  x y)
+       // match: (Mul32 x y)
        // cond:
        // result: (MULLW  x y)
        for {
@@ -4274,7 +4274,7 @@ func rewriteValueS390X_OpMul32F(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpMul64(v *Value) bool {
-       // match: (Mul64  x y)
+       // match: (Mul64 x y)
        // cond:
        // result: (MULLD  x y)
        for {
@@ -4300,7 +4300,7 @@ func rewriteValueS390X_OpMul64F(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpMul8(v *Value) bool {
-       // match: (Mul8   x y)
+       // match: (Mul8 x y)
        // cond:
        // result: (MULLW  x y)
        for {
@@ -4317,7 +4317,7 @@ func rewriteValueS390X_OpNeg16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Neg16  x)
+       // match: (Neg16 x)
        // cond:
        // result: (NEGW (MOVHreg x))
        for {
@@ -4330,7 +4330,7 @@ func rewriteValueS390X_OpNeg16(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpNeg32(v *Value) bool {
-       // match: (Neg32  x)
+       // match: (Neg32 x)
        // cond:
        // result: (NEGW x)
        for {
@@ -4352,7 +4352,7 @@ func rewriteValueS390X_OpNeg32F(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpNeg64(v *Value) bool {
-       // match: (Neg64  x)
+       // match: (Neg64 x)
        // cond:
        // result: (NEG x)
        for {
@@ -4378,7 +4378,7 @@ func rewriteValueS390X_OpNeg8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Neg8   x)
+       // match: (Neg8 x)
        // cond:
        // result: (NEGW (MOVBreg x))
        for {
@@ -4395,7 +4395,7 @@ func rewriteValueS390X_OpNeq16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Neq16  x y)
+       // match: (Neq16 x y)
        // cond:
        // result: (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
        for {
@@ -4424,7 +4424,7 @@ func rewriteValueS390X_OpNeq32(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Neq32  x y)
+       // match: (Neq32 x y)
        // cond:
        // result: (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
        for {
@@ -4474,7 +4474,7 @@ func rewriteValueS390X_OpNeq64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Neq64  x y)
+       // match: (Neq64 x y)
        // cond:
        // result: (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
        for {
@@ -4524,7 +4524,7 @@ func rewriteValueS390X_OpNeq8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Neq8   x y)
+       // match: (Neq8 x y)
        // cond:
        // result: (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
        for {
@@ -4553,7 +4553,7 @@ func rewriteValueS390X_OpNeqB(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (NeqB   x y)
+       // match: (NeqB x y)
        // cond:
        // result: (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
        for {
@@ -4714,7 +4714,7 @@ func rewriteValueS390X_OpOr64(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpOr8(v *Value) bool {
-       // match: (Or8  x y)
+       // match: (Or8 x y)
        // cond:
        // result: (ORW x y)
        for {
@@ -4852,7 +4852,7 @@ func rewriteValueS390X_OpRsh16Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16Ux8  <t> x y)
+       // match: (Rsh16Ux8 <t> x y)
        // cond:
        // result: (ANDW (SRW <t> (MOVHZreg x) y) (SUBEWcarrymask <t> (CMPWUconst (MOVBZreg y) [15])))
        for {
@@ -4977,7 +4977,7 @@ func rewriteValueS390X_OpRsh16x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16x8  <t> x y)
+       // match: (Rsh16x8 <t> x y)
        // cond:
        // result: (SRAW <t> (MOVHreg x) (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst (MOVBZreg y) [15])))))
        for {
@@ -5086,7 +5086,7 @@ func rewriteValueS390X_OpRsh32Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32Ux8  <t> x y)
+       // match: (Rsh32Ux8 <t> x y)
        // cond:
        // result: (ANDW (SRW <t> x y) (SUBEWcarrymask <t> (CMPWUconst (MOVBZreg y) [31])))
        for {
@@ -5199,7 +5199,7 @@ func rewriteValueS390X_OpRsh32x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32x8  <t> x y)
+       // match: (Rsh32x8 <t> x y)
        // cond:
        // result: (SRAW <t> x (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst (MOVBZreg y) [31])))))
        for {
@@ -5306,7 +5306,7 @@ func rewriteValueS390X_OpRsh64Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh64Ux8  <t> x y)
+       // match: (Rsh64Ux8 <t> x y)
        // cond:
        // result: (AND (SRD <t> x y) (SUBEcarrymask <t> (CMPWUconst (MOVBZreg y) [63])))
        for {
@@ -5419,7 +5419,7 @@ func rewriteValueS390X_OpRsh64x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh64x8  <t> x y)
+       // match: (Rsh64x8 <t> x y)
        // cond:
        // result: (SRAD <t> x (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst (MOVBZreg y) [63])))))
        for {
@@ -5536,7 +5536,7 @@ func rewriteValueS390X_OpRsh8Ux8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8Ux8  <t> x y)
+       // match: (Rsh8Ux8 <t> x y)
        // cond:
        // result: (ANDW (SRW <t> (MOVBZreg x) y) (SUBEWcarrymask <t> (CMPWUconst (MOVBZreg y) [7])))
        for {
@@ -5661,7 +5661,7 @@ func rewriteValueS390X_OpRsh8x8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8x8  <t> x y)
+       // match: (Rsh8x8 <t> x y)
        // cond:
        // result: (SRAW <t> (MOVBreg x) (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst (MOVBZreg y) [7])))))
        for {
@@ -5726,9 +5726,9 @@ func rewriteValueS390X_OpS390XADD(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADD (SLDconst x [c]) (SRDconst x [64-c]))
-       // cond:
-       // result: (RLLGconst [   c] x)
+       // match: (ADD (SLDconst x [c]) (SRDconst x [d]))
+       // cond: d == 64-c
+       // result: (RLLGconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XSLDconst {
@@ -5740,10 +5740,11 @@ func rewriteValueS390X_OpS390XADD(v *Value) bool {
                if v_1.Op != OpS390XSRDconst {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 64-c) {
                        break
                }
                v.reset(OpS390XRLLGconst)
@@ -5751,56 +5752,57 @@ func rewriteValueS390X_OpS390XADD(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADD (SRDconst x [c]) (SLDconst x [64-c]))
-       // cond:
-       // result: (RLLGconst [64-c] x)
+       // match: (ADD (SRDconst x [d]) (SLDconst x [c]))
+       // cond: d == 64-c
+       // result: (RLLGconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XSRDconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpS390XSLDconst {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 64-c) {
                        break
                }
                v.reset(OpS390XRLLGconst)
-               v.AuxInt = 64 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (ADD x (MOVDaddr [c] {s} y))
-       // cond: x.Op != OpSB && y.Op != OpSB
-       // result: (MOVDaddridx [c] {s} x y)
+       // match: (ADD idx (MOVDaddr [c] {s} ptr))
+       // cond: ptr.Op != OpSB && idx.Op != OpSB
+       // result: (MOVDaddridx [c] {s} ptr idx)
        for {
-               x := v.Args[0]
+               idx := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpS390XMOVDaddr {
                        break
                }
                c := v_1.AuxInt
                s := v_1.Aux
-               y := v_1.Args[0]
-               if !(x.Op != OpSB && y.Op != OpSB) {
+               ptr := v_1.Args[0]
+               if !(ptr.Op != OpSB && idx.Op != OpSB) {
                        break
                }
                v.reset(OpS390XMOVDaddridx)
                v.AuxInt = c
                v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                return true
        }
-       // match: (ADD (MOVDaddr [c] {s} x) y)
-       // cond: x.Op != OpSB && y.Op != OpSB
-       // result: (MOVDaddridx [c] {s} x y)
+       // match: (ADD (MOVDaddr [c] {s} ptr) idx)
+       // cond: ptr.Op != OpSB && idx.Op != OpSB
+       // result: (MOVDaddridx [c] {s} ptr idx)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XMOVDaddr {
@@ -5808,16 +5810,16 @@ func rewriteValueS390X_OpS390XADD(v *Value) bool {
                }
                c := v_0.AuxInt
                s := v_0.Aux
-               x := v_0.Args[0]
-               y := v.Args[1]
-               if !(x.Op != OpSB && y.Op != OpSB) {
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               if !(ptr.Op != OpSB && idx.Op != OpSB) {
                        break
                }
                v.reset(OpS390XMOVDaddridx)
                v.AuxInt = c
                v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                return true
        }
        // match: (ADD x (NEG y))
@@ -5835,6 +5837,21 @@ func rewriteValueS390X_OpS390XADD(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADD (NEG y) x)
+       // cond:
+       // result: (SUB x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XNEG {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpS390XSUB)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADD <t> x g:(MOVDload [off] {sym} ptr mem))
        // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
        // result: (ADDload <t> [off] {sym} x ptr mem)
@@ -5887,6 +5904,58 @@ func rewriteValueS390X_OpS390XADD(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (ADD <t> g:(MOVDload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ADDload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XADDload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ADD <t> x g:(MOVDload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ADDload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XADDload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueS390X_OpS390XADDW(v *Value) bool {
@@ -5920,9 +5989,9 @@ func rewriteValueS390X_OpS390XADDW(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDW (SLWconst x [c]) (SRWconst x [32-c]))
-       // cond:
-       // result: (RLLconst [   c] x)
+       // match: (ADDW (SLWconst x [c]) (SRWconst x [d]))
+       // cond: d == 32-c
+       // result: (RLLconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XSLWconst {
@@ -5934,10 +6003,11 @@ func rewriteValueS390X_OpS390XADDW(v *Value) bool {
                if v_1.Op != OpS390XSRWconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(OpS390XRLLconst)
@@ -5945,28 +6015,29 @@ func rewriteValueS390X_OpS390XADDW(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDW (SRWconst x [c]) (SLWconst x [32-c]))
-       // cond:
-       // result: (RLLconst [32-c] x)
+       // match: (ADDW (SRWconst x [d]) (SLWconst x [c]))
+       // cond: d == 32-c
+       // result: (RLLconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XSRWconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpS390XSLWconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(OpS390XRLLconst)
-               v.AuxInt = 32 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
@@ -5985,6 +6056,21 @@ func rewriteValueS390X_OpS390XADDW(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDW (NEGW y) x)
+       // cond:
+       // result: (SUBW x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XNEGW {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpS390XSUBW)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDW <t> x g:(MOVWload [off] {sym} ptr mem))
        // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
        // result: (ADDWload <t> [off] {sym} x ptr mem)
@@ -6037,6 +6123,58 @@ func rewriteValueS390X_OpS390XADDW(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (ADDW <t> g:(MOVWload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ADDWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XADDWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ADDW <t> x g:(MOVWload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ADDWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XADDWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        // match: (ADDW <t> x g:(MOVWZload [off] {sym} ptr mem))
        // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
        // result: (ADDWload <t> [off] {sym} x ptr mem)
@@ -6089,6 +6227,58 @@ func rewriteValueS390X_OpS390XADDW(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (ADDW <t> g:(MOVWZload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ADDWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XADDWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ADDW <t> x g:(MOVWZload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ADDWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XADDWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueS390X_OpS390XADDWconst(v *Value) bool {
@@ -6292,22 +6482,6 @@ func rewriteValueS390X_OpS390XAND(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (AND (MOVDconst [0xFF]) x)
-       // cond:
-       // result: (MOVBZreg x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
-                       break
-               }
-               if v_0.AuxInt != 0xFF {
-                       break
-               }
-               x := v.Args[1]
-               v.reset(OpS390XMOVBZreg)
-               v.AddArg(x)
-               return true
-       }
        // match: (AND x (MOVDconst [0xFF]))
        // cond:
        // result: (MOVBZreg x)
@@ -6324,19 +6498,19 @@ func rewriteValueS390X_OpS390XAND(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (AND (MOVDconst [0xFFFF]) x)
+       // match: (AND (MOVDconst [0xFF]) x)
        // cond:
-       // result: (MOVHZreg x)
+       // result: (MOVBZreg x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XMOVDconst {
                        break
                }
-               if v_0.AuxInt != 0xFFFF {
+               if v_0.AuxInt != 0xFF {
                        break
                }
                x := v.Args[1]
-               v.reset(OpS390XMOVHZreg)
+               v.reset(OpS390XMOVBZreg)
                v.AddArg(x)
                return true
        }
@@ -6356,19 +6530,19 @@ func rewriteValueS390X_OpS390XAND(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (AND (MOVDconst [0xFFFFFFFF]) x)
+       // match: (AND (MOVDconst [0xFFFF]) x)
        // cond:
-       // result: (MOVWZreg x)
+       // result: (MOVHZreg x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XMOVDconst {
                        break
                }
-               if v_0.AuxInt != 0xFFFFFFFF {
+               if v_0.AuxInt != 0xFFFF {
                        break
                }
                x := v.Args[1]
-               v.reset(OpS390XMOVWZreg)
+               v.reset(OpS390XMOVHZreg)
                v.AddArg(x)
                return true
        }
@@ -6388,15 +6562,31 @@ func rewriteValueS390X_OpS390XAND(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (AND (MOVDconst [c]) (MOVDconst [d]))
+       // match: (AND (MOVDconst [0xFFFFFFFF]) x)
        // cond:
-       // result: (MOVDconst [c&d])
+       // result: (MOVWZreg x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XMOVDconst {
                        break
                }
-               c := v_0.AuxInt
+               if v_0.AuxInt != 0xFFFFFFFF {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpS390XMOVWZreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (AND (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [c&d])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpS390XMOVDconst {
                        break
@@ -6406,6 +6596,24 @@ func rewriteValueS390X_OpS390XAND(v *Value) bool {
                v.AuxInt = c & d
                return true
        }
+       // match: (AND (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [c&d])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = c & d
+               return true
+       }
        // match: (AND x x)
        // cond:
        // result: x
@@ -6471,6 +6679,58 @@ func rewriteValueS390X_OpS390XAND(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (AND <t> g:(MOVDload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ANDload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XANDload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (AND <t> x g:(MOVDload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ANDload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XANDload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueS390X_OpS390XANDW(v *Value) bool {
@@ -6569,6 +6829,58 @@ func rewriteValueS390X_OpS390XANDW(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (ANDW <t> g:(MOVWload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ANDWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XANDWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ANDW <t> x g:(MOVWload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ANDWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XANDWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        // match: (ANDW <t> x g:(MOVWZload [off] {sym} ptr mem))
        // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
        // result: (ANDWload <t> [off] {sym} x ptr mem)
@@ -6621,6 +6933,58 @@ func rewriteValueS390X_OpS390XANDW(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (ANDW <t> g:(MOVWZload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ANDWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XANDWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ANDW <t> x g:(MOVWZload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ANDWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XANDWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueS390X_OpS390XANDWconst(v *Value) bool {
@@ -7237,23 +7601,6 @@ func rewriteValueS390X_OpS390XCMPconst(v *Value) bool {
        return false
 }
 func rewriteValueS390X_OpS390XFADD(v *Value) bool {
-       // match: (FADD x (FMUL y z))
-       // cond:
-       // result: (FMADD x y z)
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XFMUL {
-                       break
-               }
-               y := v_1.Args[0]
-               z := v_1.Args[1]
-               v.reset(OpS390XFMADD)
-               v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(z)
-               return true
-       }
        // match: (FADD (FMUL y z) x)
        // cond:
        // result: (FMADD x y z)
@@ -7271,26 +7618,26 @@ func rewriteValueS390X_OpS390XFADD(v *Value) bool {
                v.AddArg(z)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XFADDS(v *Value) bool {
-       // match: (FADDS x (FMULS y z))
+       // match: (FADD x (FMUL y z))
        // cond:
-       // result: (FMADDS x y z)
+       // result: (FMADD x y z)
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpS390XFMULS {
+               if v_1.Op != OpS390XFMUL {
                        break
                }
                y := v_1.Args[0]
                z := v_1.Args[1]
-               v.reset(OpS390XFMADDS)
+               v.reset(OpS390XFMADD)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(z)
                return true
        }
+       return false
+}
+func rewriteValueS390X_OpS390XFADDS(v *Value) bool {
        // match: (FADDS (FMULS y z) x)
        // cond:
        // result: (FMADDS x y z)
@@ -7308,10 +7655,27 @@ func rewriteValueS390X_OpS390XFADDS(v *Value) bool {
                v.AddArg(z)
                return true
        }
+       // match: (FADDS x (FMULS y z))
+       // cond:
+       // result: (FMADDS x y z)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XFMULS {
+                       break
+               }
+               y := v_1.Args[0]
+               z := v_1.Args[1]
+               v.reset(OpS390XFMADDS)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(z)
+               return true
+       }
        return false
 }
 func rewriteValueS390X_OpS390XFMOVDload(v *Value) bool {
-       // match: (FMOVDload  [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is20Bit(off1+off2)
        // result: (FMOVDload [off1+off2] {sym} ptr mem)
        for {
@@ -7617,7 +7981,7 @@ func rewriteValueS390X_OpS390XFMOVDstoreidx(v *Value) bool {
        return false
 }
 func rewriteValueS390X_OpS390XFMOVSload(v *Value) bool {
-       // match: (FMOVSload  [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is20Bit(off1+off2)
        // result: (FMOVSload [off1+off2] {sym} ptr mem)
        for {
@@ -8017,7 +8381,7 @@ func rewriteValueS390X_OpS390XMOVBZload(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVBZload  [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVBZload [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is20Bit(off1+off2)
        // result: (MOVBZload [off1+off2] {sym} ptr mem)
        for {
@@ -8040,7 +8404,7 @@ func rewriteValueS390X_OpS390XMOVBZload(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBZload  [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
+       // match: (MOVBZload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
        // result: (MOVBZload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
@@ -8139,19 +8503,19 @@ func rewriteValueS390X_OpS390XMOVBZloadidx(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBZloadidx [c] {sym} ptr (ADDconst [d] idx) mem)
+       // match: (MOVBZloadidx [c] {sym} idx (ADDconst [d] ptr) mem)
        // cond:
        // result: (MOVBZloadidx [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
-               ptr := v.Args[0]
+               idx := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpS390XADDconst {
                        break
                }
                d := v_1.AuxInt
-               idx := v_1.Args[0]
+               ptr := v_1.Args[0]
                mem := v.Args[2]
                v.reset(OpS390XMOVBZloadidx)
                v.AuxInt = c + d
@@ -8161,18 +8525,62 @@ func rewriteValueS390X_OpS390XMOVBZloadidx(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVBZreg(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVBZreg x:(MOVDLT (MOVDconst [c]) (MOVDconst [d]) _))
-       // cond: int64(uint8(c)) == c && int64(uint8(d)) == d
-       // result: (MOVDreg x)
+       // match: (MOVBZloadidx [c] {sym} ptr (ADDconst [d] idx) mem)
+       // cond:
+       // result: (MOVBZloadidx [c+d] {sym} ptr idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVDLT {
-                       break
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVBZloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBZloadidx [c] {sym} (ADDconst [d] idx) ptr mem)
+       // cond:
+       // result: (MOVBZloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVBZloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVBZreg(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVBZreg x:(MOVDLT (MOVDconst [c]) (MOVDconst [d]) _))
+       // cond: int64(uint8(c)) == c && int64(uint8(d)) == d
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVDLT {
+                       break
                }
                x_0 := x.Args[0]
                if x_0.Op != OpS390XMOVDconst {
@@ -8474,7 +8882,7 @@ func rewriteValueS390X_OpS390XMOVBZreg(v *Value) bool {
        return false
 }
 func rewriteValueS390X_OpS390XMOVBload(v *Value) bool {
-       // match: (MOVBload   [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVBload [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is20Bit(off1+off2)
        // result: (MOVBload  [off1+off2] {sym} ptr mem)
        for {
@@ -8649,7 +9057,7 @@ func rewriteValueS390X_OpS390XMOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore  [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // match: (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem)
        // cond: is20Bit(off1+off2)
        // result: (MOVBstore  [off1+off2] {sym} ptr val mem)
        for {
@@ -8697,7 +9105,7 @@ func rewriteValueS390X_OpS390XMOVBstore(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore  [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem)
+       // match: (MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
        // result: (MOVBstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
@@ -9247,6 +9655,30 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBstoreidx [c] {sym} idx (ADDconst [d] ptr) val mem)
+       // cond:
+       // result: (MOVBstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVBstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVBstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem)
        // cond:
        // result: (MOVBstoreidx [c+d] {sym} ptr idx val mem)
@@ -9271,6 +9703,30 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBstoreidx [c] {sym} (ADDconst [d] idx) ptr val mem)
+       // cond:
+       // result: (MOVBstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVBstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [8] w) mem))
        // cond: x.Uses == 1   && clobber(x)
        // result: (MOVHstoreidx [i-1] {s} p idx w mem)
@@ -9319,20 +9775,15 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [j+8] w) mem))
+       // match: (MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} idx p (SRDconst [8] w) mem))
        // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVHstoreidx [i-1] {s} p idx w0 mem)
+       // result: (MOVHstoreidx [i-1] {s} p idx w mem)
        for {
                i := v.AuxInt
                s := v.Aux
                p := v.Args[0]
                idx := v.Args[1]
-               w0 := v.Args[2]
-               if w0.Op != OpS390XSRDconst {
-                       break
-               }
-               j := w0.AuxInt
-               w := w0.Args[0]
+               w := v.Args[2]
                x := v.Args[3]
                if x.Op != OpS390XMOVBstoreidx {
                        break
@@ -9343,17 +9794,17 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                if x.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if idx != x.Args[0] {
                        break
                }
-               if idx != x.Args[1] {
+               if p != x.Args[1] {
                        break
                }
                x_2 := x.Args[2]
                if x_2.Op != OpS390XSRDconst {
                        break
                }
-               if x_2.AuxInt != j+8 {
+               if x_2.AuxInt != 8 {
                        break
                }
                if w != x_2.Args[0] {
@@ -9368,18 +9819,18 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                v.Aux = s
                v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(w0)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [8] w) mem))
+       // match: (MOVBstoreidx [i] {s} idx p w x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [8] w) mem))
        // cond: x.Uses == 1   && clobber(x)
        // result: (MOVHstoreidx [i-1] {s} p idx w mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
+               idx := v.Args[0]
+               p := v.Args[1]
                w := v.Args[2]
                x := v.Args[3]
                if x.Op != OpS390XMOVBstoreidx {
@@ -9398,7 +9849,7 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                        break
                }
                x_2 := x.Args[2]
-               if x_2.Op != OpS390XSRWconst {
+               if x_2.Op != OpS390XSRDconst {
                        break
                }
                if x_2.AuxInt != 8 {
@@ -9420,20 +9871,15 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [j+8] w) mem))
+       // match: (MOVBstoreidx [i] {s} idx p w x:(MOVBstoreidx [i-1] {s} idx p (SRDconst [8] w) mem))
        // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVHstoreidx [i-1] {s} p idx w0 mem)
+       // result: (MOVHstoreidx [i-1] {s} p idx w mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               w0 := v.Args[2]
-               if w0.Op != OpS390XSRWconst {
-                       break
-               }
-               j := w0.AuxInt
-               w := w0.Args[0]
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
                x := v.Args[3]
                if x.Op != OpS390XMOVBstoreidx {
                        break
@@ -9444,17 +9890,17 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                if x.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if idx != x.Args[0] {
                        break
                }
-               if idx != x.Args[1] {
+               if p != x.Args[1] {
                        break
                }
                x_2 := x.Args[2]
-               if x_2.Op != OpS390XSRWconst {
+               if x_2.Op != OpS390XSRDconst {
                        break
                }
-               if x_2.AuxInt != j+8 {
+               if x_2.AuxInt != 8 {
                        break
                }
                if w != x_2.Args[0] {
@@ -9469,26 +9915,24 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                v.Aux = s
                v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(w0)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx [i] {s} p idx (SRDconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
+       // match: (MOVBstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [j+8] w) mem))
        // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVHBRstoreidx [i-1] {s} p idx w mem)
+       // result: (MOVHstoreidx [i-1] {s} p idx w0 mem)
        for {
                i := v.AuxInt
                s := v.Aux
                p := v.Args[0]
                idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XSRDconst {
-                       break
-               }
-               if v_2.AuxInt != 8 {
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
                        break
                }
-               w := v_2.Args[0]
+               j := w0.AuxInt
+               w := w0.Args[0]
                x := v.Args[3]
                if x.Op != OpS390XMOVBstoreidx {
                        break
@@ -9505,36 +9949,43 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                if idx != x.Args[1] {
                        break
                }
-               if w != x.Args[2] {
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != j+8 {
+                       break
+               }
+               if w != x_2.Args[0] {
                        break
                }
                mem := x.Args[3]
                if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpS390XMOVHBRstoreidx)
+               v.reset(OpS390XMOVHstoreidx)
                v.AuxInt = i - 1
                v.Aux = s
                v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(w)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRDconst [j-8] w) mem))
+       // match: (MOVBstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} idx p (SRDconst [j+8] w) mem))
        // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
+       // result: (MOVHstoreidx [i-1] {s} p idx w0 mem)
        for {
                i := v.AuxInt
                s := v.Aux
                p := v.Args[0]
                idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XSRDconst {
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
                        break
                }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
+               j := w0.AuxInt
+               w := w0.Args[0]
                x := v.Args[3]
                if x.Op != OpS390XMOVBstoreidx {
                        break
@@ -9545,27 +9996,27 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                if x.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if idx != x.Args[0] {
                        break
                }
-               if idx != x.Args[1] {
+               if p != x.Args[1] {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != OpS390XSRDconst {
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
                        break
                }
-               if w0.AuxInt != j-8 {
+               if x_2.AuxInt != j+8 {
                        break
                }
-               if w != w0.Args[0] {
+               if w != x_2.Args[0] {
                        break
                }
                mem := x.Args[3]
                if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpS390XMOVHBRstoreidx)
+               v.reset(OpS390XMOVHstoreidx)
                v.AuxInt = i - 1
                v.Aux = s
                v.AddArg(p)
@@ -9574,22 +10025,20 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx [i] {s} p idx (SRWconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
+       // match: (MOVBstoreidx [i] {s} idx p w0:(SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [j+8] w) mem))
        // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVHBRstoreidx [i-1] {s} p idx w mem)
+       // result: (MOVHstoreidx [i-1] {s} p idx w0 mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XSRWconst {
-                       break
-               }
-               if v_2.AuxInt != 8 {
+               idx := v.Args[0]
+               p := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
                        break
                }
-               w := v_2.Args[0]
+               j := w0.AuxInt
+               w := w0.Args[0]
                x := v.Args[3]
                if x.Op != OpS390XMOVBstoreidx {
                        break
@@ -9606,36 +10055,43 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                if idx != x.Args[1] {
                        break
                }
-               if w != x.Args[2] {
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != j+8 {
+                       break
+               }
+               if w != x_2.Args[0] {
                        break
                }
                mem := x.Args[3]
                if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpS390XMOVHBRstoreidx)
+               v.reset(OpS390XMOVHstoreidx)
                v.AuxInt = i - 1
                v.Aux = s
                v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(w)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRWconst [j-8] w) mem))
+       // match: (MOVBstoreidx [i] {s} idx p w0:(SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} idx p (SRDconst [j+8] w) mem))
        // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
+       // result: (MOVHstoreidx [i-1] {s} p idx w0 mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XSRWconst {
+               idx := v.Args[0]
+               p := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
                        break
                }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
+               j := w0.AuxInt
+               w := w0.Args[0]
                x := v.Args[3]
                if x.Op != OpS390XMOVBstoreidx {
                        break
@@ -9646,27 +10102,27 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                if x.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if idx != x.Args[0] {
                        break
                }
-               if idx != x.Args[1] {
+               if p != x.Args[1] {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != OpS390XSRWconst {
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
                        break
                }
-               if w0.AuxInt != j-8 {
+               if x_2.AuxInt != j+8 {
                        break
                }
-               if w != w0.Args[0] {
+               if w != x_2.Args[0] {
                        break
                }
                mem := x.Args[3]
                if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpS390XMOVHBRstoreidx)
+               v.reset(OpS390XMOVHstoreidx)
                v.AuxInt = i - 1
                v.Aux = s
                v.AddArg(p)
@@ -9675,5559 +10131,20495 @@ func rewriteValueS390X_OpS390XMOVBstoreidx(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDEQ(v *Value) bool {
-       // match: (MOVDEQ x y (InvertFlags cmp))
-       // cond:
-       // result: (MOVDEQ x y cmp)
+       // match: (MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHstoreidx [i-1] {s} p idx w mem)
        for {
-               x := v.Args[0]
-               y := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XInvertFlags {
-                       break
-               }
-               cmp := v_2.Args[0]
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != 8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} idx p (SRWconst [8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != 8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p w x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != 8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p w x:(MOVBstoreidx [i-1] {s} idx p (SRWconst [8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != 8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [j+8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != j+8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} idx p (SRWconst [j+8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != j+8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p w0:(SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [j+8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != j+8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p w0:(SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} idx p (SRWconst [j+8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != j+8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx (SRDconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx (SRDconst [8] w) x:(MOVBstoreidx [i-1] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p (SRDconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p (SRDconst [8] w) x:(MOVBstoreidx [i-1] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRDconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} idx p w0:(SRDconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p (SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRDconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p (SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} idx p w0:(SRDconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx (SRWconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx (SRWconst [8] w) x:(MOVBstoreidx [i-1] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p (SRWconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p (SRWconst [8] w) x:(MOVBstoreidx [i-1] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRWconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} idx p w0:(SRWconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p (SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRWconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx [i] {s} idx p (SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} idx p w0:(SRWconst [j-8] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVBstoreidx {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+// rewriteValueS390X_OpS390XMOVDEQ tries the MOVDEQ rewrite rules below in order
+// and applies the first one whose pattern matches, mutating v in place.
+// It reports true if v was rewritten and false if no rule matched.
+//
+// Every rule is written as a single-pass "for { ... }" block: a "break" gives
+// up on that rule and falls through to the next one, while a successful match
+// ends with "return true".
+func rewriteValueS390X_OpS390XMOVDEQ(v *Value) bool {
+       // match: (MOVDEQ x y (InvertFlags cmp))
+       // cond:
+       // result: (MOVDEQ x y cmp)
+       // The op stays MOVDEQ; only the InvertFlags wrapper around the third
+       // argument is dropped and its operand is used directly.
+       for {
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XInvertFlags {
+                       break
+               }
+               cmp := v_2.Args[0]
                v.reset(OpS390XMOVDEQ)
                v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(cmp)
+               v.AddArg(y)
+               v.AddArg(cmp)
+               return true
+       }
+       // match: (MOVDEQ _ x (FlagEQ))
+       // cond:
+       // result: x
+       // With a FlagEQ third argument the value becomes a copy of its second
+       // argument (presumably FlagEQ is a statically known "equal" result).
+       for {
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagEQ {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVDEQ y _ (FlagLT))
+       // cond:
+       // result: y
+       // With a FlagLT third argument the value becomes a copy of its first
+       // argument.
+       for {
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagLT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (MOVDEQ y _ (FlagGT))
+       // cond:
+       // result: y
+       // FlagGT is handled the same way as FlagLT: copy the first argument.
+       for {
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XFlagGT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
+// rewriteValueS390X_OpS390XMOVDGE simplifies a MOVDGE value according to the op
+// of its third (flags) argument. v is rewritten in place; the result reports
+// whether any rewrite happened.
+//
+//   (MOVDGE x y (InvertFlags cmp)) -> (MOVDLE x y cmp)
+//   (MOVDGE _ x (FlagEQ))          -> x
+//   (MOVDGE y _ (FlagLT))          -> y
+//   (MOVDGE _ x (FlagGT))          -> x
+func rewriteValueS390X_OpS390XMOVDGE(v *Value) bool {
+       arg0, arg1, flags := v.Args[0], v.Args[1], v.Args[2]
+       // keep is the operand that v collapses to when the flags op is one of
+       // the Flag* constants handled below.
+       var keep *Value
+       switch flags.Op {
+       case OpS390XInvertFlags:
+               // Unwrap InvertFlags and switch to the mirrored op.
+               inner := flags.Args[0]
+               v.reset(OpS390XMOVDLE)
+               v.AddArg(arg0)
+               v.AddArg(arg1)
+               v.AddArg(inner)
+               return true
+       case OpS390XFlagEQ, OpS390XFlagGT:
+               keep = arg1
+       case OpS390XFlagLT:
+               keep = arg0
+       default:
+               return false
+       }
+       v.reset(OpCopy)
+       v.Type = keep.Type
+       v.AddArg(keep)
+       return true
+}
+// rewriteValueS390X_OpS390XMOVDGT simplifies a MOVDGT value according to the op
+// of its third (flags) argument. v is rewritten in place; the result reports
+// whether any rewrite happened.
+//
+//   (MOVDGT x y (InvertFlags cmp)) -> (MOVDLT x y cmp)
+//   (MOVDGT y _ (FlagEQ))          -> y
+//   (MOVDGT y _ (FlagLT))          -> y
+//   (MOVDGT _ x (FlagGT))          -> x
+func rewriteValueS390X_OpS390XMOVDGT(v *Value) bool {
+       arg0, arg1, flags := v.Args[0], v.Args[1], v.Args[2]
+       // keep is the operand that v collapses to when the flags op is one of
+       // the Flag* constants handled below.
+       var keep *Value
+       switch flags.Op {
+       case OpS390XInvertFlags:
+               // Unwrap InvertFlags and switch to the mirrored op.
+               inner := flags.Args[0]
+               v.reset(OpS390XMOVDLT)
+               v.AddArg(arg0)
+               v.AddArg(arg1)
+               v.AddArg(inner)
+               return true
+       case OpS390XFlagEQ, OpS390XFlagLT:
+               keep = arg0
+       case OpS390XFlagGT:
+               keep = arg1
+       default:
+               return false
+       }
+       v.reset(OpCopy)
+       v.Type = keep.Type
+       v.AddArg(keep)
+       return true
+}
+// rewriteValueS390X_OpS390XMOVDLE simplifies a MOVDLE value according to the op
+// of its third (flags) argument. v is rewritten in place; the result reports
+// whether any rewrite happened.
+//
+//   (MOVDLE x y (InvertFlags cmp)) -> (MOVDGE x y cmp)
+//   (MOVDLE _ x (FlagEQ))          -> x
+//   (MOVDLE _ x (FlagLT))          -> x
+//   (MOVDLE y _ (FlagGT))          -> y
+func rewriteValueS390X_OpS390XMOVDLE(v *Value) bool {
+       arg0, arg1, flags := v.Args[0], v.Args[1], v.Args[2]
+       // keep is the operand that v collapses to when the flags op is one of
+       // the Flag* constants handled below.
+       var keep *Value
+       switch flags.Op {
+       case OpS390XInvertFlags:
+               // Unwrap InvertFlags and switch to the mirrored op.
+               inner := flags.Args[0]
+               v.reset(OpS390XMOVDGE)
+               v.AddArg(arg0)
+               v.AddArg(arg1)
+               v.AddArg(inner)
+               return true
+       case OpS390XFlagEQ, OpS390XFlagLT:
+               keep = arg1
+       case OpS390XFlagGT:
+               keep = arg0
+       default:
+               return false
+       }
+       v.reset(OpCopy)
+       v.Type = keep.Type
+       v.AddArg(keep)
+       return true
+}
+// rewriteValueS390X_OpS390XMOVDLT simplifies a MOVDLT value according to the op
+// of its third (flags) argument. v is rewritten in place; the result reports
+// whether any rewrite happened.
+//
+//   (MOVDLT x y (InvertFlags cmp)) -> (MOVDGT x y cmp)
+//   (MOVDLT y _ (FlagEQ))          -> y
+//   (MOVDLT _ x (FlagLT))          -> x
+//   (MOVDLT y _ (FlagGT))          -> y
+func rewriteValueS390X_OpS390XMOVDLT(v *Value) bool {
+       arg0, arg1, flags := v.Args[0], v.Args[1], v.Args[2]
+       // keep is the operand that v collapses to when the flags op is one of
+       // the Flag* constants handled below.
+       var keep *Value
+       switch flags.Op {
+       case OpS390XInvertFlags:
+               // Unwrap InvertFlags and switch to the mirrored op.
+               inner := flags.Args[0]
+               v.reset(OpS390XMOVDGT)
+               v.AddArg(arg0)
+               v.AddArg(arg1)
+               v.AddArg(inner)
+               return true
+       case OpS390XFlagEQ, OpS390XFlagGT:
+               keep = arg0
+       case OpS390XFlagLT:
+               keep = arg1
+       default:
+               return false
+       }
+       v.reset(OpCopy)
+       v.Type = keep.Type
+       v.AddArg(keep)
+       return true
+}
+// rewriteValueS390X_OpS390XMOVDNE simplifies a MOVDNE value according to the op
+// of its third (flags) argument. v is rewritten in place; the result reports
+// whether any rewrite happened.
+//
+//   (MOVDNE x y (InvertFlags cmp)) -> (MOVDNE x y cmp)
+//   (MOVDNE y _ (FlagEQ))          -> y
+//   (MOVDNE _ x (FlagLT))          -> x
+//   (MOVDNE _ x (FlagGT))          -> x
+func rewriteValueS390X_OpS390XMOVDNE(v *Value) bool {
+       arg0, arg1, flags := v.Args[0], v.Args[1], v.Args[2]
+       // keep is the operand that v collapses to when the flags op is one of
+       // the Flag* constants handled below.
+       var keep *Value
+       switch flags.Op {
+       case OpS390XInvertFlags:
+               // The op is unchanged here; only the InvertFlags wrapper goes.
+               inner := flags.Args[0]
+               v.reset(OpS390XMOVDNE)
+               v.AddArg(arg0)
+               v.AddArg(arg1)
+               v.AddArg(inner)
+               return true
+       case OpS390XFlagEQ:
+               keep = arg0
+       case OpS390XFlagLT, OpS390XFlagGT:
+               keep = arg1
+       default:
+               return false
+       }
+       v.reset(OpCopy)
+       v.Type = keep.Type
+       v.AddArg(keep)
+       return true
+}
+func rewriteValueS390X_OpS390XMOVDaddridx(v *Value) bool {
+       // match: (MOVDaddridx [c] {s} (ADDconst [d] x) y)
+       // cond: is20Bit(c+d) && x.Op != OpSB
+       // result: (MOVDaddridx [c+d] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is20Bit(c+d) && x.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVDaddridx)
+               v.AuxInt = c + d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (MOVDaddridx [c] {s} x (ADDconst [d] y))
+       // cond: is20Bit(c+d) && y.Op != OpSB
+       // result: (MOVDaddridx [c+d] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(is20Bit(c+d) && y.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVDaddridx)
+               v.AuxInt = c + d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (MOVDaddridx [off1] {sym1} (MOVDaddr [off2] {sym2} x) y)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
+       // result: (MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVDaddridx)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (MOVDaddridx [off1] {sym1} x (MOVDaddr [off2] {sym2} y))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB
+       // result: (MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               y := v_1.Args[0]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVDaddridx)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVDload(v *Value) bool {
+       // match: (MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDreg x)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDstore {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is20Bit(off1+off2)
+       // result: (MOVDload  [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is20Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpS390XMOVDload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVDload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVDload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVDloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddridx {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVDloadidx)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDload [off] {sym} (ADD ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVDloadidx [off] {sym} ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADD {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVDloadidx)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVDloadidx(v *Value) bool {
+       // match: (MOVDloadidx [c] {sym} (ADDconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVDloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVDloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDloadidx [c] {sym} idx (ADDconst [d] ptr) mem)
+       // cond:
+       // result: (MOVDloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVDloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDloadidx [c] {sym} ptr (ADDconst [d] idx) mem)
+       // cond:
+       // result: (MOVDloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVDloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDloadidx [c] {sym} (ADDconst [d] idx) ptr mem)
+       // cond:
+       // result: (MOVDloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVDloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVDnop(v *Value) bool { // Tries the MOVDnop rewrite rules on v in order; returns true as soon as one has rewritten v, false if none matched.
+       b := v.Block
+       _ = b // blank use of b
+       // match: (MOVDnop <t> x)
+       // cond: t.Compare(x.Type) == CMPeq
+       // result: x
+       for { // body runs at most once: every path ends in break (rule did not match) or return true
+               t := v.Type
+               x := v.Args[0]
+               if !(t.Compare(x.Type) == CMPeq) {
+                       break
+               }
+               v.reset(OpCopy) // the types compare equal, so v simply becomes a copy of x
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVDnop (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [c])
+       for { // a no-op move of a constant is replaced by the constant itself
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = c
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVBZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBZload <t> [off] {sym} ptr mem)
+       for { // re-issue the load with v's type t, in the load's own block, and make v a copy of it
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZload {
+                       break
+               }
+               off := x.AuxInt // offset, symbol and operands are all read before clobber(x) is called below
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) { // clobber runs only when x.Uses == 1; it is defined elsewhere — presumably it invalidates x (TODO confirm)
+                       break
+               }
+               b = x.Block // the "@x.Block" of the result: the replacement is created in x's block rather than v's
+               v0 := b.NewValue0(v.Pos, OpS390XMOVBZload, t) // new load of type t, carrying v's position
+               v.reset(OpCopy) // v itself turns into a copy of the new load
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVBload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBload  <t> [off] {sym} ptr mem)
+       for { // same rewrite as for MOVBZload, applied to MOVBload
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVBload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVHZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVHZload <t> [off] {sym} ptr mem)
+       for { // same rewrite, applied to MOVHZload
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVHload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVHload  <t> [off] {sym} ptr mem)
+       for { // same rewrite, applied to MOVHload
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVWZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWZload <t> [off] {sym} ptr mem)
+       for { // same rewrite, applied to MOVWZload
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVWload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWload  <t> [off] {sym} ptr mem)
+       for { // same rewrite, applied to MOVWload
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVDload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVDload  <t> [off] {sym} ptr mem)
+       for { // same rewrite, applied to MOVDload
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVDload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVBZloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBZloadidx <t> [off] {sym} ptr idx mem)
+       for { // indexed loads get the same rewrite, with the extra idx operand carried over
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2] // for the indexed forms the memory operand is argument 2, not 1
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVBZloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVHZloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVHZloadidx <t> [off] {sym} ptr idx mem)
+       for { // same indexed rewrite, applied to MOVHZloadidx
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVWZloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWZloadidx <t> [off] {sym} ptr idx mem)
+       for { // same indexed rewrite, applied to MOVWZloadidx
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDnop <t> x:(MOVDloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVDloadidx  <t> [off] {sym} ptr idx mem)
+       for { // same indexed rewrite, applied to MOVDloadidx
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVDloadidx {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       return false // no rule fired
+}
+func rewriteValueS390X_OpS390XMOVDreg(v *Value) bool { // Tries the MOVDreg rewrite rules on v in order; returns true as soon as one has rewritten v, false if none matched.
+       b := v.Block
+       _ = b // blank use of b
+       // match: (MOVDreg <t> x)
+       // cond: t.Compare(x.Type) == CMPeq
+       // result: x
+       for { // body runs at most once: every path ends in break (rule did not match) or return true
+               t := v.Type
+               x := v.Args[0]
+               if !(t.Compare(x.Type) == CMPeq) {
+                       break
+               }
+               v.reset(OpCopy) // the types compare equal, so v simply becomes a copy of x
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVDreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [c])
+       for { // a register move of a constant is replaced by the constant itself
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = c
+               return true
+       }
+       // match: (MOVDreg x)
+       // cond: x.Uses == 1
+       // result: (MOVDnop x)
+       for { // when v is the only user of x, the register move is turned into a MOVDnop of x
+               x := v.Args[0]
+               if !(x.Uses == 1) {
+                       break
+               }
+               v.reset(OpS390XMOVDnop)
+               v.AddArg(x)
+               return true // NOTE(review): this returns for every x with x.Uses == 1, and every rule below also requires x.Uses == 1, so none of them can fire from this function as written
+       }
+       // match: (MOVDreg <t> x:(MOVBZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBZload <t> [off] {sym} ptr mem)
+       for { // re-issue the load with v's type t, in the load's own block, and make v a copy of it
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZload {
+                       break
+               }
+               off := x.AuxInt // offset, symbol and operands are all read before clobber(x) is called below
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) { // clobber runs only when x.Uses == 1; it is defined elsewhere — presumably it invalidates x (TODO confirm)
+                       break
+               }
+               b = x.Block // the "@x.Block" of the result: the replacement is created in x's block rather than v's
+               v0 := b.NewValue0(v.Pos, OpS390XMOVBZload, t) // new load of type t, carrying v's position
+               v.reset(OpCopy) // v itself turns into a copy of the new load
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVBload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBload  <t> [off] {sym} ptr mem)
+       for { // same rewrite as for MOVBZload, applied to MOVBload
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVBload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVHZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVHZload <t> [off] {sym} ptr mem)
+       for { // same rewrite, applied to MOVHZload
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVHload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVHload  <t> [off] {sym} ptr mem)
+       for { // same rewrite, applied to MOVHload
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVWZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWZload <t> [off] {sym} ptr mem)
+       for { // same rewrite, applied to MOVWZload
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVWload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWload  <t> [off] {sym} ptr mem)
+       for { // same rewrite, applied to MOVWload
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVDload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVDload  <t> [off] {sym} ptr mem)
+       for { // same rewrite, applied to MOVDload
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVDload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVBZloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBZloadidx <t> [off] {sym} ptr idx mem)
+       for { // indexed loads get the same rewrite, with the extra idx operand carried over
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2] // for the indexed forms the memory operand is argument 2, not 1
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVBZloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVHZloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVHZloadidx <t> [off] {sym} ptr idx mem)
+       for { // same indexed rewrite, applied to MOVHZloadidx
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVWZloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWZloadidx <t> [off] {sym} ptr idx mem)
+       for { // same indexed rewrite, applied to MOVWZloadidx
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVDreg <t> x:(MOVDloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVDloadidx  <t> [off] {sym} ptr idx mem)
+       for { // same indexed rewrite, applied to MOVDloadidx
+               t := v.Type
+               x := v.Args[0]
+               if x.Op != OpS390XMOVDloadidx {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       return false // no rule fired
+}
+func rewriteValueS390X_OpS390XMOVDstore(v *Value) bool {
+       // match: (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // cond: is20Bit(off1+off2)
+       // result: (MOVDstore  [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is20Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstore [off] {sym} ptr (MOVDconst [c]) mem)
+       // cond: validValAndOff(c,off) && int64(int16(c)) == c && ptr.Op != OpSB
+       // result: (MOVDstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validValAndOff(c, off) && int64(int16(c)) == c && ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreconst)
+               v.AuxInt = makeValAndOff(c, off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVDstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVDstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddridx {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstore [off] {sym} (ADD ptr idx) val mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVDstoreidx [off] {sym} ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADD {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstore [i] {s} p w1 x:(MOVDstore [i-8] {s} p w0 mem))
+       // cond: p.Op != OpSB   && x.Uses == 1   && is20Bit(i-8)   && clobber(x)
+       // result: (STMG2 [i-8] {s} p w0 w1 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w1 := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVDstore {
+                       break
+               }
+               if x.AuxInt != i-8 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               mem := x.Args[2]
+               if !(p.Op != OpSB && x.Uses == 1 && is20Bit(i-8) && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XSTMG2)
+               v.AuxInt = i - 8
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(w1)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstore [i] {s} p w2 x:(STMG2 [i-16] {s} p w0 w1 mem))
+       // cond: x.Uses == 1   && is20Bit(i-16)   && clobber(x)
+       // result: (STMG3 [i-16] {s} p w0 w1 w2 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w2 := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpS390XSTMG2 {
+                       break
+               }
+               if x.AuxInt != i-16 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               w1 := x.Args[2]
+               mem := x.Args[3]
+               if !(x.Uses == 1 && is20Bit(i-16) && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XSTMG3)
+               v.AuxInt = i - 16
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(w1)
+               v.AddArg(w2)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstore [i] {s} p w3 x:(STMG3 [i-24] {s} p w0 w1 w2 mem))
+       // cond: x.Uses == 1   && is20Bit(i-24)   && clobber(x)
+       // result: (STMG4 [i-24] {s} p w0 w1 w2 w3 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w3 := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpS390XSTMG3 {
+                       break
+               }
+               if x.AuxInt != i-24 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               w1 := x.Args[2]
+               w2 := x.Args[3]
+               mem := x.Args[4]
+               if !(x.Uses == 1 && is20Bit(i-24) && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XSTMG4)
+               v.AuxInt = i - 24
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(w1)
+               v.AddArg(w2)
+               v.AddArg(w3)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVDstoreconst(v *Value) bool { // tries the MOVDstoreconst rules below in order; returns true once one has rewritten v in place, false if none matched
+       // match: (MOVDstoreconst [sc] {s} (ADDconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVDstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       for { // runs at most once: every break abandons this rule and falls through to the next one
+               sc := v.AuxInt
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(sc).canAdd(off)) { // the fold is only attempted when canAdd accepts off
+                       break
+               }
+               v.reset(OpS390XMOVDstoreconst) // operands were captured above, so v can be rebuilt from scratch
+               v.AuxInt = ValAndOff(sc).add(off) // the ADDconst constant is absorbed into the store's AuxInt
+               v.Aux = s
+               v.AddArg(ptr) // address is now the ADDconst's own operand
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
+       // result: (MOVDstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       for { // same one-shot shape as above, for a MOVDaddr address operand
+               sc := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) { // both the symbols and the offsets must be combinable
+                       break
+               }
+               v.reset(OpS390XMOVDstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2) // the store's Aux becomes the merge of both symbols
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false // no rule applied; v is unchanged
+}
+func rewriteValueS390X_OpS390XMOVDstoreidx(v *Value) bool { // folds an ADDconst found in either of the first two args into AuxInt; the four loops cover the ptr/idx orderings; returns true if v was rewritten
+       // match: (MOVDstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVDstoreidx [c+d] {sym} ptr idx val mem)
+       for { // runs at most once: break abandons this rule and falls through to the next one
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = c + d // NOTE(review): c+d is not range-checked here, unlike the is20Bit guards on the MOVDstore STMG rules - confirm it cannot exceed the encodable displacement
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstoreidx [c] {sym} idx (ADDconst [d] ptr) val mem)
+       // cond:
+       // result: (MOVDstoreidx [c+d] {sym} ptr idx val mem)
+       for { // commuted form: the ADDconst sits in Args[1] and its operand is the one named ptr
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr) // args are re-emitted as (ptr, idx, val, mem), so the first two swap positions relative to the input
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem)
+       // cond:
+       // result: (MOVDstoreidx [c+d] {sym} ptr idx val mem)
+       for { // ADDconst in Args[1], this time with its operand named idx
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstoreidx [c] {sym} (ADDconst [d] idx) ptr val mem)
+       // cond:
+       // result: (MOVDstoreidx [c+d] {sym} ptr idx val mem)
+       for { // NOTE(review): tests the same position and op as the first loop, which always returns on a match, so this loop looks unreachable - confirm
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false // neither of the first two args is an ADDconst; v is unchanged
+}
+func rewriteValueS390X_OpS390XMOVHBRstore(v *Value) bool { // merges v with a preceding MOVHBRstore at offset i-2 into one MOVWBRstore when the stored values are related by a 16-bit right shift; returns true if v was rewritten
+       // match: (MOVHBRstore [i] {s} p (SRDconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstore [i-2] {s} p w mem)
+       for { // runs at most once: every break abandons this rule and falls through to the next one
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_1.AuxInt != 16 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2] // x is v's last argument and must itself be a MOVHBRstore
+               if x.Op != OpS390XMOVHBRstore {
+                       break
+               }
+               if x.AuxInt != i-2 { // x must target the offset 2 below v's
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] { // both stores must use the same base value
+                       break
+               }
+               if w != x.Args[1] { // x stores w itself while v stores w shifted right by 16
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) { // && short-circuits, so clobber(x) is only called when x has exactly one use
+                       break
+               }
+               v.reset(OpS390XMOVWBRstore)
+               v.AuxInt = i - 2 // the merged store takes x's offset
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem) // chains to x's last argument, leaving x out of the chain
+               return true
+       }
+       // match: (MOVHBRstore [i] {s} p (SRDconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRDconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstore [i-2] {s} p w0 mem)
+       for { // general-shift form: v stores w>>j and x stores w0 = w>>(j-16)
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVHBRstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-16 { // the two shift amounts must differ by exactly 16
+                       break
+               }
+               if w != w0.Args[0] { // both shifts must apply to the same source value
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0) // the merged store writes the less-shifted value w0
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstore [i] {s} p (SRWconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstore [i-2] {s} p w mem)
+       for { // same as the first rule, with SRWconst in place of SRDconst
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSRWconst {
+                       break
+               }
+               if v_1.AuxInt != 16 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVHBRstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstore [i] {s} p (SRWconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRWconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstore [i-2] {s} p w0 mem)
+       for { // same as the second rule, with SRWconst in place of SRDconst
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSRWconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVHBRstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false // no merge pattern matched; v is unchanged
+}
+func rewriteValueS390X_OpS390XMOVHBRstoreidx(v *Value) bool {
+       // match: (MOVHBRstoreidx [i] {s} p idx (SRDconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} p idx (SRDconst [16] w) x:(MOVHBRstoreidx [i-2] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} idx p (SRDconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} idx p (SRDconst [16] w) x:(MOVHBRstoreidx [i-2] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRDconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVHBRstoreidx [i-2] {s} idx p w0:(SRDconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} idx p (SRDconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRDconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} idx p (SRDconst [j] w) x:(MOVHBRstoreidx [i-2] {s} idx p w0:(SRDconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} p idx (SRWconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} p idx (SRWconst [16] w) x:(MOVHBRstoreidx [i-2] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} idx p (SRWconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} idx p (SRWconst [16] w) x:(MOVHBRstoreidx [i-2] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRWconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVHBRstoreidx [i-2] {s} idx p w0:(SRWconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} idx p (SRWconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRWconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHBRstoreidx [i] {s} idx p (SRWconst [j] w) x:(MOVHBRstoreidx [i-2] {s} idx p w0:(SRWconst [j-16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRWconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+// rewriteValueS390X_OpS390XMOVHZload tries the MOVHZload rewrite rules below,
+// one after another. The first rule whose pattern and condition both hold
+// rewrites v in place (v.reset followed by new aux fields and arguments) and
+// returns true. If no rule applies the function returns false.
+//
+// Every rule is wrapped in a "for { ... }" block that never iterates: "break"
+// abandons just that rule and control falls through to the next one.
+func rewriteValueS390X_OpS390XMOVHZload(v *Value) bool {
+       // Rule 1: the second argument of the load is a MOVHstore with the same
+       // symbol and offset and a pointer accepted by isSamePtr, so the stored
+       // value x is reused through MOVHZreg instead of being loaded again.
+       // match: (MOVHZload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVHZreg x)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVHstore {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpS390XMOVHZreg)
+               v.AddArg(x)
+               return true
+       }
+       // Rule 2: fold the constant of an ADDconst address into the load offset.
+       // Guarded by is20Bit(off1+off2) (presumably a displacement-range check;
+       // the helper is defined outside this function).
+       // match: (MOVHZload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is20Bit(off1+off2)
+       // result: (MOVHZload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is20Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpS390XMOVHZload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // Rule 3: absorb a MOVDaddr address into the load by summing the offsets
+       // and merging the two symbols with mergeSym.
+       // match: (MOVHZload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVHZload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVHZload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // Rule 4: same merge as rule 3, but for an indexed address (MOVDaddridx);
+       // the result is the indexed load MOVHZloadidx with ptr and idx as operands.
+       // match: (MOVHZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVHZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddridx {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVHZloadidx)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // Rule 5: an address formed by ADD becomes an indexed load, with the
+       // first ADD operand as ptr and the second as idx. The rule is skipped
+       // when that first operand is an OpSB value.
+       // match: (MOVHZload [off] {sym} (ADD ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVHZloadidx [off] {sym} ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADD {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVHZloadidx)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // No rule matched.
+       return false
+}
+// rewriteValueS390X_OpS390XMOVHZloadidx folds an ADDconst that feeds either of
+// the first two operands of a MOVHZloadidx into the load's AuxInt offset
+// (c+d). It returns true when v was rewritten and false otherwise.
+//
+// The four rules are the operand-order variants of the same fold. None of
+// them has a condition, so the first structural match wins.
+//
+// NOTE(review): rules 3 and 4 test exactly the same operand/op as rules 2 and
+// 1 respectively (Args[1] / Args[0] being an ADDconst). Because rules 1 and 2
+// return unconditionally on that test, rules 3 and 4 can never fire as
+// written. They presumably come from the generator's expansion of commuted
+// operands; confirm against the .rules source before relying on them.
+func rewriteValueS390X_OpS390XMOVHZloadidx(v *Value) bool {
+       // Rule 1: ADDconst in operand 0; its input becomes the first operand of
+       // the result and operand 1 is kept as the second.
+       // match: (MOVHZloadidx [c] {sym} (ADDconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVHZloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVHZloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // Rule 2: ADDconst in operand 1; note that the result places the
+       // ADDconst input first and the original operand 0 second.
+       // match: (MOVHZloadidx [c] {sym} idx (ADDconst [d] ptr) mem)
+       // cond:
+       // result: (MOVHZloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVHZloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // Rule 3: ADDconst in operand 1 again, keeping the original operand
+       // order in the result. Shadowed by rule 2 (see NOTE above).
+       // match: (MOVHZloadidx [c] {sym} ptr (ADDconst [d] idx) mem)
+       // cond:
+       // result: (MOVHZloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVHZloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // Rule 4: ADDconst in operand 0 again, with the two operands swapped in
+       // the result. Shadowed by rule 1 (see NOTE above).
+       // match: (MOVHZloadidx [c] {sym} (ADDconst [d] idx) ptr mem)
+       // cond:
+       // result: (MOVHZloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVHZloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // No rule matched.
+       return false
+}
+// rewriteValueS390X_OpS390XMOVHZreg tries the MOVHZreg rewrite rules below in
+// order and returns true as soon as one of them has rewritten v; it returns
+// false when none applies.
+//
+// Most rules replace the MOVHZreg by a plain MOVDreg of its operand x when x
+// is produced by one of the listed ops (or is an Arg of a suitable type). A
+// constant operand is folded directly, and the last two rules replace v by a
+// copy of a new load created in x's block.
+func rewriteValueS390X_OpS390XMOVHZreg(v *Value) bool {
+       b := v.Block
+       // Blank use of b. NOTE(review): presumably emitted by the generator so
+       // the function compiles whether or not any rule reads b.
+       _ = b
+       // Operand is a MOVBZload: rewrite to MOVDreg of that load.
+       // match: (MOVHZreg x:(MOVBZload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // Operand is a MOVHZload: rewrite to MOVDreg of that load. This rule is
+       // unconditional for every MOVHZload operand.
+       // match: (MOVHZreg x:(MOVHZload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // Operand is a function argument whose type is an unsigned 8- or 16-bit
+       // integer (as reported by is8BitInt/is16BitInt/isSigned).
+       // match: (MOVHZreg x:(Arg <t>))
+       // cond: (is8BitInt(t) || is16BitInt(t)) && !isSigned(t)
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpArg {
+                       break
+               }
+               t := x.Type
+               if !((is8BitInt(t) || is16BitInt(t)) && !isSigned(t)) {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // Operand is itself a MOVBZreg.
+       // match: (MOVHZreg x:(MOVBZreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // Operand is itself a MOVHZreg.
+       // match: (MOVHZreg x:(MOVHZreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // Constant folding: keep only the low 16 bits of c, zero-extended to
+       // 64 bits (int64(uint16(c))).
+       // match: (MOVHZreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(uint16(c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = int64(uint16(c))
+               return true
+       }
+       // Single-use MOVHZload operand: create a new MOVHZload with v's type in
+       // x's block and turn v into a copy of it.
+       // NOTE(review): the earlier unconditional (MOVHZreg x:(MOVHZload _ _))
+       // rule returns for every MOVHZload operand, so this rule cannot be
+       // reached as the function is written.
+       // match: (MOVHZreg x:(MOVHZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVHZload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // Single-use MOVHZloadidx operand: same replacement as above for the
+       // indexed load. clobber(x) is only called once x.Uses == 1 has held.
+       // match: (MOVHZreg x:(MOVHZloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVHZloadidx <v.Type> [off] {sym} ptr idx mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // No rule matched.
+       return false
+}
+// rewriteValueS390X_OpS390XMOVHload tries the two MOVHload address-folding
+// rules below in order. It returns true if v was rewritten in place and false
+// if neither rule applies.
+func rewriteValueS390X_OpS390XMOVHload(v *Value) bool {
+       // Rule 1: fold the constant of an ADDconst address into the load offset,
+       // provided is20Bit accepts the summed offset.
+       // match: (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is20Bit(off1+off2)
+       // result: (MOVHload  [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is20Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpS390XMOVHload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // Rule 2: absorb a MOVDaddr address by summing the offsets and merging
+       // the symbols, when is32Bit and canMergeSym both allow it.
+       // match: (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVHload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVHload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // No rule matched.
+       return false
+}
+// rewriteValueS390X_OpS390XMOVHreg tries the MOVHreg rewrite rules below in
+// order and returns true as soon as one of them has rewritten v; it returns
+// false when none applies.
+//
+// Most rules replace the MOVHreg by a plain MOVDreg of its operand x when x
+// is produced by one of the listed ops (or is an Arg of a suitable type). A
+// constant operand is folded directly, and the last rule swaps a single-use
+// MOVHZload operand for a MOVHload created in x's block.
+func rewriteValueS390X_OpS390XMOVHreg(v *Value) bool {
+       b := v.Block
+       // Blank use of b. NOTE(review): presumably emitted by the generator so
+       // the function compiles whether or not any rule reads b.
+       _ = b
+       // Operand is a MOVBload.
+       // match: (MOVHreg x:(MOVBload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // Operand is a MOVBZload.
+       // match: (MOVHreg x:(MOVBZload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // Operand is a MOVHload.
+       // match: (MOVHreg x:(MOVHload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // Operand is a function argument whose type is a signed 8- or 16-bit
+       // integer (as reported by is8BitInt/is16BitInt/isSigned).
+       // match: (MOVHreg x:(Arg <t>))
+       // cond: (is8BitInt(t) || is16BitInt(t)) && isSigned(t)
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpArg {
+                       break
+               }
+               t := x.Type
+               if !((is8BitInt(t) || is16BitInt(t)) && isSigned(t)) {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // Operand is itself a MOVBreg.
+       // match: (MOVHreg x:(MOVBreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // Operand is itself a MOVBZreg.
+       // match: (MOVHreg x:(MOVBZreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // Operand is itself a MOVHreg.
+       // match: (MOVHreg x:(MOVHreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // Constant folding: keep only the low 16 bits of c, sign-extended to
+       // 64 bits (int64(int16(c))).
+       // match: (MOVHreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(int16(c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = int64(int16(c))
+               return true
+       }
+       // Single-use MOVHZload operand: create a MOVHload (note: not MOVHZload)
+       // with v's type in x's block, copying x's offset, symbol, pointer and
+       // memory, and turn v into a copy of it. clobber(x) is only called once
+       // x.Uses == 1 has held.
+       // match: (MOVHreg x:(MOVHZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVHload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // No rule matched.
+       return false
+}
+func rewriteValueS390X_OpS390XMOVHstore(v *Value) bool {
+       // match: (MOVHstore [off] {sym} ptr (MOVHreg x) mem)
+       // cond:
+       // result: (MOVHstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVHreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVHstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [off] {sym} ptr (MOVHZreg x) mem)
+       // cond:
+       // result: (MOVHstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVHstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // cond: is20Bit(off1+off2)
+       // result: (MOVHstore  [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is20Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [off] {sym} ptr (MOVDconst [c]) mem)
+       // cond: validOff(off) && ptr.Op != OpSB
+       // result: (MOVHstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validOff(off) && ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreconst)
+               v.AuxInt = makeValAndOff(int64(int16(c)), off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVHstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVHstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddridx {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [off] {sym} (ADD ptr idx) val mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVHstoreidx [off] {sym} ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADD {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRDconst [16] w) mem))
+       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
+       // result: (MOVWstore [i-2] {s} p w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVHstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               x_1 := x.Args[1]
+               if x_1.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_1.AuxInt != 16 {
+                       break
+               }
+               if w != x_1.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [i] {s} p w0:(SRDconst [j] w) x:(MOVHstore [i-2] {s} p (SRDconst [j+16] w) mem))
+       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
+       // result: (MOVWstore [i-2] {s} p w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w0 := v.Args[1]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVHstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               x_1 := x.Args[1]
+               if x_1.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_1.AuxInt != j+16 {
+                       break
+               }
+               if w != x_1.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRWconst [16] w) mem))
+       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
+       // result: (MOVWstore [i-2] {s} p w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVHstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               x_1 := x.Args[1]
+               if x_1.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_1.AuxInt != 16 {
+                       break
+               }
+               if w != x_1.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [i] {s} p w0:(SRWconst [j] w) x:(MOVHstore [i-2] {s} p (SRWconst [j+16] w) mem))
+       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
+       // result: (MOVWstore [i-2] {s} p w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w0 := v.Args[1]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVHstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               x_1 := x.Args[1]
+               if x_1.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_1.AuxInt != j+16 {
+                       break
+               }
+               if w != x_1.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVHstoreconst(v *Value) bool {
+       // match: (MOVHstoreconst [sc] {s} (ADDconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVHstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       for {
+               sc := v.AuxInt
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
+       // result: (MOVHstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       for {
+               sc := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpS390XMOVHstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreconst [c] {s} p x:(MOVHstoreconst [a] {s} p mem))
+       // cond: p.Op != OpSB   && x.Uses == 1   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()   && clobber(x)
+       // result: (MOVWstoreconst [makeValAndOff(ValAndOff(c).Val()&0xffff | ValAndOff(a).Val()<<16, ValAndOff(a).Off())] {s} p mem)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpS390XMOVHstoreconst {
+                       break
+               }
+               a := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               mem := x.Args[1]
+               if !(p.Op != OpSB && x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreconst)
+               v.AuxInt = makeValAndOff(ValAndOff(c).Val()&0xffff|ValAndOff(a).Val()<<16, ValAndOff(a).Off())
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVHstoreidx(v *Value) bool {
+       // match: (MOVHstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVHstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [c] {sym} idx (ADDconst [d] ptr) val mem)
+       // cond:
+       // result: (MOVHstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem)
+       // cond:
+       // result: (MOVHstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [c] {sym} (ADDconst [d] idx) ptr val mem)
+       // cond:
+       // result: (MOVHstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVHstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != 16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} idx p (SRDconst [16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != 16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} idx p w x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != 16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} idx p w x:(MOVHstoreidx [i-2] {s} idx p (SRDconst [16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != 16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [j+16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != j+16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVHstoreidx [i-2] {s} idx p (SRDconst [j+16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != j+16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} idx p w0:(SRDconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [j+16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != j+16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} idx p w0:(SRDconst [j] w) x:(MOVHstoreidx [i-2] {s} idx p (SRDconst [j+16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != j+16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != 16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} idx p (SRWconst [16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != 16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} idx p w x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != 16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} idx p w x:(MOVHstoreidx [i-2] {s} idx p (SRWconst [16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != 16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [j+16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != j+16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVHstoreidx [i-2] {s} idx p (SRWconst [j+16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != j+16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} idx p w0:(SRWconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [j+16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != j+16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx [i] {s} idx p w0:(SRWconst [j] w) x:(MOVHstoreidx [i-2] {s} idx p (SRWconst [j+16] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVWstoreidx [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRWconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVHstoreidx {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRWconst {
+                       break
+               }
+               if x_2.AuxInt != j+16 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVWBRstore(v *Value) bool {
+       // match: (MOVWBRstore [i] {s} p (SRDconst [32] w) x:(MOVWBRstore [i-4] {s} p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDBRstore [i-4] {s} p w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_1.AuxInt != 32 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVWBRstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWBRstore [i] {s} p (SRDconst [j] w) x:(MOVWBRstore [i-4] {s} p w0:(SRDconst [j-32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDBRstore [i-4] {s} p w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVWBRstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVWBRstoreidx(v *Value) bool {
+       // match: (MOVWBRstoreidx [i] {s} p idx (SRDconst [32] w) x:(MOVWBRstoreidx [i-4] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDBRstoreidx [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 32 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWBRstoreidx [i] {s} p idx (SRDconst [32] w) x:(MOVWBRstoreidx [i-4] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDBRstoreidx [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 32 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWBRstoreidx [i] {s} idx p (SRDconst [32] w) x:(MOVWBRstoreidx [i-4] {s} p idx w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDBRstoreidx [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 32 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWBRstoreidx [i] {s} idx p (SRDconst [32] w) x:(MOVWBRstoreidx [i-4] {s} idx p w mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDBRstoreidx [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_2.AuxInt != 32 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVWBRstoreidx [i-4] {s} p idx w0:(SRDconst [j-32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDBRstoreidx [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVWBRstoreidx [i-4] {s} idx p w0:(SRDconst [j-32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDBRstoreidx [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWBRstoreidx [i] {s} idx p (SRDconst [j] w) x:(MOVWBRstoreidx [i-4] {s} p idx w0:(SRDconst [j-32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDBRstoreidx [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWBRstoreidx [i] {s} idx p (SRDconst [j] w) x:(MOVWBRstoreidx [i-4] {s} idx p w0:(SRDconst [j-32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDBRstoreidx [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpS390XSRDconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWBRstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVWZload(v *Value) bool {
+       // match: (MOVWZload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVWZreg x)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVWstore {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpS390XMOVWZreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWZload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is20Bit(off1+off2)
+       // result: (MOVWZload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is20Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpS390XMOVWZload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWZload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWZload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVWZload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddridx {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVWZloadidx)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWZload [off] {sym} (ADD ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVWZloadidx [off] {sym} ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADD {
+                       break
+               }
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVWZloadidx)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVWZloadidx(v *Value) bool {
+       // match: (MOVWZloadidx [c] {sym} (ADDconst [d] ptr) idx mem)
+       // cond:
+       // result: (MOVWZloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVWZloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWZloadidx [c] {sym} idx (ADDconst [d] ptr) mem)
+       // cond:
+       // result: (MOVWZloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVWZloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWZloadidx [c] {sym} ptr (ADDconst [d] idx) mem)
+       // cond:
+       // result: (MOVWZloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVWZloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWZloadidx [c] {sym} (ADDconst [d] idx) ptr mem)
+       // cond:
+       // result: (MOVWZloadidx [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVWZloadidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVWZreg(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWZreg x:(MOVBZload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWZreg x:(MOVHZload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWZreg x:(MOVWZload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWZload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWZreg x:(Arg <t>))
+       // cond: (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t)
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpArg {
+                       break
+               }
+               t := x.Type
+               if !((is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t)) {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWZreg x:(MOVBZreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWZreg x:(MOVHZreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWZreg x:(MOVWZreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWZreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWZreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(uint32(c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = int64(uint32(c))
+               return true
+       }
+       // match: (MOVWZreg x:(MOVWZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWZload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVWZreg x:(MOVWZloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWZloadidx <v.Type> [off] {sym} ptr idx mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVWload(v *Value) bool {
+       // match: (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is20Bit(off1+off2)
+       // result: (MOVWload  [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is20Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpS390XMOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVWreg(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWreg x:(MOVBload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBZload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHZload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHZload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVWload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWload {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(Arg <t>))
+       // cond: (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t)
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpArg {
+                       break
+               }
+               t := x.Type
+               if !((is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t)) {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBZreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVBZreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVHreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVWreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWreg {
+                       break
+               }
+               v.reset(OpS390XMOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(int32(c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = int64(int32(c))
+               return true
+       }
+       // match: (MOVWreg x:(MOVWZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       return false
+}
+// rewriteValueS390X_OpS390XMOVWstore tries the MOVWstore rewrite rules against
+// v, in the order they appear below, and reports whether v was rewritten.
+//
+// Each rule is a one-shot `for { ... }` block: a `break` means the rule does
+// not apply and control falls through to the next rule; a matching rule
+// rewrites v in place (v.reset followed by new AuxInt/Aux/args) and returns
+// true. The match/cond/result comment above each block states the rule.
+// Rule order matters: several rules can match the same operand shapes, and
+// the first one that matches wins.
+func rewriteValueS390X_OpS390XMOVWstore(v *Value) bool {
+       // match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem)
+       // cond:
+       // result: (MOVWstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVWreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVWZreg x) mem)
+       // cond:
+       // result: (MOVWstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpS390XMOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // cond: is20Bit(off1+off2)
+       // result: (MOVWstore  [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is20Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVDconst [c]) mem)
+       // cond: validOff(off) && int64(int16(c)) == c && ptr.Op != OpSB
+       // result: (MOVWstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               // Only constants that survive a round trip through int16 are
+               // folded into the store.
+               if !(validOff(off) && int64(int16(c)) == c && ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreconst)
+               v.AuxInt = makeValAndOff(int64(int32(c)), off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddridx {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} (ADD ptr idx) val mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVWstoreidx [off] {sym} ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADD {
+                       break
+               }
+               // Only this operand order of the ADD is tried here: Args[0] is
+               // taken as ptr and Args[1] as idx.
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // The remaining rules combine v with the store x that is v's memory
+       // argument (v.Args[2]). In each of them x must use the same Aux and the
+       // same pointer p, x.Uses must be 1, and clobber(x) is the last term of
+       // the condition, so it is only evaluated once every other check passed.
+       // The rewritten v takes x's memory argument as its own.
+
+       // match: (MOVWstore [i] {s} p (SRDconst [32] w) x:(MOVWstore [i-4] {s} p w mem))
+       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
+       // result: (MOVDstore [i-4] {s} p w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSRDconst {
+                       break
+               }
+               if v_1.AuxInt != 32 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVWstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} p w0:(SRDconst [j] w) x:(MOVWstore [i-4] {s} p (SRDconst [j+32] w) mem))
+       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
+       // result: (MOVDstore [i-4] {s} p w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w0 := v.Args[1]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVWstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               x_1 := x.Args[1]
+               if x_1.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_1.AuxInt != j+32 {
+                       break
+               }
+               if w != x_1.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} p w1 x:(MOVWstore [i-4] {s} p w0 mem))
+       // cond: p.Op != OpSB   && x.Uses == 1   && is20Bit(i-4)   && clobber(x)
+       // result: (STM2 [i-4] {s} p w0 w1 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w1 := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpS390XMOVWstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               mem := x.Args[2]
+               if !(p.Op != OpSB && x.Uses == 1 && is20Bit(i-4) && clobber(x)) {
+                       break
+               }
+               // The value from the earlier store (w0) is placed before this
+               // store's value (w1) in the STM2 operand list.
+               v.reset(OpS390XSTM2)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(w1)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} p w2 x:(STM2 [i-8] {s} p w0 w1 mem))
+       // cond: x.Uses == 1   && is20Bit(i-8)   && clobber(x)
+       // result: (STM3 [i-8] {s} p w0 w1 w2 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w2 := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpS390XSTM2 {
+                       break
+               }
+               if x.AuxInt != i-8 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               w1 := x.Args[2]
+               mem := x.Args[3]
+               if !(x.Uses == 1 && is20Bit(i-8) && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XSTM3)
+               v.AuxInt = i - 8
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(w1)
+               v.AddArg(w2)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} p w3 x:(STM3 [i-12] {s} p w0 w1 w2 mem))
+       // cond: x.Uses == 1   && is20Bit(i-12)   && clobber(x)
+       // result: (STM4 [i-12] {s} p w0 w1 w2 w3 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               w3 := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpS390XSTM3 {
+                       break
+               }
+               if x.AuxInt != i-12 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               w1 := x.Args[2]
+               w2 := x.Args[3]
+               mem := x.Args[4]
+               if !(x.Uses == 1 && is20Bit(i-12) && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XSTM4)
+               v.AuxInt = i - 12
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(w1)
+               v.AddArg(w2)
+               v.AddArg(w3)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVWstoreconst(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (MOVWstoreconst [sc] {s} (ADDconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       for {
+               sc := v.AuxInt
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
+       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       for {
+               sc := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDaddr {
+                       break
+               }
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+                       break
+               }
+               v.reset(OpS390XMOVWstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
+       // cond: p.Op != OpSB   && x.Uses == 1   && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()   && clobber(x)
+       // result: (MOVDstore [ValAndOff(a).Off()] {s} p (MOVDconst [ValAndOff(c).Val()&0xffffffff | ValAndOff(a).Val()<<32]) mem)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpS390XMOVWstoreconst {
+                       break
+               }
+               a := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               mem := x.Args[1]
+               if !(p.Op != OpSB && x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstore)
+               v.AuxInt = ValAndOff(a).Off()
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDconst, types.UInt64)
+               v0.AuxInt = ValAndOff(c).Val()&0xffffffff | ValAndOff(a).Val()<<32
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMOVWstoreidx(v *Value) bool {
+       // match: (MOVWstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem)
+       // cond:
+       // result: (MOVWstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [c] {sym} idx (ADDconst [d] ptr) val mem)
+       // cond:
+       // result: (MOVWstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem)
+       // cond:
+       // result: (MOVWstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [c] {sym} (ADDconst [d] idx) ptr val mem)
+       // cond:
+       // result: (MOVWstoreidx [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpS390XMOVWstoreidx)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [i] {s} p idx w x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDstoreidx [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != 32 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [i] {s} p idx w x:(MOVWstoreidx [i-4] {s} idx p (SRDconst [32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDstoreidx [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != 32 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [i] {s} idx p w x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDstoreidx [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != 32 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [i] {s} idx p w x:(MOVWstoreidx [i-4] {s} idx p (SRDconst [32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDstoreidx [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != 32 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [j+32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDstoreidx [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != j+32 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVWstoreidx [i-4] {s} idx p (SRDconst [j+32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDstoreidx [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p := v.Args[0]
+               idx := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != j+32 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [i] {s} idx p w0:(SRDconst [j] w) x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [j+32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDstoreidx [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != j+32 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx [i] {s} idx p w0:(SRDconst [j] w) x:(MOVWstoreidx [i-4] {s} idx p (SRDconst [j+32] w) mem))
+       // cond: x.Uses == 1   && clobber(x)
+       // result: (MOVDstoreidx [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               idx := v.Args[0]
+               p := v.Args[1]
+               w0 := v.Args[2]
+               if w0.Op != OpS390XSRDconst {
+                       break
+               }
+               j := w0.AuxInt
+               w := w0.Args[0]
+               x := v.Args[3]
+               if x.Op != OpS390XMOVWstoreidx {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpS390XSRDconst {
+                       break
+               }
+               if x_2.AuxInt != j+32 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XMOVDstoreidx)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMULLD(v *Value) bool {
+       // match: (MULLD x (MOVDconst [c]))
+       // cond: is32Bit(c)
+       // result: (MULLDconst [c] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(is32Bit(c)) {
+                       break
+               }
+               v.reset(OpS390XMULLDconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLD (MOVDconst [c]) x)
+       // cond: is32Bit(c)
+       // result: (MULLDconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(is32Bit(c)) {
+                       break
+               }
+               v.reset(OpS390XMULLDconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLD <t> x g:(MOVDload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLDload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLDload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULLD <t> g:(MOVDload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLDload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLDload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULLD <t> g:(MOVDload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLDload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLDload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULLD <t> x g:(MOVDload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLDload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLDload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMULLDconst(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MULLDconst [-1] x)
+       // cond:
+       // result: (NEG x)
+       for {
+               if v.AuxInt != -1 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpS390XNEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLDconst [0] _)
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               if v.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MULLDconst [1] x)
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != 1 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLDconst [c] x)
+       // cond: isPowerOfTwo(c)
+       // result: (SLDconst [log2(c)] x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpS390XSLDconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLDconst [c] x)
+       // cond: isPowerOfTwo(c+1) && c >= 15
+       // result: (SUB (SLDconst <v.Type> [log2(c+1)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c+1) && c >= 15) {
+                       break
+               }
+               v.reset(OpS390XSUB)
+               v0 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v0.AuxInt = log2(c + 1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLDconst [c] x)
+       // cond: isPowerOfTwo(c-1) && c >= 17
+       // result: (ADD (SLDconst <v.Type> [log2(c-1)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-1) && c >= 17) {
+                       break
+               }
+               v.reset(OpS390XADD)
+               v0 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLDconst [c] (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [c*d])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = c * d
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMULLW(v *Value) bool {
+       // match: (MULLW x (MOVDconst [c]))
+       // cond:
+       // result: (MULLWconst [c] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpS390XMULLWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLW (MOVDconst [c]) x)
+       // cond:
+       // result: (MULLWconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpS390XMULLWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLW <t> x g:(MOVWload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULLW <t> g:(MOVWload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULLW <t> g:(MOVWload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULLW <t> x g:(MOVWload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULLW <t> x g:(MOVWZload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULLW <t> g:(MOVWZload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULLW <t> g:(MOVWZload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULLW <t> x g:(MOVWZload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (MULLWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XMULLWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XMULLWconst(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MULLWconst [-1] x)
+       // cond:
+       // result: (NEGW x)
+       for {
+               if v.AuxInt != -1 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpS390XNEGW)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLWconst [0] _)
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               if v.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MULLWconst [1] x)
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != 1 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLWconst [c] x)
+       // cond: isPowerOfTwo(c)
+       // result: (SLWconst [log2(c)] x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpS390XSLWconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLWconst [c] x)
+       // cond: isPowerOfTwo(c+1) && c >= 15
+       // result: (SUBW (SLWconst <v.Type> [log2(c+1)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c+1) && c >= 15) {
+                       break
+               }
+               v.reset(OpS390XSUBW)
+               v0 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v0.AuxInt = log2(c + 1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLWconst [c] x)
+       // cond: isPowerOfTwo(c-1) && c >= 17
+       // result: (ADDW (SLWconst <v.Type> [log2(c-1)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-1) && c >= 17) {
+                       break
+               }
+               v.reset(OpS390XADDW)
+               v0 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLWconst [c] (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [int64(int32(c*d))])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = int64(int32(c * d))
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XNEG(v *Value) bool {
+       // match: (NEG (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [-c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = -c
+               return true
+       }
+       // match: (NEG (ADDconst [c] (NEG x)))
+       // cond: c != -(1<<31)
+       // result: (ADDconst [-c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XADDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpS390XNEG {
+                       break
+               }
+               x := v_0_0.Args[0]
+               if !(c != -(1 << 31)) {
+                       break
+               }
+               v.reset(OpS390XADDconst)
+               v.AuxInt = -c
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XNEGW(v *Value) bool {
+       // match: (NEGW (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(int32(-c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = int64(int32(-c))
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XNOT(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (NOT x)
+       // cond: true
+       // result: (XOR (MOVDconst [-1]) x)
+       for {
+               x := v.Args[0]
+               if !(true) {
+                       break
+               }
+               v.reset(OpS390XXOR)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDconst, types.UInt64)
+               v0.AuxInt = -1
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XNOTW(v *Value) bool {
+       // match: (NOTW x)
+       // cond: true
+       // result: (XORWconst [-1] x)
+       for {
+               x := v.Args[0]
+               if !(true) {
+                       break
+               }
+               v.reset(OpS390XXORWconst)
+               v.AuxInt = -1
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueS390X_OpS390XOR(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (OR x (MOVDconst [c]))
+       // cond: isU32Bit(c)
+       // result: (ORconst [c] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isU32Bit(c)) {
+                       break
+               }
+               v.reset(OpS390XORconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR (MOVDconst [c]) x)
+       // cond: isU32Bit(c)
+       // result: (ORconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isU32Bit(c)) {
+                       break
+               }
+               v.reset(OpS390XORconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR (SLDconst x [c]) (SRDconst x [d]))
+       // cond: d == 64-c
+       // result: (RLLGconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XSLDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSRDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 64-c) {
+                       break
+               }
+               v.reset(OpS390XRLLGconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR (SRDconst x [d]) (SLDconst x [c]))
+       // cond: d == 64-c
+       // result: (RLLGconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XSRDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSLDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 64-c) {
+                       break
+               }
+               v.reset(OpS390XRLLGconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [c|d])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = c | d
+               return true
+       }
+       // match: (OR (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [c|d])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = c | d
+               return true
+       }
+       // match: (OR x x)
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR <t> x g:(MOVDload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XORload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> g:(MOVDload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XORload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> g:(MOVDload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XORload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> x g:(MOVDload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XORload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVBZload [i1] {s} p mem) sh:(SLDconst [8] x0:(MOVBZload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [8] x0:(MOVBZload [i0] {s} p mem)) x1:(MOVBZload [i1] {s} p mem))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVHZload [i1] {s} p mem) sh:(SLDconst [16] x0:(MOVHZload [i0] {s} p mem)))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVHZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [16] x0:(MOVHZload [i0] {s} p mem)) x1:(MOVHZload [i1] {s} p mem))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVHZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVWZload [i1] {s} p mem) sh:(SLDconst [32] x0:(MOVWZload [i0] {s} p mem)))
+       // cond: i1 == i0+4   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDload [i0] {s} p mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVWZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVWZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDload, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [32] x0:(MOVWZload [i0] {s} p mem)) x1:(MOVWZload [i1] {s} p mem))
+       // cond: i1 == i0+4   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDload [i0] {s} p mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVWZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVWZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDload, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem)) or:(OR s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem)) y) s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))) s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem)) or:(OR s1:(SLDconst [j1] x1:(MOVHZload [i1] {s} p mem)) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZload, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVHZload [i1] {s} p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZload, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVHZload [i1] {s} p mem)) y) s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem)))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)
+       for { // one attempt at this rule: each break gives up on it, success returns true
+               or := v.Args[0] // first operand of v: must be the inner OR
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0] // this variant expects the inner OR as (s1, y)
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt // shift constant on x1; reused as the shift of the merged load
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZload {
+                       break
+               }
+               i1 := x1.AuxInt // must equal i0+2 (checked in the condition below)
+               s := x1.Aux // s, p, mem are bound here from x1 and must reappear on x0
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               y := or.Args[1] // the other inner-OR operand; carried into the result unchanged
+               s0 := v.Args[1] // second operand of v: must be the SLDconst around load x0
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt // shift constant on x0; must satisfy j1 == j0-16
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZload {
+                       break
+               }
+               i0 := x0.AuxInt // becomes the AuxInt of the merged load
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // && short-circuits: the clobber calls are reached only once every earlier test holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // emit the replacement in the block mergePoint returned (non-nil, per the test above)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy) // v turns into a copy of the new OR value v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZload, types.UInt32) // a single MOVWZload replaces the two MOVHZload values
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y) // new OR is always built as (shifted merged load, y)
+               return true
+       }
+       // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVHZload [i1] {s} p mem))) s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem)))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)
+       for { // one attempt at this rule: each break gives up on it, success returns true
+               or := v.Args[0] // first operand of v: must be the inner OR
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0] // this variant expects the inner OR as (y, s1); y is carried into the result unchanged
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt // shift constant on x1; reused as the shift of the merged load
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZload {
+                       break
+               }
+               i1 := x1.AuxInt // must equal i0+2 (checked in the condition below)
+               s := x1.Aux // s, p, mem are bound here from x1 and must reappear on x0
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               s0 := v.Args[1] // second operand of v: must be the SLDconst around load x0
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt // shift constant on x0; must satisfy j1 == j0-16
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZload {
+                       break
+               }
+               i0 := x0.AuxInt // becomes the AuxInt of the merged load
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) { // && short-circuits: the clobber calls are reached only once every earlier test holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // emit the replacement in the block mergePoint returned (non-nil, per the test above)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy) // v turns into a copy of the new OR value v0
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZload, types.UInt32) // a single MOVWZload replaces the two MOVHZload values
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y) // new OR is always built as (shifted merged load, y)
+               return true
+       }
+       // match: (OR x1:(MOVBZloadidx [i1] {s} p idx mem) sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
+       for { // one attempt at this rule: each break gives up on it, success returns true
+               x1 := v.Args[0] // first operand of v: the unshifted load
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt // must equal i0+1 (checked in the condition below)
+               s := x1.Aux // s, p, idx, mem are bound here from x1 and must reappear on x0
+               p := x1.Args[0] // this variant takes x1's operands as (p, idx, mem)
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1] // second operand of v: must be SLDconst [8] around load x0
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt // becomes the AuxInt of the merged load
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] { // this variant expects x0's operands as (p, idx, mem)
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) { // && short-circuits: the clobber calls are reached only once every earlier test holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // emit the replacement in the block mergePoint returned (non-nil, per the test above)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16) // a single MOVHZloadidx replaces the two MOVBZloadidx values
+               v.reset(OpCopy) // v turns into a copy of the merged load v0
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p) // result operands are always emitted as (p, idx, mem)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVBZloadidx [i1] {s} idx p mem) sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
+       for { // one attempt at this rule: each break gives up on it, success returns true
+               x1 := v.Args[0] // first operand of v: the unshifted load
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt // must equal i0+1 (checked in the condition below)
+               s := x1.Aux // s, p, idx, mem are bound here from x1 and must reappear on x0
+               idx := x1.Args[0] // this variant takes x1's operands as (idx, p, mem)
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1] // second operand of v: must be SLDconst [8] around load x0
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt // becomes the AuxInt of the merged load
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] { // this variant expects x0's operands as (p, idx, mem)
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) { // && short-circuits: the clobber calls are reached only once every earlier test holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // emit the replacement in the block mergePoint returned (non-nil, per the test above)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16) // a single MOVHZloadidx replaces the two MOVBZloadidx values
+               v.reset(OpCopy) // v turns into a copy of the merged load v0
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p) // result operands are always emitted as (p, idx, mem)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVBZloadidx [i1] {s} p idx mem) sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
+       for { // one attempt at this rule: each break gives up on it, success returns true
+               x1 := v.Args[0] // first operand of v: the unshifted load
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt // must equal i0+1 (checked in the condition below)
+               s := x1.Aux // s, p, idx, mem are bound here from x1 and must reappear on x0
+               p := x1.Args[0] // this variant takes x1's operands as (p, idx, mem)
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1] // second operand of v: must be SLDconst [8] around load x0
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt // becomes the AuxInt of the merged load
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] { // this variant expects x0's operands as (idx, p, mem)
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) { // && short-circuits: the clobber calls are reached only once every earlier test holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // emit the replacement in the block mergePoint returned (non-nil, per the test above)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16) // a single MOVHZloadidx replaces the two MOVBZloadidx values
+               v.reset(OpCopy) // v turns into a copy of the merged load v0
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p) // result operands are always emitted as (p, idx, mem)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVBZloadidx [i1] {s} idx p mem) sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
+       for { // one attempt at this rule: each break gives up on it, success returns true
+               x1 := v.Args[0] // first operand of v: the unshifted load
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt // must equal i0+1 (checked in the condition below)
+               s := x1.Aux // s, p, idx, mem are bound here from x1 and must reappear on x0
+               idx := x1.Args[0] // this variant takes x1's operands as (idx, p, mem)
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1] // second operand of v: must be SLDconst [8] around load x0
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt // becomes the AuxInt of the merged load
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] { // this variant expects x0's operands as (idx, p, mem)
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) { // && short-circuits: the clobber calls are reached only once every earlier test holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // emit the replacement in the block mergePoint returned (non-nil, per the test above)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16) // a single MOVHZloadidx replaces the two MOVBZloadidx values
+               v.reset(OpCopy) // v turns into a copy of the merged load v0
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p) // result operands are always emitted as (p, idx, mem)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)) x1:(MOVBZloadidx [i1] {s} p idx mem))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
+       for { // one attempt at this rule: each break gives up on it, success returns true
+               sh := v.Args[0] // first operand of v: must be SLDconst [8] around load x0
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt // becomes the AuxInt of the merged load
+               s := x0.Aux // s, p, idx, mem are bound here from x0 and must reappear on x1
+               p := x0.Args[0] // this variant takes x0's operands as (p, idx, mem)
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1] // second operand of v: the unshifted load
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt // must equal i0+1 (checked in the condition below)
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] { // this variant expects x1's operands as (p, idx, mem)
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) { // && short-circuits: the clobber calls are reached only once every earlier test holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // emit the replacement in the block mergePoint returned (non-nil, per the test above)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16) // a single MOVHZloadidx replaces the two MOVBZloadidx values
+               v.reset(OpCopy) // v turns into a copy of the merged load v0
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p) // result operands are always emitted as (p, idx, mem)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} idx p mem)) x1:(MOVBZloadidx [i1] {s} p idx mem))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
+       for { // one attempt at this rule: each break gives up on it, success returns true
+               sh := v.Args[0] // first operand of v: must be SLDconst [8] around load x0
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt // becomes the AuxInt of the merged load
+               s := x0.Aux // s, p, idx, mem are bound here from x0 and must reappear on x1
+               idx := x0.Args[0] // this variant takes x0's operands as (idx, p, mem)
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1] // second operand of v: the unshifted load
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt // must equal i0+1 (checked in the condition below)
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] { // this variant expects x1's operands as (p, idx, mem)
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) { // && short-circuits: the clobber calls are reached only once every earlier test holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // emit the replacement in the block mergePoint returned (non-nil, per the test above)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16) // a single MOVHZloadidx replaces the two MOVBZloadidx values
+               v.reset(OpCopy) // v turns into a copy of the merged load v0
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p) // result operands are always emitted as (p, idx, mem)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)) x1:(MOVBZloadidx [i1] {s} idx p mem))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
+       for { // one attempt at this rule: each break gives up on it, success returns true
+               sh := v.Args[0] // first operand of v: must be SLDconst [8] around load x0
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt // becomes the AuxInt of the merged load
+               s := x0.Aux // s, p, idx, mem are bound here from x0 and must reappear on x1
+               p := x0.Args[0] // this variant takes x0's operands as (p, idx, mem)
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1] // second operand of v: the unshifted load
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt // must equal i0+1 (checked in the condition below)
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] { // this variant expects x1's operands as (idx, p, mem)
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) { // && short-circuits: the clobber calls are reached only once every earlier test holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // emit the replacement in the block mergePoint returned (non-nil, per the test above)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16) // a single MOVHZloadidx replaces the two MOVBZloadidx values
+               v.reset(OpCopy) // v turns into a copy of the merged load v0
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p) // result operands are always emitted as (p, idx, mem)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} idx p mem)) x1:(MOVBZloadidx [i1] {s} idx p mem))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
+       for { // one attempt at this rule: each break gives up on it, success returns true
+               sh := v.Args[0] // first operand of v: must be SLDconst [8] around load x0
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt // becomes the AuxInt of the merged load
+               s := x0.Aux // s, p, idx, mem are bound here from x0 and must reappear on x1
+               idx := x0.Args[0] // this variant takes x0's operands as (idx, p, mem)
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1] // second operand of v: the unshifted load
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt // must equal i0+1 (checked in the condition below)
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] { // this variant expects x1's operands as (idx, p, mem)
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) { // && short-circuits: the clobber calls are reached only once every earlier test holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // emit the replacement in the block mergePoint returned (non-nil, per the test above)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16) // a single MOVHZloadidx replaces the two MOVBZloadidx values
+               v.reset(OpCopy) // v turns into a copy of the merged load v0
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p) // result operands are always emitted as (p, idx, mem)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVHZloadidx [i1] {s} p idx mem) sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
+       for { // one attempt at this rule: each break gives up on it, success returns true
+               x1 := v.Args[0] // first operand of v: the unshifted load
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt // must equal i0+2 (checked in the condition below)
+               s := x1.Aux // s, p, idx, mem are bound here from x1 and must reappear on x0
+               p := x1.Args[0] // this variant takes x1's operands as (p, idx, mem)
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1] // second operand of v: must be SLDconst [16] around load x0
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt // becomes the AuxInt of the merged load
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] { // this variant expects x0's operands as (p, idx, mem)
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) { // && short-circuits: the clobber calls are reached only once every earlier test holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // emit the replacement in the block mergePoint returned (non-nil, per the test above)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32) // a single MOVWZloadidx replaces the two MOVHZloadidx values
+               v.reset(OpCopy) // v turns into a copy of the merged load v0
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p) // result operands are always emitted as (p, idx, mem)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVHZloadidx [i1] {s} idx p mem) sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
+       for { // one attempt at this rule: each break gives up on it, success returns true
+               x1 := v.Args[0] // first operand of v: the unshifted load
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt // must equal i0+2 (checked in the condition below)
+               s := x1.Aux // s, p, idx, mem are bound here from x1 and must reappear on x0
+               idx := x1.Args[0] // this variant takes x1's operands as (idx, p, mem)
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1] // second operand of v: must be SLDconst [16] around load x0
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt // becomes the AuxInt of the merged load
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] { // this variant expects x0's operands as (p, idx, mem)
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) { // && short-circuits: the clobber calls are reached only once every earlier test holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // emit the replacement in the block mergePoint returned (non-nil, per the test above)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32) // a single MOVWZloadidx replaces the two MOVHZloadidx values
+               v.reset(OpCopy) // v turns into a copy of the merged load v0
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p) // result operands are always emitted as (p, idx, mem)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVHZloadidx [i1] {s} p idx mem) sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
+       for { // one attempt at this rule: each break gives up on it, success returns true
+               x1 := v.Args[0] // first operand of v: the unshifted load
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt // must equal i0+2 (checked in the condition below)
+               s := x1.Aux // s, p, idx, mem are bound here from x1 and must reappear on x0
+               p := x1.Args[0] // this variant takes x1's operands as (p, idx, mem)
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1] // second operand of v: must be SLDconst [16] around load x0
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt // becomes the AuxInt of the merged load
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] { // this variant expects x0's operands as (idx, p, mem)
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) { // && short-circuits: the clobber calls are reached only once every earlier test holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // emit the replacement in the block mergePoint returned (non-nil, per the test above)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32) // a single MOVWZloadidx replaces the two MOVHZloadidx values
+               v.reset(OpCopy) // v turns into a copy of the merged load v0
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p) // result operands are always emitted as (p, idx, mem)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVHZloadidx [i1] {s} idx p mem) sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
+       for { // one attempt at this rule: each break gives up on it, success returns true
+               x1 := v.Args[0] // first operand of v: the unshifted load
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt // must equal i0+2 (checked in the condition below)
+               s := x1.Aux // s, p, idx, mem are bound here from x1 and must reappear on x0
+               idx := x1.Args[0] // this variant takes x1's operands as (idx, p, mem)
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1] // second operand of v: must be SLDconst [16] around load x0
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt // becomes the AuxInt of the merged load
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] { // this variant expects x0's operands as (idx, p, mem)
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) { // && short-circuits: the clobber calls are reached only once every earlier test holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // emit the replacement in the block mergePoint returned (non-nil, per the test above)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32) // a single MOVWZloadidx replaces the two MOVHZloadidx values
+               v.reset(OpCopy) // v turns into a copy of the merged load v0
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p) // result operands are always emitted as (p, idx, mem)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)) x1:(MOVHZloadidx [i1] {s} p idx mem))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
+       for { // one attempt at this rule: each break gives up on it, success returns true
+               sh := v.Args[0] // first operand of v: must be SLDconst [16] around load x0
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt // becomes the AuxInt of the merged load
+               s := x0.Aux // s, p, idx, mem are bound here from x0 and must reappear on x1
+               p := x0.Args[0] // this variant takes x0's operands as (p, idx, mem)
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1] // second operand of v: the unshifted load
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt // must equal i0+2 (checked in the condition below)
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] { // this variant expects x1's operands as (p, idx, mem)
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) { // && short-circuits: the clobber calls are reached only once every earlier test holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // emit the replacement in the block mergePoint returned (non-nil, per the test above)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32) // a single MOVWZloadidx replaces the two MOVHZloadidx values
+               v.reset(OpCopy) // v turns into a copy of the merged load v0
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p) // result operands are always emitted as (p, idx, mem)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} idx p mem)) x1:(MOVHZloadidx [i1] {s} p idx mem))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
+       for { // one attempt at this rule: each break gives up on it, success returns true
+               sh := v.Args[0] // first operand of v: must be SLDconst [16] around load x0
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt // becomes the AuxInt of the merged load
+               s := x0.Aux // s, p, idx, mem are bound here from x0 and must reappear on x1
+               idx := x0.Args[0] // this variant takes x0's operands as (idx, p, mem)
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1] // second operand of v: the unshifted load
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt // must equal i0+2 (checked in the condition below)
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] { // this variant expects x1's operands as (p, idx, mem)
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) { // && short-circuits: the clobber calls are reached only once every earlier test holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // emit the replacement in the block mergePoint returned (non-nil, per the test above)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32) // a single MOVWZloadidx replaces the two MOVHZloadidx values
+               v.reset(OpCopy) // v turns into a copy of the merged load v0
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p) // result operands are always emitted as (p, idx, mem)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)) x1:(MOVHZloadidx [i1] {s} idx p mem))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
+       for { // one attempt at this rule: each break gives up on it, success returns true
+               sh := v.Args[0] // first operand of v: must be SLDconst [16] around load x0
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt // becomes the AuxInt of the merged load
+               s := x0.Aux // s, p, idx, mem are bound here from x0 and must reappear on x1
+               p := x0.Args[0] // this variant takes x0's operands as (p, idx, mem)
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1] // second operand of v: the unshifted load
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt // must equal i0+2 (checked in the condition below)
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] { // this variant expects x1's operands as (idx, p, mem)
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) { // && short-circuits: the clobber calls are reached only once every earlier test holds
+                       break
+               }
+               b = mergePoint(b, x0, x1) // emit the replacement in the block mergePoint returned (non-nil, per the test above)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32) // a single MOVWZloadidx replaces the two MOVHZloadidx values
+               v.reset(OpCopy) // v turns into a copy of the merged load v0
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p) // result operands are always emitted as (p, idx, mem)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} idx p mem)) x1:(MOVHZloadidx [i1] {s} idx p mem))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVWZloadidx [i1] {s} p idx mem) sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+4   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVWZloadidx [i1] {s} idx p mem) sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+4   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVWZloadidx [i1] {s} p idx mem) sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+4   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR x1:(MOVWZloadidx [i1] {s} idx p mem) sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+4   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} p idx mem)) x1:(MOVWZloadidx [i1] {s} p idx mem))
+       // cond: i1 == i0+4   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} idx p mem)) x1:(MOVWZloadidx [i1] {s} p idx mem))
+       // cond: i1 == i0+4   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} p idx mem)) x1:(MOVWZloadidx [i1] {s} idx p mem))
+       // cond: i1 == i0+4   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} idx p mem)) x1:(MOVWZloadidx [i1] {s} idx p mem))
+       // cond: i1 == i0+4   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVWZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) or:(OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) or:(OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) or:(OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) or:(OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) y) s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) y) s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))) s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))) s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) y) s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) y) s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))) s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))) s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem)) or:(OR s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} idx p mem)) or:(OR s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem)) or:(OR s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} idx p mem)) or:(OR s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} idx p mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} idx p mem)) or:(OR y s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem)) y) s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} idx p mem)) y) s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem))) s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} idx p mem))) s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem)) y) s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} idx p mem)) y) s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem))) s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} idx p mem))) s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR x0:(MOVBZload [i0] {s} p mem) sh:(SLDconst [8] x1:(MOVBZload [i1] {s} p mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR sh:(SLDconst [8] x1:(MOVBZload [i1] {s} p mem)) x0:(MOVBZload [i0] {s} p mem))
+       // cond: p.Op != OpSB   && i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)) sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRload [i0] {s} p mem))
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRload, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))) r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRload [i0] {s} p mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRload, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR r0:(MOVWZreg x0:(MOVWBRload [i0] {s} p mem)) sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRload [i1] {s} p mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDBRload [i0] {s} p mem)
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVWBRload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVWBRload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRload, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRload [i1] {s} p mem))) r0:(MOVWZreg x0:(MOVWBRload [i0] {s} p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDBRload [i0] {s} p mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVWBRload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVWBRload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRload, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem)) or:(OR s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem)) y))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem)) or:(OR y s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem)) y) s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))) s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))) or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRload, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))) or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRload, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))) y) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRload, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)))) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRload, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (OR x0:(MOVBZloadidx [i0] {s} p idx mem) sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR x0:(MOVBZloadidx [i0] {s} idx p mem) sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR x0:(MOVBZloadidx [i0] {s} p idx mem) sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR x0:(MOVBZloadidx [i0] {s} idx p mem) sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)) x0:(MOVBZloadidx [i0] {s} p idx mem))
+       // cond: p.Op != OpSB   && i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} idx p mem)) x0:(MOVBZloadidx [i0] {s} p idx mem))
+       // cond: p.Op != OpSB   && i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)) x0:(MOVBZloadidx [i0] {s} idx p mem))
+       // cond: p.Op != OpSB   && i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} idx p mem)) x0:(MOVBZloadidx [i0] {s} idx p mem))
+       // cond: p.Op != OpSB   && i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)) sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)) sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)) sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)) sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))) r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))) r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))) r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))) r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} p idx mem)) sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRloadidx, types.Int64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVDEQ _ x (FlagEQ))
-       // cond:
-       // result: x
+       // match: (OR r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} idx p mem)) sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
        for {
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagEQ {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRloadidx, types.Int64)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVDEQ y _ (FlagLT))
-       // cond:
-       // result: y
+       // match: (OR r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} p idx mem)) sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
        for {
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagLT {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRloadidx, types.Int64)
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVDEQ y _ (FlagGT))
-       // cond:
-       // result: y
+       // match: (OR r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} idx p mem)) sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
        for {
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagGT {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRloadidx, types.Int64)
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDGE(v *Value) bool {
-       // match: (MOVDGE x y (InvertFlags cmp))
-       // cond:
-       // result: (MOVDLE x y cmp)
+       // match: (OR sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} p idx mem))) r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
        for {
-               x := v.Args[0]
-               y := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XInvertFlags {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
                        break
                }
-               cmp := v_2.Args[0]
-               v.reset(OpS390XMOVDLE)
-               v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(cmp)
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRloadidx, types.Int64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVDGE _ x (FlagEQ))
-       // cond:
-       // result: x
+       // match: (OR sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} idx p mem))) r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
        for {
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagEQ {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRloadidx, types.Int64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} p idx mem))) r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRloadidx, types.Int64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} idx p mem))) r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLDconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVWZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVWBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRloadidx, types.Int64)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVDGE y _ (FlagLT))
-       // cond:
-       // result: y
+       // match: (OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) or:(OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) y))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagLT {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDGE _ x (FlagGT))
-       // cond:
-       // result: x
+       // match: (OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) or:(OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) y))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagGT {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
                        break
                }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDGT(v *Value) bool {
-       // match: (MOVDGT x y (InvertFlags cmp))
-       // cond:
-       // result: (MOVDLT x y cmp)
+       // match: (OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) or:(OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) y))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               x := v.Args[0]
-               y := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XInvertFlags {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               cmp := v_2.Args[0]
-               v.reset(OpS390XMOVDLT)
-               v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(cmp)
-               return true
-       }
-       // match: (MOVDGT y _ (FlagEQ))
-       // cond:
-       // result: y
-       for {
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagEQ {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDGT y _ (FlagLT))
-       // cond:
-       // result: y
+       // match: (OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) or:(OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) y))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagLT {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDGT _ x (FlagGT))
-       // cond:
-       // result: x
+       // match: (OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagGT {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDLE(v *Value) bool {
-       // match: (MOVDLE x y (InvertFlags cmp))
-       // cond:
-       // result: (MOVDGE x y cmp)
+       // match: (OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               x := v.Args[0]
-               y := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XInvertFlags {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               cmp := v_2.Args[0]
-               v.reset(OpS390XMOVDGE)
-               v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(cmp)
-               return true
-       }
-       // match: (MOVDLE _ x (FlagEQ))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagEQ {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVDLE _ x (FlagLT))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagLT {
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDLE y _ (FlagGT))
-       // cond:
-       // result: y
+       // match: (OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem))))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagGT {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDLT(v *Value) bool {
-       // match: (MOVDLT x y (InvertFlags cmp))
-       // cond:
-       // result: (MOVDGT x y cmp)
-       for {
-               x := v.Args[0]
-               y := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XInvertFlags {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               cmp := v_2.Args[0]
-               v.reset(OpS390XMOVDGT)
-               v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(cmp)
-               return true
-       }
-       // match: (MOVDLT y _ (FlagEQ))
-       // cond:
-       // result: y
-       for {
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagEQ {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
-               return true
-       }
-       // match: (MOVDLT _ x (FlagLT))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagLT {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVDLT y _ (FlagGT))
-       // cond:
-       // result: y
-       for {
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagGT {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDNE(v *Value) bool {
-       // match: (MOVDNE x y (InvertFlags cmp))
-       // cond:
-       // result: (MOVDNE x y cmp)
-       for {
-               x := v.Args[0]
-               y := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XInvertFlags {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               cmp := v_2.Args[0]
-               v.reset(OpS390XMOVDNE)
-               v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(cmp)
-               return true
-       }
-       // match: (MOVDNE y _ (FlagEQ))
-       // cond:
-       // result: y
-       for {
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagEQ {
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDNE _ x (FlagLT))
-       // cond:
-       // result: x
+       // match: (OR s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem))))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagLT {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
                        break
                }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDNE _ x (FlagGT))
-       // cond:
-       // result: x
+       // match: (OR or:(OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) y) s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XFlagGT {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDaddridx(v *Value) bool {
-       // match: (MOVDaddridx [c] {s} (ADDconst [d] x) y)
-       // cond: is20Bit(c+d) && x.Op != OpSB
-       // result: (MOVDaddridx [c+d] {s} x y)
+       // match: (OR or:(OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) y) s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is20Bit(c+d) && x.Op != OpSB) {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               v.reset(OpS390XMOVDaddridx)
-               v.AuxInt = c + d
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (MOVDaddridx [c] {s} x (ADDconst [d] y))
-       // cond: is20Bit(c+d) && y.Op != OpSB
-       // result: (MOVDaddridx [c+d] {s} x y)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XADDconst {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               d := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(is20Bit(c+d) && y.Op != OpSB) {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVDaddridx)
-               v.AuxInt = c + d
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (MOVDaddridx [off1] {sym1} (MOVDaddr [off2] {sym2} x) y)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
-       // result: (MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               if idx != x1.Args[1] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(OpS390XMOVDaddridx)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
-               v.AddArg(y)
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDaddridx [off1] {sym1} x (MOVDaddr [off2] {sym2} y))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB
-       // result: (MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (OR or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))) s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVDaddr {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               y := v_1.Args[0]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB) {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               v.reset(OpS390XMOVDaddridx)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDload(v *Value) bool {
-       // match: (MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVDreg x)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVDstore {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVDload   [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is20Bit(off1+off2)
-       // result: (MOVDload  [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is20Bit(off1 + off2)) {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpS390XMOVDload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDload  [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVDload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               if p != x1.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if idx != x1.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVDload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVDloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddridx {
+               if mem != x1.Args[2] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               v.reset(OpS390XMOVDloadidx)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDload [off] {sym} (ADD ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVDloadidx [off] {sym} ptr idx mem)
+       // match: (OR or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem))) s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADD {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(ptr.Op != OpSB) {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               v.reset(OpS390XMOVDloadidx)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDloadidx(v *Value) bool {
-       // match: (MOVDloadidx [c] {sym} (ADDconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVDloadidx [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpS390XMOVDloadidx)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDloadidx [c] {sym} ptr (ADDconst [d] idx) mem)
-       // cond:
-       // result: (MOVDloadidx [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XADDconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpS390XMOVDloadidx)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDnop(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVDnop <t> x)
-       // cond: t.Compare(x.Type) == CMPeq
-       // result: x
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if !(t.Compare(x.Type) == CMPeq) {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVDnop (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = c
-               return true
-       }
-       // match: (MOVDnop <t> x:(MOVBZload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBZload <t> [off] {sym} ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZload {
+               if idx != x1.Args[1] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if mem != x1.Args[2] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVBZload, t)
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDnop <t> x:(MOVBload  [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBload  <t> [off] {sym} ptr mem)
+       // match: (OR or:(OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) y) s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBload {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVBload, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDnop <t> x:(MOVHZload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVHZload <t> [off] {sym} ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZload {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDnop <t> x:(MOVHload  [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVHload  <t> [off] {sym} ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHload {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHload, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDnop <t> x:(MOVWZload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWZload <t> [off] {sym} ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWZload {
+               if idx != x1.Args[0] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if p != x1.Args[1] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, t)
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDnop <t> x:(MOVWload  [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWload  <t> [off] {sym} ptr mem)
+       // match: (OR or:(OR s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) y) s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWload {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWload, t)
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDnop <t> x:(MOVDload  [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVDload  <t> [off] {sym} ptr mem)
+       // match: (OR or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))) s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVDload {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVDload, t)
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDnop <t> x:(MOVBZloadidx [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBZloadidx <t> [off] {sym} ptr idx mem)
+       // match: (OR or:(OR y s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem))) s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZloadidx {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if p != x1.Args[1] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVBZloadidx, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDnop <t> x:(MOVHZloadidx [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVHZloadidx <t> [off] {sym} ptr idx mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZloadidx {
+               if mem != x1.Args[2] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, t)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDnop <t> x:(MOVWZloadidx [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWZloadidx <t> [off] {sym} ptr idx mem)
+       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))) or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWZloadidx {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDnop <t> x:(MOVDloadidx  [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVDloadidx  <t> [off] {sym} ptr idx mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVDloadidx {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDreg(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVDreg <t> x)
-       // cond: t.Compare(x.Type) == CMPeq
-       // result: x
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if !(t.Compare(x.Type) == CMPeq) {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVDreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = c
-               return true
-       }
-       // match: (MOVDreg x)
-       // cond: x.Uses == 1
-       // result: (MOVDnop x)
-       for {
-               x := v.Args[0]
-               if !(x.Uses == 1) {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               v.reset(OpS390XMOVDnop)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVDreg <t> x:(MOVBZload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBZload <t> [off] {sym} ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZload {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if p != x0.Args[0] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVBZload, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDreg <t> x:(MOVBload  [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBload  <t> [off] {sym} ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBload {
+               if idx != x0.Args[1] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if mem != x0.Args[2] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVBload, t)
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDreg <t> x:(MOVHZload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVHZload <t> [off] {sym} ptr mem)
+       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))) or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZload {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, t)
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDreg <t> x:(MOVHload  [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVHload  <t> [off] {sym} ptr mem)
+       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))) or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem))) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHload {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHload, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDreg <t> x:(MOVWZload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWZload <t> [off] {sym} ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWZload {
+               if mem != x0.Args[2] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, t)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDreg <t> x:(MOVWload  [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWload  <t> [off] {sym} ptr mem)
+       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))) or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem))) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWload {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWload, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDreg <t> x:(MOVDload  [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVDload  <t> [off] {sym} ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVDload {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVDload, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDreg <t> x:(MOVBZloadidx [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBZloadidx <t> [off] {sym} ptr idx mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZloadidx {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVBZloadidx, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDreg <t> x:(MOVHZloadidx [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVHZloadidx <t> [off] {sym} ptr idx mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZloadidx {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDreg <t> x:(MOVWZloadidx [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWZloadidx <t> [off] {sym} ptr idx mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWZloadidx {
+               if idx != x0.Args[0] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if p != x0.Args[1] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVDreg <t> x:(MOVDloadidx  [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVDloadidx  <t> [off] {sym} ptr idx mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               if x.Op != OpS390XMOVDloadidx {
+               if mem != x0.Args[2] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, t)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDstore(v *Value) bool {
-       // match: (MOVDstore  [off1] {sym} (ADDconst [off2] ptr) val mem)
-       // cond: is20Bit(off1+off2)
-       // result: (MOVDstore  [off1+off2] {sym} ptr val mem)
+       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))) or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is20Bit(off1 + off2)) {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               v.reset(OpS390XMOVDstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstore [off] {sym} ptr (MOVDconst [c]) mem)
-       // cond: validValAndOff(c,off) && int64(int16(c)) == c && ptr.Op != OpSB
-       // result: (MOVDstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVDconst {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validValAndOff(c, off) && int64(int16(c)) == c && ptr.Op != OpSB) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
                        break
                }
-               v.reset(OpS390XMOVDstoreconst)
-               v.AuxInt = makeValAndOff(c, off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstore  [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVDstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               v.reset(OpS390XMOVDstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVDstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddridx {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpS390XMOVDstoreidx)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDstore [off] {sym} (ADD ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVDstoreidx [off] {sym} ptr idx val mem)
+       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))) or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADD {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               v.reset(OpS390XMOVDstoreidx)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstore [i] {s} p w1 x:(MOVDstore [i-8] {s} p w0 mem))
-       // cond: p.Op != OpSB   && x.Uses == 1   && is20Bit(i-8)   && clobber(x)
-       // result: (STMG2 [i-8] {s} p w0 w1 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w1 := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVDstore {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if x.AuxInt != i-8 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
                        break
                }
-               if x.Aux != s {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               if p != x.Args[0] {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               w0 := x.Args[1]
-               mem := x.Args[2]
-               if !(p.Op != OpSB && x.Uses == 1 && is20Bit(i-8) && clobber(x)) {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               v.reset(OpS390XSTMG2)
-               v.AuxInt = i - 8
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(w1)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstore [i] {s} p w2 x:(STMG2 [i-16] {s} p w0 w1 mem))
-       // cond: x.Uses == 1   && is20Bit(i-16)   && clobber(x)
-       // result: (STMG3 [i-16] {s} p w0 w1 w2 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w2 := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpS390XSTMG2 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x.AuxInt != i-16 {
+               if p != x0.Args[0] {
                        break
                }
-               if x.Aux != s {
+               if idx != x0.Args[1] {
                        break
                }
-               if p != x.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               w0 := x.Args[1]
-               w1 := x.Args[2]
-               mem := x.Args[3]
-               if !(x.Uses == 1 && is20Bit(i-16) && clobber(x)) {
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               v.reset(OpS390XSTMG3)
-               v.AuxInt = i - 16
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(w1)
-               v.AddArg(w2)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDstore [i] {s} p w3 x:(STMG3 [i-24] {s} p w0 w1 w2 mem))
-       // cond: x.Uses == 1   && is20Bit(i-24)   && clobber(x)
-       // result: (STMG4 [i-24] {s} p w0 w1 w2 w3 mem)
+       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))) or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w3 := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpS390XSTMG3 {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               if x.AuxInt != i-24 {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if x.Aux != s {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
                        break
                }
-               w0 := x.Args[1]
-               w1 := x.Args[2]
-               w2 := x.Args[3]
-               mem := x.Args[4]
-               if !(x.Uses == 1 && is20Bit(i-24) && clobber(x)) {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               v.reset(OpS390XSTMG4)
-               v.AuxInt = i - 24
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(w1)
-               v.AddArg(w2)
-               v.AddArg(w3)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDstoreconst(v *Value) bool {
-       // match: (MOVDstoreconst [sc] {s} (ADDconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVDstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-       for {
-               sc := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               off := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(sc).canAdd(off)) {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               v.reset(OpS390XMOVDstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVDstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-       for {
-               sc := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+               if idx != x0.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVDstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVDstoreidx(v *Value) bool {
-       // match: (MOVDstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem)
-       // cond:
-       // result: (MOVDstoreidx [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               if p != x0.Args[1] {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpS390XMOVDstoreidx)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVDstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem)
-       // cond:
-       // result: (MOVDstoreidx [c+d] {sym} ptr idx val mem)
+       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))) or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XADDconst {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpS390XMOVDstoreidx)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XOR {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVHBRstore(v *Value) bool {
-       // match: (MOVHBRstore [i] {s} p (SRDconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWBRstore [i-2] {s} p w mem)
+       // match: (OR or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))) y) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSRDconst {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               if v_1.AuxInt != 16 {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVHBRstore {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if x.AuxInt != i-2 {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if x.Aux != s {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               if p != x.Args[0] {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if w != x.Args[1] {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpS390XMOVWBRstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVHBRstore [i] {s} p (SRDconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRDconst [j-16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWBRstore [i-2] {s} p w0 mem)
+       // match: (OR or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem))) y) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSRDconst {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVHBRstore {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               if x.AuxInt != i-2 {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if x.Aux != s {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               w0 := x.Args[1]
-               if w0.Op != OpS390XSRDconst {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if w0.AuxInt != j-16 {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if w != w0.Args[0] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVWBRstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVHBRstore [i] {s} p (SRWconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWBRstore [i-2] {s} p w mem)
+       // match: (OR or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSRWconst {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               if v_1.AuxInt != 16 {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVHBRstore {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if x.AuxInt != i-2 {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if x.Aux != s {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               if p != x.Args[0] {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if w != x.Args[1] {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpS390XMOVWBRstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVHBRstore [i] {s} p (SRWconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRWconst [j-16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWBRstore [i-2] {s} p w0 mem)
+       // match: (OR or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)))) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSRWconst {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVHBRstore {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               if x.AuxInt != i-2 {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if x.Aux != s {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               w0 := x.Args[1]
-               if w0.Op != OpS390XSRWconst {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if w0.AuxInt != j-16 {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if w != w0.Args[0] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVWBRstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVHBRstoreidx(v *Value) bool {
-       // match: (MOVHBRstoreidx [i] {s} p idx (SRDconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWBRstoreidx [i-2] {s} p idx w mem)
+       // match: (OR or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))) y) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XSRDconst {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               if v_2.AuxInt != 16 {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVHBRstoreidx {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if x.AuxInt != i-2 {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if x.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
-               if w != x.Args[2] {
+               if mem != x1.Args[2] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               v.reset(OpS390XMOVWBRstoreidx)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVHBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRDconst [j-16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
+       // match: (OR or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem))) y) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XSRDconst {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVHBRstoreidx {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               if x.AuxInt != i-2 {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if x.Aux != s {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               if idx != x.Args[1] {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != OpS390XSRDconst {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if w0.AuxInt != j-16 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if w != w0.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if p != x1.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVWBRstoreidx)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVHBRstoreidx [i] {s} p idx (SRWconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWBRstoreidx [i-2] {s} p idx w mem)
+       // match: (OR or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XSRWconst {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               if v_2.AuxInt != 16 {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVHBRstoreidx {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if x.AuxInt != i-2 {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if x.Aux != s {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               if p != x.Args[0] {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if idx != x.Args[1] {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if w != x.Args[2] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if idx != x1.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVWBRstoreidx)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVHBRstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRWconst [j-16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
+       // match: (OR or:(OR y s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)))) s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XSRWconst {
+               or := v.Args[0]
+               if or.Op != OpS390XOR {
                        break
                }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVHBRstoreidx {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLDconst {
                        break
                }
-               if x.AuxInt != i-2 {
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if x.Aux != s {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLDconst {
                        break
                }
-               if idx != x.Args[1] {
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != OpS390XSRWconst {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if w0.AuxInt != j-16 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if w != w0.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if p != x1.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVWBRstoreidx)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XOR, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueS390X_OpS390XMOVHZload(v *Value) bool {
-       // match: (MOVHZload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVHZreg x)
+func rewriteValueS390X_OpS390XORW(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (ORW x (MOVDconst [c]))
+       // cond:
+       // result: (ORWconst [c] x)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVHstore {
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpS390XORWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORW (MOVDconst [c]) x)
+       // cond:
+       // result: (ORWconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpS390XORWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORW (SLWconst x [c]) (SRWconst x [d]))
+       // cond: d == 32-c
+       // result: (RLLconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XSLWconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSRWconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 32-c) {
+                       break
+               }
+               v.reset(OpS390XRLLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORW (SRWconst x [d]) (SLWconst x [c]))
+       // cond: d == 32-c
+       // result: (RLLconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XSRWconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XSLWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               if !(d == 32-c) {
                        break
                }
-               v.reset(OpS390XMOVHZreg)
+               v.reset(OpS390XRLLconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (MOVHZload  [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is20Bit(off1+off2)
-       // result: (MOVHZload [off1+off2] {sym} ptr mem)
+       // match: (ORW x x)
+       // cond:
+       // result: x
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               x := v.Args[0]
+               if x != v.Args[1] {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is20Bit(off1 + off2)) {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORW <t> x g:(MOVWload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWload {
                        break
                }
-               v.reset(OpS390XMOVHZload)
-               v.AuxInt = off1 + off2
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XORWload)
+               v.Type = t
+               v.AuxInt = off
                v.Aux = sym
+               v.AddArg(x)
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHZload  [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVHZload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (ORW <t> g:(MOVWload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORWload <t> [off] {sym} x ptr mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWload {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
                        break
                }
-               v.reset(OpS390XMOVHZload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
+               v.reset(OpS390XORWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVHZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (ORW <t> g:(MOVWload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORWload <t> [off] {sym} x ptr mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddridx {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWload {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
                        break
                }
-               v.reset(OpS390XMOVHZloadidx)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               v.reset(OpS390XORWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
                v.AddArg(ptr)
-               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHZload [off] {sym} (ADD ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVHZloadidx [off] {sym} ptr idx mem)
+       // match: (ORW <t> x g:(MOVWload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORWload <t> [off] {sym} x ptr mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADD {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWload {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(ptr.Op != OpSB) {
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
                        break
                }
-               v.reset(OpS390XMOVHZloadidx)
+               v.reset(OpS390XORWload)
+               v.Type = t
                v.AuxInt = off
                v.Aux = sym
+               v.AddArg(x)
                v.AddArg(ptr)
-               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVHZloadidx(v *Value) bool {
-       // match: (MOVHZloadidx [c] {sym} (ADDconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVHZloadidx [c+d] {sym} ptr idx mem)
+       // match: (ORW <t> x g:(MOVWZload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORWload <t> [off] {sym} x ptr mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWZload {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpS390XMOVHZloadidx)
-               v.AuxInt = c + d
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XORWload)
+               v.Type = t
+               v.AuxInt = off
                v.Aux = sym
+               v.AddArg(x)
                v.AddArg(ptr)
-               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHZloadidx [c] {sym} ptr (ADDconst [d] idx) mem)
-       // cond:
-       // result: (MOVHZloadidx [c+d] {sym} ptr idx mem)
+       // match: (ORW <t> g:(MOVWZload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORWload <t> [off] {sym} x ptr mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XADDconst {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWZload {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpS390XMOVHZloadidx)
-               v.AuxInt = c + d
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XORWload)
+               v.Type = t
+               v.AuxInt = off
                v.Aux = sym
+               v.AddArg(x)
                v.AddArg(ptr)
-               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVHZreg(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVHZreg x:(MOVBZload _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (ORW <t> g:(MOVWZload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORWload <t> [off] {sym} x ptr mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZload {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWZload {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHZreg x:(MOVHZload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZload {
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
                        break
                }
-               v.reset(OpS390XMOVDreg)
+               v.reset(OpS390XORWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVHZreg x:(Arg <t>))
-       // cond: (is8BitInt(t) || is16BitInt(t)) && !isSigned(t)
-       // result: (MOVDreg x)
+       // match: (ORW <t> x g:(MOVWZload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (ORWload <t> [off] {sym} x ptr mem)
        for {
+               t := v.Type
                x := v.Args[0]
-               if x.Op != OpArg {
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWZload {
                        break
                }
-               t := x.Type
-               if !((is8BitInt(t) || is16BitInt(t)) && !isSigned(t)) {
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
                        break
                }
-               v.reset(OpS390XMOVDreg)
+               v.reset(OpS390XORWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVHZreg x:(MOVBZreg _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (ORW x1:(MOVBZload [i1] {s} p mem) sh:(SLWconst [8] x0:(MOVBZload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZreg {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHZreg x:(MOVHZreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZreg {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHZreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(uint16(c))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               if sh.AuxInt != 8 {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = int64(uint16(c))
-               return true
-       }
-       // match: (MOVHZreg x:(MOVHZload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVHZload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZload {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZload {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, v.Type)
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
                v0.AddArg(mem)
                return true
        }
-       // match: (MOVHZreg x:(MOVHZloadidx [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVHZloadidx <v.Type> [off] {sym} ptr idx mem)
+       // match: (ORW sh:(SLWconst [8] x0:(MOVBZload [i0] {s} p mem)) x1:(MOVBZload [i1] {s} p mem))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZloadidx {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if sh.AuxInt != 8 {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, v.Type)
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVBZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
                v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVHload(v *Value) bool {
-       // match: (MOVHload   [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is20Bit(off1+off2)
-       // result: (MOVHload  [off1+off2] {sym} ptr mem)
+       // match: (ORW x1:(MOVHZload [i1] {s} p mem) sh:(SLWconst [16] x0:(MOVHZload [i0] {s} p mem)))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVHZload {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is20Bit(off1 + off2)) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVHload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVHload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               if sh.AuxInt != 16 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZload {
                        break
                }
-               v.reset(OpS390XMOVHload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVHreg(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVHreg x:(MOVBload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBload {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVBZload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZload {
+               if p != x0.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVHload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHload {
+               if mem != x0.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVHreg x:(Arg <t>))
-       // cond: (is8BitInt(t) || is16BitInt(t)) && isSigned(t)
-       // result: (MOVDreg x)
+       // match: (ORW sh:(SLWconst [16] x0:(MOVHZload [i0] {s} p mem)) x1:(MOVHZload [i1] {s} p mem))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpArg {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               t := x.Type
-               if !((is8BitInt(t) || is16BitInt(t)) && isSigned(t)) {
+               if sh.AuxInt != 16 {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVBreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBreg {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZload {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVHZload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVHreg x:(MOVBZreg _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem)) or:(ORW s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
        for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZreg {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLWconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVHreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHreg {
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(int16(c))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = int64(int16(c))
-               return true
-       }
-       // match: (MOVHreg x:(MOVHZload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVHload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZload {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if p != x1.Args[0] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHload, v.Type)
+               if mem != x1.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVHstore(v *Value) bool {
-       // match: (MOVHstore [off] {sym} ptr (MOVHreg x) mem)
-       // cond:
-       // result: (MOVHstore [off] {sym} ptr x mem)
+       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem)) or:(ORW y s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVHreg {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpS390XMOVHstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstore [off] {sym} ptr (MOVHZreg x) mem)
-       // cond:
-       // result: (MOVHstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVHZreg {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpS390XMOVHstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstore  [off1] {sym} (ADDconst [off2] ptr) val mem)
-       // cond: is20Bit(off1+off2)
-       // result: (MOVHstore  [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is20Bit(off1 + off2)) {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVHstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstore [off] {sym} ptr (MOVDconst [c]) mem)
-       // cond: validOff(off) && ptr.Op != OpSB
-       // result: (MOVHstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVDconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validOff(off) && ptr.Op != OpSB) {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpS390XMOVHstoreconst)
-               v.AuxInt = makeValAndOff(int64(int16(c)), off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstore  [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVHstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               if p != x1.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if mem != x1.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVHstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVHstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVHstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // match: (ORW or:(ORW s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem)) y) s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddridx {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVHstoreidx)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstore [off] {sym} (ADD ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVHstoreidx [off] {sym} ptr idx val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADD {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVHstoreidx)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRDconst [16] w) mem))
-       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
-       // result: (MOVWstore [i-2] {s} p w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVHstore {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW or:(ORW y s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))) s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               if x.AuxInt != i-2 {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if x.Aux != s {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               if p != x.Args[0] {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpS390XSRDconst {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
                        break
                }
-               if x_1.AuxInt != 16 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if w != x_1.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               mem := x.Args[2]
-               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+               if mem != x0.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVHstore [i] {s} p w0:(SRDconst [j] w) x:(MOVHstore [i-2] {s} p (SRDconst [j+16] w) mem))
-       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
-       // result: (MOVWstore [i-2] {s} p w0 mem)
+       // match: (ORW x1:(MOVBZloadidx [i1] {s} p idx mem) sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w0 := v.Args[1]
-               if w0.Op != OpS390XSRDconst {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               j := w0.AuxInt
-               w := w0.Args[0]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVHstore {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if x.AuxInt != i-2 {
+               if sh.AuxInt != 8 {
                        break
                }
-               if x.Aux != s {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpS390XSRDconst {
+               if p != x0.Args[0] {
                        break
                }
-               if x_1.AuxInt != j+16 {
+               if idx != x0.Args[1] {
                        break
                }
-               if w != x_1.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               mem := x.Args[2]
-               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRWconst [16] w) mem))
-       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
-       // result: (MOVWstore [i-2] {s} p w mem)
+       // match: (ORW x1:(MOVBZloadidx [i1] {s} idx p mem) sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVHstore {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x.AuxInt != i-2 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if x.Aux != s {
+               if sh.AuxInt != 8 {
                        break
                }
-               if p != x.Args[0] {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpS390XSRWconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x_1.AuxInt != 16 {
+               if p != x0.Args[0] {
                        break
                }
-               if w != x_1.Args[0] {
+               if idx != x0.Args[1] {
                        break
                }
-               mem := x.Args[2]
-               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [i] {s} p w0:(SRWconst [j] w) x:(MOVHstore [i-2] {s} p (SRWconst [j+16] w) mem))
-       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
-       // result: (MOVWstore [i-2] {s} p w0 mem)
+       // match: (ORW x1:(MOVBZloadidx [i1] {s} p idx mem) sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w0 := v.Args[1]
-               if w0.Op != OpS390XSRWconst {
-                       break
-               }
-               j := w0.AuxInt
-               w := w0.Args[0]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVHstore {
-                       break
-               }
-               if x.AuxInt != i-2 {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x.Aux != s {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if p != x.Args[0] {
+               if sh.AuxInt != 8 {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpS390XSRWconst {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x_1.AuxInt != j+16 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if w != x_1.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               mem := x.Args[2]
-               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+               if p != x0.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVHstoreconst(v *Value) bool {
-       // match: (MOVHstoreconst [sc] {s} (ADDconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVHstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-       for {
-               sc := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               if mem != x0.Args[2] {
                        break
                }
-               off := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(sc).canAdd(off)) {
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpS390XMOVHstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVHstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // match: (ORW x1:(MOVBZloadidx [i1] {s} idx p mem) sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
        for {
-               sc := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVHstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstoreconst [c] {s} p x:(MOVHstoreconst [a] {s} p mem))
-       // cond: p.Op != OpSB   && x.Uses == 1   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()   && clobber(x)
-       // result: (MOVWstoreconst [makeValAndOff(ValAndOff(c).Val()&0xffff | ValAndOff(a).Val()<<16, ValAndOff(a).Off())] {s} p mem)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpS390XMOVHstoreconst {
+               if sh.AuxInt != 8 {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               mem := x.Args[1]
-               if !(p.Op != OpSB && x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+               if idx != x0.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVWstoreconst)
-               v.AuxInt = makeValAndOff(ValAndOff(c).Val()&0xffff|ValAndOff(a).Val()<<16, ValAndOff(a).Off())
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVHstoreidx(v *Value) bool {
-       // match: (MOVHstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem)
-       // cond:
-       // result: (MOVHstoreidx [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               if p != x0.Args[1] {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpS390XMOVHstoreidx)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem)
-       // cond:
-       // result: (MOVHstoreidx [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XADDconst {
+               if mem != x0.Args[2] {
                        break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpS390XMOVHstoreidx)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               }
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWstoreidx [i-2] {s} p idx w mem)
+       // match: (ORW sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)) x1:(MOVBZloadidx [i1] {s} p idx mem))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               w := v.Args[2]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVHstoreidx {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if x.AuxInt != i-2 {
+               if sh.AuxInt != 8 {
                        break
                }
-               if x.Aux != s {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if idx != x.Args[1] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               x_2 := x.Args[2]
-               if x_2.Op != OpS390XSRDconst {
+               if p != x1.Args[0] {
                        break
                }
-               if x_2.AuxInt != 16 {
+               if idx != x1.Args[1] {
                        break
                }
-               if w != x_2.Args[0] {
+               if mem != x1.Args[2] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpS390XMOVWstoreidx)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [j+16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWstoreidx [i-2] {s} p idx w0 mem)
+       // match: (ORW sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} idx p mem)) x1:(MOVBZloadidx [i1] {s} p idx mem))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               w0 := v.Args[2]
-               if w0.Op != OpS390XSRDconst {
-                       break
-               }
-               j := w0.AuxInt
-               w := w0.Args[0]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVHstoreidx {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if x.AuxInt != i-2 {
+               if sh.AuxInt != 8 {
                        break
                }
-               if x.Aux != s {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if idx != x.Args[1] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               x_2 := x.Args[2]
-               if x_2.Op != OpS390XSRDconst {
+               if p != x1.Args[0] {
                        break
                }
-               if x_2.AuxInt != j+16 {
+               if idx != x1.Args[1] {
                        break
                }
-               if w != x_2.Args[0] {
+               if mem != x1.Args[2] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpS390XMOVWstoreidx)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWstoreidx [i-2] {s} p idx w mem)
+       // match: (ORW sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)) x1:(MOVBZloadidx [i1] {s} idx p mem))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               w := v.Args[2]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVHstoreidx {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if x.AuxInt != i-2 {
+               if sh.AuxInt != 8 {
                        break
                }
-               if x.Aux != s {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if idx != x.Args[1] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               x_2 := x.Args[2]
-               if x_2.Op != OpS390XSRWconst {
+               if idx != x1.Args[0] {
                        break
                }
-               if x_2.AuxInt != 16 {
+               if p != x1.Args[1] {
                        break
                }
-               if w != x_2.Args[0] {
+               if mem != x1.Args[2] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpS390XMOVWstoreidx)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [j+16] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVWstoreidx [i-2] {s} p idx w0 mem)
+       // match: (ORW sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} idx p mem)) x1:(MOVBZloadidx [i1] {s} idx p mem))
+       // cond: i1 == i0+1   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               w0 := v.Args[2]
-               if w0.Op != OpS390XSRWconst {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               j := w0.AuxInt
-               w := w0.Args[0]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVHstoreidx {
+               if sh.AuxInt != 8 {
                        break
                }
-               if x.AuxInt != i-2 {
-                       break
-               }
-               if x.Aux != s {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if idx != x.Args[1] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               x_2 := x.Args[2]
-               if x_2.Op != OpS390XSRWconst {
+               if idx != x1.Args[0] {
                        break
                }
-               if x_2.AuxInt != j+16 {
+               if p != x1.Args[1] {
                        break
                }
-               if w != x_2.Args[0] {
+               if mem != x1.Args[2] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpS390XMOVWstoreidx)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVWBRstore(v *Value) bool {
-       // match: (MOVWBRstore [i] {s} p (SRDconst [32] w) x:(MOVWBRstore [i-4] {s} p w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVDBRstore [i-4] {s} p w mem)
+       // match: (ORW x1:(MOVHZloadidx [i1] {s} p idx mem) sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSRDconst {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
                        break
                }
-               if v_1.AuxInt != 32 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVWBRstore {
+               if sh.AuxInt != 16 {
                        break
                }
-               if x.AuxInt != i-4 {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
                        break
                }
-               if x.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if w != x.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpS390XMOVDBRstore)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWBRstore [i] {s} p (SRDconst [j] w) x:(MOVWBRstore [i-4] {s} p w0:(SRDconst [j-32] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVDBRstore [i-4] {s} p w0 mem)
+       // match: (ORW x1:(MOVHZloadidx [i1] {s} idx p mem) sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSRDconst {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVWBRstore {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if x.AuxInt != i-4 {
+               if sh.AuxInt != 16 {
                        break
                }
-               if x.Aux != s {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               w0 := x.Args[1]
-               if w0.Op != OpS390XSRDconst {
+               if p != x0.Args[0] {
                        break
                }
-               if w0.AuxInt != j-32 {
+               if idx != x0.Args[1] {
                        break
                }
-               if w != w0.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpS390XMOVDBRstore)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVWBRstoreidx(v *Value) bool {
-       // match: (MOVWBRstoreidx [i] {s} p idx (SRDconst [32] w) x:(MOVWBRstoreidx [i-4] {s} p idx w mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVDBRstoreidx [i-4] {s} p idx w mem)
+       // match: (ORW x1:(MOVHZloadidx [i1] {s} p idx mem) sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XSRDconst {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
                        break
                }
-               if v_2.AuxInt != 32 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVWBRstoreidx {
+               if sh.AuxInt != 16 {
                        break
                }
-               if x.AuxInt != i-4 {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
                        break
                }
-               if x.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if idx != x.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
-               if w != x.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpS390XMOVDBRstoreidx)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVWBRstoreidx [i-4] {s} p idx w0:(SRDconst [j-32] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVDBRstoreidx [i-4] {s} p idx w0 mem)
+       // match: (ORW x1:(MOVHZloadidx [i1] {s} idx p mem) sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpS390XSRDconst {
-                       break
-               }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVWBRstoreidx {
+               x1 := v.Args[0]
+               if x1.Op != OpS390XMOVHZloadidx {
                        break
                }
-               if x.AuxInt != i-4 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if x.Aux != s {
+               if sh.AuxInt != 16 {
                        break
                }
-               if p != x.Args[0] {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
                        break
                }
-               if idx != x.Args[1] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != OpS390XSRDconst {
+               if idx != x0.Args[0] {
                        break
                }
-               if w0.AuxInt != j-32 {
+               if p != x0.Args[1] {
                        break
                }
-               if w != w0.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpS390XMOVDBRstoreidx)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVWZload(v *Value) bool {
-       // match: (MOVWZload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVWZreg x)
+       // match: (ORW sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)) x1:(MOVHZloadidx [i1] {s} p idx mem))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVWstore {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               if sh.AuxInt != 16 {
                        break
                }
-               v.reset(OpS390XMOVWZreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWZload  [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is20Bit(off1+off2)
-       // result: (MOVWZload [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is20Bit(off1 + off2)) {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVHZloadidx {
                        break
                }
-               v.reset(OpS390XMOVWZload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWZload  [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWZload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVWZload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddridx {
+               if idx != x1.Args[1] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(OpS390XMOVWZloadidx)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWZload [off] {sym} (ADD ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVWZloadidx [off] {sym} ptr idx mem)
+       // match: (ORW sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} idx p mem)) x1:(MOVHZloadidx [i1] {s} p idx mem))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADD {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(ptr.Op != OpSB) {
+               if sh.AuxInt != 16 {
                        break
                }
-               v.reset(OpS390XMOVWZloadidx)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVWZloadidx(v *Value) bool {
-       // match: (MOVWZloadidx [c] {sym} (ADDconst [d] ptr) idx mem)
-       // cond:
-       // result: (MOVWZloadidx [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpS390XMOVWZloadidx)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWZloadidx [c] {sym} ptr (ADDconst [d] idx) mem)
-       // cond:
-       // result: (MOVWZloadidx [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XADDconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVHZloadidx {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpS390XMOVWZloadidx)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVWZreg(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWZreg x:(MOVBZload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZload {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWZreg x:(MOVHZload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZload {
+               if idx != x1.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWZreg x:(MOVWZload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWZload {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWZreg x:(Arg <t>))
-       // cond: (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t)
-       // result: (MOVDreg x)
+       // match: (ORW sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)) x1:(MOVHZloadidx [i1] {s} idx p mem))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpArg {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               t := x.Type
-               if !((is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t)) {
+               if sh.AuxInt != 16 {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWZreg x:(MOVBZreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZreg {
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWZreg x:(MOVHZreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZreg {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVHZloadidx {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWZreg x:(MOVWZreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWZreg {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWZreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(uint32(c))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               if idx != x1.Args[0] {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = int64(uint32(c))
-               return true
-       }
-       // match: (MOVWZreg x:(MOVWZload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWZload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWZload {
+               if p != x1.Args[1] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if mem != x1.Args[2] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, v.Type)
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (MOVWZreg x:(MOVWZloadidx [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWZloadidx <v.Type> [off] {sym} ptr idx mem)
+       // match: (ORW sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} idx p mem)) x1:(MOVHZloadidx [i1] {s} idx p mem))
+       // cond: i1 == i0+2   && p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWZloadidx {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if sh.AuxInt != 16 {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, v.Type)
+               x0 := sh.Args[0]
+               if x0.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
                v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVWload(v *Value) bool {
-       // match: (MOVWload   [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is20Bit(off1+off2)
-       // result: (MOVWload  [off1+off2] {sym} ptr mem)
+       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) or:(ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is20Bit(off1 + off2)) {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XMOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVWreg(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWreg x:(MOVBload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBload {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVBZload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZload {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHload {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHZload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHZload {
+               if idx != x1.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVWload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWload {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWreg x:(Arg <t>))
-       // cond: (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t)
-       // result: (MOVDreg x)
+       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) or:(ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               x := v.Args[0]
-               if x.Op != OpArg {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               t := x.Type
-               if !((is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t)) {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVBreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBreg {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVBZreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVBZreg {
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHreg {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVHreg {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVWreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWreg {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(int32(c))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               if idx != x1.Args[1] {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = int64(int32(c))
-               return true
-       }
-       // match: (MOVWreg x:(MOVWZload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XMOVWZload {
+               if mem != x1.Args[2] {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWload, v.Type)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVWstore(v *Value) bool {
-       // match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem)
-       // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
+       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) or:(ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVWreg {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} ptr (MOVWZreg x) mem)
-       // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVWZreg {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore  [off1] {sym} (ADDconst [off2] ptr) val mem)
-       // cond: is20Bit(off1+off2)
-       // result: (MOVWstore  [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is20Bit(off1 + off2)) {
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} ptr (MOVDconst [c]) mem)
-       // cond: validOff(off) && int64(int16(c)) == c && ptr.Op != OpSB
-       // result: (MOVWstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVDconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validOff(off) && int64(int16(c)) == c && ptr.Op != OpSB) {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpS390XMOVWstoreconst)
-               v.AuxInt = makeValAndOff(int64(int32(c)), off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore  [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               if idx != x1.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if p != x1.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) or:(ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddridx {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLWconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XMOVWstoreidx)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} (ADD ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVWstoreidx [off] {sym} ptr idx val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADD {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
+               if idx != x1.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVWstoreidx)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWstore [i] {s} p (SRDconst [32] w) x:(MOVWstore [i-4] {s} p w mem))
-       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
-       // result: (MOVDstore [i-4] {s} p w mem)
+       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) or:(ORW y s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSRDconst {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if v_1.AuxInt != 32 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVWstore {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               if x.AuxInt != i-4 {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if x.Aux != s {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if w != x.Args[1] {
+               if p != x1.Args[0] {
                        break
                }
-               mem := x.Args[2]
-               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+               if idx != x1.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVDstore)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWstore [i] {s} p w0:(SRDconst [j] w) x:(MOVWstore [i-4] {s} p (SRDconst [j+32] w) mem))
-       // cond: p.Op != OpSB   && x.Uses == 1   && clobber(x)
-       // result: (MOVDstore [i-4] {s} p w0 mem)
+       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) or:(ORW y s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w0 := v.Args[1]
-               if w0.Op != OpS390XSRDconst {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               j := w0.AuxInt
-               w := w0.Args[0]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVWstore {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x.AuxInt != i-4 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               if x.Aux != s {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if p != x.Args[0] {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpS390XSRDconst {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x_1.AuxInt != j+32 {
+               if p != x1.Args[0] {
                        break
                }
-               if w != x_1.Args[0] {
+               if idx != x1.Args[1] {
                        break
                }
-               mem := x.Args[2]
-               if !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(OpS390XMOVDstore)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWstore [i] {s} p w1 x:(MOVWstore [i-4] {s} p w0 mem))
-       // cond: p.Op != OpSB   && x.Uses == 1   && is20Bit(i-4)   && clobber(x)
-       // result: (STM2 [i-4] {s} p w0 w1 mem)
+       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) or:(ORW y s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w1 := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpS390XMOVWstore {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if x.AuxInt != i-4 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x.Aux != s {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               if p != x.Args[0] {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               w0 := x.Args[1]
-               mem := x.Args[2]
-               if !(p.Op != OpSB && x.Uses == 1 && is20Bit(i-4) && clobber(x)) {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XSTM2)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(w1)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [i] {s} p w2 x:(STM2 [i-8] {s} p w0 w1 mem))
-       // cond: x.Uses == 1   && is20Bit(i-8)   && clobber(x)
-       // result: (STM3 [i-8] {s} p w0 w1 w2 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w2 := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpS390XSTM2 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x.AuxInt != i-8 {
+               if idx != x1.Args[0] {
                        break
                }
-               if x.Aux != s {
+               if p != x1.Args[1] {
                        break
                }
-               if p != x.Args[0] {
+               if mem != x1.Args[2] {
                        break
                }
-               w0 := x.Args[1]
-               w1 := x.Args[2]
-               mem := x.Args[3]
-               if !(x.Uses == 1 && is20Bit(i-8) && clobber(x)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               v.reset(OpS390XSTM3)
-               v.AuxInt = i - 8
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(w1)
-               v.AddArg(w2)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWstore [i] {s} p w3 x:(STM3 [i-12] {s} p w0 w1 w2 mem))
-       // cond: x.Uses == 1   && is20Bit(i-12)   && clobber(x)
-       // result: (STM4 [i-12] {s} p w0 w1 w2 w3 mem)
+       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) or:(ORW y s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               w3 := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpS390XSTM3 {
+               s0 := v.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if x.AuxInt != i-12 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x.Aux != s {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               if p != x.Args[0] {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               w0 := x.Args[1]
-               w1 := x.Args[2]
-               w2 := x.Args[3]
-               mem := x.Args[4]
-               if !(x.Uses == 1 && is20Bit(i-12) && clobber(x)) {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               v.reset(OpS390XSTM4)
-               v.AuxInt = i - 12
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(w1)
-               v.AddArg(w2)
-               v.AddArg(w3)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVWstoreconst(v *Value) bool {
-       b := v.Block
-       _ = b
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (MOVWstoreconst [sc] {s} (ADDconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       // match: (ORW or:(ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) y) s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               sc := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               off := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(sc).canAdd(off)) {
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVWstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-       for {
-               sc := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDaddr {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVWstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
-       // cond: p.Op != OpSB   && x.Uses == 1   && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()   && clobber(x)
-       // result: (MOVDstore [ValAndOff(a).Off()] {s} p (MOVDconst [ValAndOff(c).Val()&0xffffffff | ValAndOff(a).Val()<<32]) mem)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpS390XMOVWstoreconst {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               mem := x.Args[1]
-               if !(p.Op != OpSB && x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+               if idx != x0.Args[1] {
                        break
                }
-               v.reset(OpS390XMOVDstore)
-               v.AuxInt = ValAndOff(a).Off()
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVDconst, types.UInt64)
-               v0.AuxInt = ValAndOff(c).Val()&0xffffffff | ValAndOff(a).Val()<<32
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMOVWstoreidx(v *Value) bool {
-       // match: (MOVWstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem)
-       // cond:
-       // result: (MOVWstoreidx [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               if mem != x0.Args[2] {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpS390XMOVWstoreidx)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem)
-       // cond:
-       // result: (MOVWstoreidx [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XADDconst {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpS390XMOVWstoreidx)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWstoreidx [i] {s} p idx w x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [32] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVDstoreidx [i-4] {s} p idx w mem)
+       // match: (ORW or:(ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) y) s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               w := v.Args[2]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVWstoreidx {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               if x.AuxInt != i-4 {
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if x.Aux != s {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x.Args[0] {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if idx != x.Args[1] {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x_2 := x.Args[2]
-               if x_2.Op != OpS390XSRDconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x_2.AuxInt != 32 {
+               if p != x0.Args[0] {
                        break
                }
-               if w != x_2.Args[0] {
+               if idx != x0.Args[1] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpS390XMOVDstoreidx)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MOVWstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [j+32] w) mem))
-       // cond: x.Uses == 1   && clobber(x)
-       // result: (MOVDstoreidx [i-4] {s} p idx w0 mem)
+       // match: (ORW or:(ORW y s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))) s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v.Args[0]
-               idx := v.Args[1]
-               w0 := v.Args[2]
-               if w0.Op != OpS390XSRDconst {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               j := w0.AuxInt
-               w := w0.Args[0]
-               x := v.Args[3]
-               if x.Op != OpS390XMOVWstoreidx {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if x.AuxInt != i-4 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x.Aux != s {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if p != x.Args[0] {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if idx != x.Args[1] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               x_2 := x.Args[2]
-               if x_2.Op != OpS390XSRDconst {
+               if p != x0.Args[0] {
                        break
                }
-               if x_2.AuxInt != j+32 {
+               if idx != x0.Args[1] {
                        break
                }
-               if w != x_2.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               v.reset(OpS390XMOVDstoreidx)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueS390X_OpS390XMULLD(v *Value) bool {
-       // match: (MULLD x (MOVDconst [c]))
-       // cond: is32Bit(c)
-       // result: (MULLDconst [c] x)
+       // match: (ORW or:(ORW y s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))) s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVDconst {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               c := v_1.AuxInt
-               if !(is32Bit(c)) {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLWconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLWconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(OpS390XMULLDconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLD (MOVDconst [c]) x)
-       // cond: is32Bit(c)
-       // result: (MULLDconst [c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               if p != x0.Args[0] {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(is32Bit(c)) {
+               if idx != x0.Args[1] {
                        break
                }
-               v.reset(OpS390XMULLDconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLD <t> x g:(MOVDload [off] {sym} ptr mem))
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (MULLDload <t> [off] {sym} x ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               g := v.Args[1]
-               if g.Op != OpS390XMOVDload {
+               if mem != x0.Args[2] {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               v.reset(OpS390XMULLDload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MULLD <t> g:(MOVDload [off] {sym} ptr mem) x)
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (MULLDload <t> [off] {sym} x ptr mem)
+       // match: (ORW or:(ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) y) s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               t := v.Type
-               g := v.Args[0]
-               if g.Op != OpS390XMOVDload {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               x := v.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMULLDload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMULLDconst(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MULLDconst [-1] x)
-       // cond:
-       // result: (NEG x)
-       for {
-               if v.AuxInt != -1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpS390XNEG)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLDconst [0] _)
-       // cond:
-       // result: (MOVDconst [0])
-       for {
-               if v.AuxInt != 0 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (MULLDconst [1] x)
-       // cond:
-       // result: x
-       for {
-               if v.AuxInt != 1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLDconst [c] x)
-       // cond: isPowerOfTwo(c)
-       // result: (SLDconst [log2(c)] x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(OpS390XSLDconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLDconst [c] x)
-       // cond: isPowerOfTwo(c+1) && c >= 15
-       // result: (SUB (SLDconst <v.Type> [log2(c+1)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c+1) && c >= 15) {
+               if idx != x0.Args[0] {
                        break
                }
-               v.reset(OpS390XSUB)
-               v0 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
-               v0.AuxInt = log2(c + 1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLDconst [c] x)
-       // cond: isPowerOfTwo(c-1) && c >= 17
-       // result: (ADD (SLDconst <v.Type> [log2(c-1)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c-1) && c >= 17) {
+               if p != x0.Args[1] {
                        break
                }
-               v.reset(OpS390XADD)
-               v0 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
-               v0.AuxInt = log2(c - 1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLDconst [c] (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [c*d])
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               if mem != x0.Args[2] {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = c * d
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMULLW(v *Value) bool {
-       // match: (MULLW x (MOVDconst [c]))
-       // cond:
-       // result: (MULLWconst [c] x)
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVDconst {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpS390XMULLWconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MULLW (MOVDconst [c]) x)
-       // cond:
-       // result: (MULLWconst [c] x)
+       // match: (ORW or:(ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) y) s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpS390XMULLWconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLW <t> x g:(MOVWload [off] {sym} ptr mem))
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (MULLWload <t> [off] {sym} x ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               g := v.Args[1]
-               if g.Op != OpS390XMOVWload {
+               s1 := or.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XMULLWload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MULLW <t> g:(MOVWload [off] {sym} ptr mem) x)
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (MULLWload <t> [off] {sym} x ptr mem)
-       for {
-               t := v.Type
-               g := v.Args[0]
-               if g.Op != OpS390XMOVWload {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               x := v.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XMULLWload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MULLW <t> x g:(MOVWZload [off] {sym} ptr mem))
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (MULLWload <t> [off] {sym} x ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               g := v.Args[1]
-               if g.Op != OpS390XMOVWZload {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               if idx != x0.Args[0] {
                        break
                }
-               v.reset(OpS390XMULLWload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MULLW <t> g:(MOVWZload [off] {sym} ptr mem) x)
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (MULLWload <t> [off] {sym} x ptr mem)
-       for {
-               t := v.Type
-               g := v.Args[0]
-               if g.Op != OpS390XMOVWZload {
+               if p != x0.Args[1] {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               x := v.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpS390XMULLWload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XMULLWconst(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MULLWconst [-1] x)
-       // cond:
-       // result: (NEGW x)
-       for {
-               if v.AuxInt != -1 {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpS390XNEGW)
-               v.AddArg(x)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MULLWconst [0] _)
-       // cond:
-       // result: (MOVDconst [0])
+       // match: (ORW or:(ORW y s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))) s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               if v.AuxInt != 0 {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (MULLWconst [1] x)
-       // cond:
-       // result: x
-       for {
-               if v.AuxInt != 1 {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLWconst [c] x)
-       // cond: isPowerOfTwo(c)
-       // result: (SLWconst [log2(c)] x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c)) {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XSLWconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLWconst [c] x)
-       // cond: isPowerOfTwo(c+1) && c >= 15
-       // result: (SUBW (SLWconst <v.Type> [log2(c+1)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c+1) && c >= 15) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XSUBW)
-               v0 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
-               v0.AuxInt = log2(c + 1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLWconst [c] x)
-       // cond: isPowerOfTwo(c-1) && c >= 17
-       // result: (ADDW (SLWconst <v.Type> [log2(c-1)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c-1) && c >= 17) {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XADDW)
-               v0 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
-               v0.AuxInt = log2(c - 1)
-               v0.AddArg(x)
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
                v.AddArg(v0)
-               v.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (MULLWconst [c] (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [int64(int32(c*d))])
+       // match: (ORW or:(ORW y s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem))) s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = int64(int32(c * d))
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XNEG(v *Value) bool {
-       // match: (NEG (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [-c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = -c
-               return true
-       }
-       // match: (NEG (ADDconst [c] (NEG x)))
-       // cond: c != -(1<<31)
-       // result: (ADDconst [-c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XADDconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               c := v_0.AuxInt
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpS390XNEG {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               x := v_0_0.Args[0]
-               if !(c != -(1 << 31)) {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XADDconst)
-               v.AuxInt = -c
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XNEGW(v *Value) bool {
-       // match: (NEGW (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(int32(-c))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = int64(int32(-c))
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XNOT(v *Value) bool {
-       b := v.Block
-       _ = b
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (NOT x)
-       // cond: true
-       // result: (XOR (MOVDconst [-1]) x)
-       for {
-               x := v.Args[0]
-               if !(true) {
+               if idx != x0.Args[0] {
                        break
                }
-               v.reset(OpS390XXOR)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVDconst, types.UInt64)
-               v0.AuxInt = -1
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XNOTW(v *Value) bool {
-       // match: (NOTW x)
-       // cond: true
-       // result: (XORWconst [-1] x)
-       for {
-               x := v.Args[0]
-               if !(true) {
+               if p != x0.Args[1] {
                        break
                }
-               v.reset(OpS390XXORWconst)
-               v.AuxInt = -1
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XOR(v *Value) bool {
-       b := v.Block
-       _ = b
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (OR x (MOVDconst [c]))
-       // cond: isU32Bit(c)
-       // result: (ORconst [c] x)
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW x0:(MOVBZload [i0] {s} p mem) sh:(SLWconst [8] x1:(MOVBZload [i1] {s} p mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVDconst {
+               x0 := v.Args[0]
+               if x0.Op != OpS390XMOVBZload {
                        break
                }
-               c := v_1.AuxInt
-               if !(isU32Bit(c)) {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XORconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (OR (MOVDconst [c]) x)
-       // cond: isU32Bit(c)
-       // result: (ORconst [c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               if sh.AuxInt != 8 {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isU32Bit(c)) {
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               v.reset(OpS390XORconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: ( OR (SLDconst x [c]) (SRDconst x [64-c]))
-       // cond:
-       // result: (RLLGconst [   c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XSLDconst {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSRDconst {
+               if p != x1.Args[0] {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               if mem != x1.Args[1] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpS390XRLLGconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: ( OR (SRDconst x [c]) (SLDconst x [64-c]))
-       // cond:
-       // result: (RLLGconst [64-c] x)
+       // match: (ORW sh:(SLWconst [8] x1:(MOVBZload [i1] {s} p mem)) x0:(MOVBZload [i0] {s} p mem))
+       // cond: p.Op != OpSB   && i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XSRDconst {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSLDconst {
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != OpS390XMOVBZload {
                        break
                }
-               if x != v_1.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               v.reset(OpS390XRLLGconst)
-               v.AuxInt = 64 - c
-               v.AddArg(x)
-               return true
-       }
-       // match: (OR (MOVDconst [c]) (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [c|d])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               if p != x0.Args[0] {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVDconst {
+               if mem != x0.Args[1] {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = c | d
-               return true
-       }
-       // match: (OR x x)
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x != v.Args[1] {
+               if !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> x g:(MOVDload [off] {sym} ptr mem))
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (ORload <t> [off] {sym} x ptr mem)
+       // match: (ORW r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)) sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWBRload [i0] {s} p mem)
        for {
-               t := v.Type
-               x := v.Args[0]
-               g := v.Args[1]
-               if g.Op != OpS390XMOVDload {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRload {
                        break
                }
-               v.reset(OpS390XORload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (OR <t> g:(MOVDload [off] {sym} ptr mem) x)
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (ORload <t> [off] {sym} x ptr mem)
-       for {
-               t := v.Type
-               g := v.Args[0]
-               if g.Op != OpS390XMOVDload {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               x := v.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               if sh.AuxInt != 16 {
                        break
                }
-               v.reset(OpS390XORload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (OR  x:(SLDconst _) y)
-       // cond: y.Op != OpS390XSLDconst
-       // result: (OR  y x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XSLDconst {
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               y := v.Args[1]
-               if !(y.Op != OpS390XSLDconst) {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRload {
                        break
                }
-               v.reset(OpS390XOR)
-               v.AddArg(y)
-               v.AddArg(x)
-               return true
-       }
-       // match: (OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR                       x0:(MOVBZload [i]   {s} p mem)     s0:(SLDconst [8]  x1:(MOVBZload [i+1] {s} p mem)))     s1:(SLDconst [16] x2:(MOVBZload [i+2] {s} p mem)))     s2:(SLDconst [24] x3:(MOVBZload [i+3] {s} p mem)))     s3:(SLDconst [32] x4:(MOVBZload [i+4] {s} p mem)))     s4:(SLDconst [40] x5:(MOVBZload [i+5] {s} p mem)))     s5:(SLDconst [48] x6:(MOVBZload [i+6] {s} p mem)))     s6:(SLDconst [56] x7:(MOVBZload [i+7] {s} p mem)))
-       // cond: p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && x7.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && s2.Uses == 1   && s3.Uses == 1   && s4.Uses == 1   && s5.Uses == 1   && s6.Uses == 1   && o0.Uses == 1   && o1.Uses == 1   && o2.Uses == 1   && o3.Uses == 1   && o4.Uses == 1   && o5.Uses == 1   && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)   && clobber(x7)   && clobber(s0)   && clobber(s1)   && clobber(s2)   && clobber(s3)   && clobber(s4)   && clobber(s5)   && clobber(s6)   && clobber(o0)   && clobber(o1)   && clobber(o2)   && clobber(o3)   && clobber(o4)   && clobber(o5)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDBRload [i] {s} p mem)
-       for {
-               o0 := v.Args[0]
-               if o0.Op != OpS390XOR {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               o1 := o0.Args[0]
-               if o1.Op != OpS390XOR {
+               if p != x1.Args[0] {
                        break
                }
-               o2 := o1.Args[0]
-               if o2.Op != OpS390XOR {
+               if mem != x1.Args[1] {
                        break
                }
-               o3 := o2.Args[0]
-               if o3.Op != OpS390XOR {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               o4 := o3.Args[0]
-               if o4.Op != OpS390XOR {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORW sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))) r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWBRload [i0] {s} p mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               o5 := o4.Args[0]
-               if o5.Op != OpS390XOR {
+               if sh.AuxInt != 16 {
                        break
                }
-               x0 := o5.Args[0]
-               if x0.Op != OpS390XMOVBZload {
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := o5.Args[1]
-               if s0.Op != OpS390XSLDconst {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRload {
                        break
                }
-               if s0.AuxInt != 8 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpS390XMOVBZload {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRload {
                        break
                }
-               if x1.AuxInt != i+1 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x1.Aux != s {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x1.Args[0] {
+               if mem != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[1] {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               s1 := o4.Args[1]
-               if s1.Op != OpS390XSLDconst {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRload, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem)) or:(ORW s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem)) y))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLWconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               if s1.AuxInt != 16 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpS390XMOVBZload {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLWconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
                        break
                }
-               if x2.AuxInt != i+2 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x2.Aux != s {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x2.Args[0] {
+               if mem != x0.Args[1] {
                        break
                }
-               if mem != x2.Args[1] {
+               y := or.Args[1]
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               s2 := o3.Args[1]
-               if s2.Op != OpS390XSLDconst {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem)) or:(ORW y s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if s2.AuxInt != 24 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               x3 := s2.Args[0]
-               if x3.Op != OpS390XMOVBZload {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               if x3.AuxInt != i+3 {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if x3.Aux != s {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
                        break
                }
-               if p != x3.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if mem != x3.Args[1] {
+               if p != x0.Args[0] {
                        break
                }
-               s3 := o2.Args[1]
-               if s3.Op != OpS390XSLDconst {
+               if mem != x0.Args[1] {
                        break
                }
-               if s3.AuxInt != 32 {
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               x4 := s3.Args[0]
-               if x4.Op != OpS390XMOVBZload {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW or:(ORW s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem)) y) s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               if x4.AuxInt != i+4 {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if x4.Aux != s {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
                        break
                }
-               if p != x4.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if mem != x4.Args[1] {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               s4 := o1.Args[1]
-               if s4.Op != OpS390XSLDconst {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if s4.AuxInt != 40 {
+               if p != x1.Args[0] {
                        break
                }
-               x5 := s4.Args[0]
-               if x5.Op != OpS390XMOVBZload {
+               if mem != x1.Args[1] {
                        break
                }
-               if x5.AuxInt != i+5 {
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if x5.Aux != s {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW or:(ORW y s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))) s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               if p != x5.Args[0] {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if mem != x5.Args[1] {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZload {
                        break
                }
-               s5 := o0.Args[1]
-               if s5.Op != OpS390XSLDconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if s5.AuxInt != 48 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZload {
                        break
                }
-               x6 := s5.Args[0]
-               if x6.Op != OpS390XMOVBZload {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x6.AuxInt != i+6 {
+               if p != x1.Args[0] {
                        break
                }
-               if x6.Aux != s {
+               if mem != x1.Args[1] {
                        break
                }
-               if p != x6.Args[0] {
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if mem != x6.Args[1] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW x0:(MOVBZloadidx [i0] {s} p idx mem) sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               s6 := v.Args[1]
-               if s6.Op != OpS390XSLDconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if s6.AuxInt != 56 {
+               if sh.AuxInt != 8 {
                        break
                }
-               x7 := s6.Args[0]
-               if x7.Op != OpS390XMOVBZload {
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x7.AuxInt != i+7 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x7.Aux != s {
+               if p != x1.Args[0] {
                        break
                }
-               if p != x7.Args[0] {
+               if idx != x1.Args[1] {
                        break
                }
-               if mem != x7.Args[1] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)) {
+               if !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRload, types.UInt64)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR                       x0:(MOVBZloadidx [i]   {s} p idx mem)     s0:(SLDconst [8]  x1:(MOVBZloadidx [i+1] {s} p idx mem)))     s1:(SLDconst [16] x2:(MOVBZloadidx [i+2] {s} p idx mem)))     s2:(SLDconst [24] x3:(MOVBZloadidx [i+3] {s} p idx mem)))     s3:(SLDconst [32] x4:(MOVBZloadidx [i+4] {s} p idx mem)))     s4:(SLDconst [40] x5:(MOVBZloadidx [i+5] {s} p idx mem)))     s5:(SLDconst [48] x6:(MOVBZloadidx [i+6] {s} p idx mem)))     s6:(SLDconst [56] x7:(MOVBZloadidx [i+7] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && x7.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && s2.Uses == 1   && s3.Uses == 1   && s4.Uses == 1   && s5.Uses == 1   && s6.Uses == 1   && o0.Uses == 1   && o1.Uses == 1   && o2.Uses == 1   && o3.Uses == 1   && o4.Uses == 1   && o5.Uses == 1   && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)   && clobber(x7)   && clobber(s0)   && clobber(s1)   && clobber(s2)   && clobber(s3)   && clobber(s4)   && clobber(s5)   && clobber(s6)   && clobber(o0)   && clobber(o1)   && clobber(o2)   && clobber(o3)   && clobber(o4)   && clobber(o5)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDBRloadidx <v.Type> [i] {s} p idx mem)
+       // match: (ORW x0:(MOVBZloadidx [i0] {s} idx p mem) sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
        for {
-               o0 := v.Args[0]
-               if o0.Op != OpS390XOR {
-                       break
-               }
-               o1 := o0.Args[0]
-               if o1.Op != OpS390XOR {
-                       break
-               }
-               o2 := o1.Args[0]
-               if o2.Op != OpS390XOR {
-                       break
-               }
-               o3 := o2.Args[0]
-               if o3.Op != OpS390XOR {
-                       break
-               }
-               o4 := o3.Args[0]
-               if o4.Op != OpS390XOR {
-                       break
-               }
-               o5 := o4.Args[0]
-               if o5.Op != OpS390XOR {
-                       break
-               }
-               x0 := o5.Args[0]
+               x0 := v.Args[0]
                if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
+               idx := x0.Args[0]
+               p := x0.Args[1]
                mem := x0.Args[2]
-               s0 := o5.Args[1]
-               if s0.Op != OpS390XSLDconst {
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if s0.AuxInt != 8 {
+               if sh.AuxInt != 8 {
                        break
                }
-               x1 := s0.Args[0]
+               x1 := sh.Args[0]
                if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x1.AuxInt != i+1 {
-                       break
-               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -15240,1059 +30632,1381 @@ func rewriteValueS390X_OpS390XOR(v *Value) bool {
                if mem != x1.Args[2] {
                        break
                }
-               s1 := o4.Args[1]
-               if s1.Op != OpS390XSLDconst {
-                       break
-               }
-               if s1.AuxInt != 16 {
-                       break
-               }
-               x2 := s1.Args[0]
-               if x2.Op != OpS390XMOVBZloadidx {
-                       break
-               }
-               if x2.AuxInt != i+2 {
+               if !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               if x2.Aux != s {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORW x0:(MOVBZloadidx [i0] {s} p idx mem) sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x2.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if idx != x2.Args[1] {
+               if sh.AuxInt != 8 {
                        break
                }
-               if mem != x2.Args[2] {
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               s2 := o3.Args[1]
-               if s2.Op != OpS390XSLDconst {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if s2.AuxInt != 24 {
+               if idx != x1.Args[0] {
                        break
                }
-               x3 := s2.Args[0]
-               if x3.Op != OpS390XMOVBZloadidx {
+               if p != x1.Args[1] {
                        break
                }
-               if x3.AuxInt != i+3 {
+               if mem != x1.Args[2] {
                        break
                }
-               if x3.Aux != s {
+               if !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               if p != x3.Args[0] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORW x0:(MOVBZloadidx [i0] {s} idx p mem) sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if idx != x3.Args[1] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if mem != x3.Args[2] {
+               if sh.AuxInt != 8 {
                        break
                }
-               s3 := o2.Args[1]
-               if s3.Op != OpS390XSLDconst {
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if s3.AuxInt != 32 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               x4 := s3.Args[0]
-               if x4.Op != OpS390XMOVBZloadidx {
+               if idx != x1.Args[0] {
                        break
                }
-               if x4.AuxInt != i+4 {
+               if p != x1.Args[1] {
                        break
                }
-               if x4.Aux != s {
+               if mem != x1.Args[2] {
                        break
                }
-               if p != x4.Args[0] {
+               if !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               if idx != x4.Args[1] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORW sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)) x0:(MOVBZloadidx [i0] {s} p idx mem))
+       // cond: p.Op != OpSB   && i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if mem != x4.Args[2] {
+               if sh.AuxInt != 8 {
                        break
                }
-               s4 := o1.Args[1]
-               if s4.Op != OpS390XSLDconst {
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if s4.AuxInt != 40 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x5 := s4.Args[0]
-               if x5.Op != OpS390XMOVBZloadidx {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x5.AuxInt != i+5 {
+               if p != x0.Args[0] {
                        break
                }
-               if x5.Aux != s {
+               if idx != x0.Args[1] {
                        break
                }
-               if p != x5.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               if idx != x5.Args[1] {
+               if !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               if mem != x5.Args[2] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORW sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} idx p mem)) x0:(MOVBZloadidx [i0] {s} p idx mem))
+       // cond: p.Op != OpSB   && i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               s5 := o0.Args[1]
-               if s5.Op != OpS390XSLDconst {
+               if sh.AuxInt != 8 {
                        break
                }
-               if s5.AuxInt != 48 {
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x6 := s5.Args[0]
-               if x6.Op != OpS390XMOVBZloadidx {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x6.AuxInt != i+6 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x6.Aux != s {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x6.Args[0] {
+               if idx != x0.Args[1] {
                        break
                }
-               if idx != x6.Args[1] {
+               if mem != x0.Args[2] {
                        break
                }
-               if mem != x6.Args[2] {
+               if !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               s6 := v.Args[1]
-               if s6.Op != OpS390XSLDconst {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORW sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)) x0:(MOVBZloadidx [i0] {s} idx p mem))
+       // cond: p.Op != OpSB   && i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if s6.AuxInt != 56 {
+               if sh.AuxInt != 8 {
                        break
                }
-               x7 := s6.Args[0]
-               if x7.Op != OpS390XMOVBZloadidx {
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x7.AuxInt != i+7 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x7.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x7.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if idx != x7.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x7.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)) {
+               if !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRloadidx, v.Type)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR                       x0:(MOVBZload [i]   {s} p mem)     s0:(SLDconst [8]  x1:(MOVBZload [i-1] {s} p mem)))     s1:(SLDconst [16] x2:(MOVBZload [i-2] {s} p mem)))     s2:(SLDconst [24] x3:(MOVBZload [i-3] {s} p mem)))     s3:(SLDconst [32] x4:(MOVBZload [i-4] {s} p mem)))     s4:(SLDconst [40] x5:(MOVBZload [i-5] {s} p mem)))     s5:(SLDconst [48] x6:(MOVBZload [i-6] {s} p mem)))     s6:(SLDconst [56] x7:(MOVBZload [i-7] {s} p mem)))
-       // cond: p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && x7.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && s2.Uses == 1   && s3.Uses == 1   && s4.Uses == 1   && s5.Uses == 1   && s6.Uses == 1   && o0.Uses == 1   && o1.Uses == 1   && o2.Uses == 1   && o3.Uses == 1   && o4.Uses == 1   && o5.Uses == 1   && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)   && clobber(x7)   && clobber(s0)   && clobber(s1)   && clobber(s2)   && clobber(s3)   && clobber(s4)   && clobber(s5)   && clobber(s6)   && clobber(o0)   && clobber(o1)   && clobber(o2)   && clobber(o3)   && clobber(o4)   && clobber(o5)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload [i-7] {s} p mem)
+       // match: (ORW sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} idx p mem)) x0:(MOVBZloadidx [i0] {s} idx p mem))
+       // cond: p.Op != OpSB   && i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
        for {
-               o0 := v.Args[0]
-               if o0.Op != OpS390XOR {
-                       break
-               }
-               o1 := o0.Args[0]
-               if o1.Op != OpS390XOR {
-                       break
-               }
-               o2 := o1.Args[0]
-               if o2.Op != OpS390XOR {
-                       break
-               }
-               o3 := o2.Args[0]
-               if o3.Op != OpS390XOR {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               o4 := o3.Args[0]
-               if o4.Op != OpS390XOR {
+               if sh.AuxInt != 8 {
                        break
                }
-               o5 := o4.Args[0]
-               if o5.Op != OpS390XOR {
-                       break
-               }
-               x0 := o5.Args[0]
-               if x0.Op != OpS390XMOVBZload {
+               x1 := sh.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := o5.Args[1]
-               if s0.Op != OpS390XSLDconst {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if s0.AuxInt != 8 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpS390XMOVBZload {
+               if idx != x0.Args[0] {
                        break
                }
-               if x1.AuxInt != i-1 {
+               if p != x0.Args[1] {
                        break
                }
-               if x1.Aux != s {
+               if mem != x0.Args[2] {
                        break
                }
-               if p != x1.Args[0] {
+               if !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               if mem != x1.Args[1] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORW r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)) sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               s1 := o4.Args[1]
-               if s1.Op != OpS390XSLDconst {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if s1.AuxInt != 16 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpS390XMOVBZload {
+               if sh.AuxInt != 16 {
                        break
                }
-               if x2.AuxInt != i-2 {
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if x2.Aux != s {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if p != x2.Args[0] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if mem != x2.Args[1] {
+               if p != x1.Args[0] {
                        break
                }
-               s2 := o3.Args[1]
-               if s2.Op != OpS390XSLDconst {
+               if idx != x1.Args[1] {
                        break
                }
-               if s2.AuxInt != 24 {
+               if mem != x1.Args[2] {
                        break
                }
-               x3 := s2.Args[0]
-               if x3.Op != OpS390XMOVBZload {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               if x3.AuxInt != i-3 {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORW r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)) sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if x3.Aux != s {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if p != x3.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if mem != x3.Args[1] {
+               if sh.AuxInt != 16 {
                        break
                }
-               s3 := o2.Args[1]
-               if s3.Op != OpS390XSLDconst {
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if s3.AuxInt != 32 {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               x4 := s3.Args[0]
-               if x4.Op != OpS390XMOVBZload {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x4.AuxInt != i-4 {
+               if p != x1.Args[0] {
                        break
                }
-               if x4.Aux != s {
+               if idx != x1.Args[1] {
                        break
                }
-               if p != x4.Args[0] {
+               if mem != x1.Args[2] {
                        break
                }
-               if mem != x4.Args[1] {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               s4 := o1.Args[1]
-               if s4.Op != OpS390XSLDconst {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORW r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)) sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if s4.AuxInt != 40 {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               x5 := s4.Args[0]
-               if x5.Op != OpS390XMOVBZload {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if x5.AuxInt != i-5 {
+               if sh.AuxInt != 16 {
                        break
                }
-               if x5.Aux != s {
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if p != x5.Args[0] {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if mem != x5.Args[1] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               s5 := o0.Args[1]
-               if s5.Op != OpS390XSLDconst {
+               if idx != x1.Args[0] {
                        break
                }
-               if s5.AuxInt != 48 {
+               if p != x1.Args[1] {
                        break
                }
-               x6 := s5.Args[0]
-               if x6.Op != OpS390XMOVBZload {
+               if mem != x1.Args[2] {
                        break
                }
-               if x6.AuxInt != i-6 {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               if x6.Aux != s {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORW r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)) sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
+       for {
+               r0 := v.Args[0]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if p != x6.Args[0] {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if mem != x6.Args[1] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               s6 := v.Args[1]
-               if s6.Op != OpS390XSLDconst {
+               if sh.AuxInt != 16 {
                        break
                }
-               if s6.AuxInt != 56 {
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               x7 := s6.Args[0]
-               if x7.Op != OpS390XMOVBZload {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if x7.AuxInt != i-7 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x7.Aux != s {
+               if idx != x1.Args[0] {
                        break
                }
-               if p != x7.Args[0] {
+               if p != x1.Args[1] {
                        break
                }
-               if mem != x7.Args[1] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVDload, types.UInt64)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i - 7
+               v0.AuxInt = i0
                v0.Aux = s
                v0.AddArg(p)
+               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR                       x0:(MOVBZloadidx [i]   {s} p idx mem)     s0:(SLDconst [8]  x1:(MOVBZloadidx [i-1] {s} p idx mem)))     s1:(SLDconst [16] x2:(MOVBZloadidx [i-2] {s} p idx mem)))     s2:(SLDconst [24] x3:(MOVBZloadidx [i-3] {s} p idx mem)))     s3:(SLDconst [32] x4:(MOVBZloadidx [i-4] {s} p idx mem)))     s4:(SLDconst [40] x5:(MOVBZloadidx [i-5] {s} p idx mem)))     s5:(SLDconst [48] x6:(MOVBZloadidx [i-6] {s} p idx mem)))     s6:(SLDconst [56] x7:(MOVBZloadidx [i-7] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && x3.Uses == 1   && x4.Uses == 1   && x5.Uses == 1   && x6.Uses == 1   && x7.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && s2.Uses == 1   && s3.Uses == 1   && s4.Uses == 1   && s5.Uses == 1   && s6.Uses == 1   && o0.Uses == 1   && o1.Uses == 1   && o2.Uses == 1   && o3.Uses == 1   && o4.Uses == 1   && o5.Uses == 1   && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(x3)   && clobber(x4)   && clobber(x5)   && clobber(x6)   && clobber(x7)   && clobber(s0)   && clobber(s1)   && clobber(s2)   && clobber(s3)   && clobber(s4)   && clobber(s5)   && clobber(s6)   && clobber(o0)   && clobber(o1)   && clobber(o2)   && clobber(o3)   && clobber(o4)   && clobber(o5)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <v.Type> [i-7] {s} p idx mem)
+       // match: (ORW sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))) r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
        for {
-               o0 := v.Args[0]
-               if o0.Op != OpS390XOR {
-                       break
-               }
-               o1 := o0.Args[0]
-               if o1.Op != OpS390XOR {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               o2 := o1.Args[0]
-               if o2.Op != OpS390XOR {
+               if sh.AuxInt != 16 {
                        break
                }
-               o3 := o2.Args[0]
-               if o3.Op != OpS390XOR {
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               o4 := o3.Args[0]
-               if o4.Op != OpS390XOR {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               o5 := o4.Args[0]
-               if o5.Op != OpS390XOR {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               x0 := o5.Args[0]
-               if x0.Op != OpS390XMOVBZloadidx {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               s0 := o5.Args[1]
-               if s0.Op != OpS390XSLDconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if s0.AuxInt != 8 {
+               if p != x0.Args[0] {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpS390XMOVBZloadidx {
+               if idx != x0.Args[1] {
                        break
                }
-               if x1.AuxInt != i-1 {
+               if mem != x0.Args[2] {
                        break
                }
-               if x1.Aux != s {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               if p != x1.Args[0] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORW sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))) r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if idx != x1.Args[1] {
+               if sh.AuxInt != 16 {
                        break
                }
-               if mem != x1.Args[2] {
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               s1 := o4.Args[1]
-               if s1.Op != OpS390XSLDconst {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if s1.AuxInt != 16 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpS390XMOVBZloadidx {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x2.AuxInt != i-2 {
+               if p != x0.Args[0] {
                        break
                }
-               if x2.Aux != s {
+               if idx != x0.Args[1] {
                        break
                }
-               if p != x2.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               if idx != x2.Args[1] {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               if mem != x2.Args[2] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORW sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))) r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               s2 := o3.Args[1]
-               if s2.Op != OpS390XSLDconst {
+               if sh.AuxInt != 16 {
                        break
                }
-               if s2.AuxInt != 24 {
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               x3 := s2.Args[0]
-               if x3.Op != OpS390XMOVBZloadidx {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if x3.AuxInt != i-3 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if x3.Aux != s {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if p != x3.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if idx != x3.Args[1] {
+               if idx != x0.Args[0] {
                        break
                }
-               if mem != x3.Args[2] {
+               if p != x0.Args[1] {
                        break
                }
-               s3 := o2.Args[1]
-               if s3.Op != OpS390XSLDconst {
+               if mem != x0.Args[2] {
                        break
                }
-               if s3.AuxInt != 32 {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               x4 := s3.Args[0]
-               if x4.Op != OpS390XMOVBZloadidx {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORW sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} idx p mem))) r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpS390XSLWconst {
                        break
                }
-               if x4.AuxInt != i-4 {
+               if sh.AuxInt != 16 {
                        break
                }
-               if x4.Aux != s {
+               r1 := sh.Args[0]
+               if r1.Op != OpS390XMOVHZreg {
                        break
                }
-               if p != x4.Args[0] {
+               x1 := r1.Args[0]
+               if x1.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               if idx != x4.Args[1] {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               r0 := v.Args[1]
+               if r0.Op != OpS390XMOVHZreg {
                        break
                }
-               if mem != x4.Args[2] {
+               x0 := r0.Args[0]
+               if x0.Op != OpS390XMOVHBRloadidx {
                        break
                }
-               s4 := o1.Args[1]
-               if s4.Op != OpS390XSLDconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if s4.AuxInt != 40 {
+               if idx != x0.Args[0] {
                        break
                }
-               x5 := s4.Args[0]
-               if x5.Op != OpS390XMOVBZloadidx {
+               if p != x0.Args[1] {
                        break
                }
-               if x5.AuxInt != i-5 {
+               if mem != x0.Args[2] {
                        break
                }
-               if x5.Aux != s {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
-               if p != x5.Args[0] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, types.Int32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) or:(ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) y))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if idx != x5.Args[1] {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if mem != x5.Args[2] {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               s5 := o0.Args[1]
-               if s5.Op != OpS390XSLDconst {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if s5.AuxInt != 48 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x6 := s5.Args[0]
-               if x6.Op != OpS390XMOVBZloadidx {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x6.AuxInt != i-6 {
+               if p != x0.Args[0] {
                        break
                }
-               if x6.Aux != s {
+               if idx != x0.Args[1] {
                        break
                }
-               if p != x6.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               if idx != x6.Args[1] {
+               y := or.Args[1]
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if mem != x6.Args[2] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) or:(ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) y))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               s6 := v.Args[1]
-               if s6.Op != OpS390XSLDconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if s6.AuxInt != 56 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               x7 := s6.Args[0]
-               if x7.Op != OpS390XMOVBZloadidx {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if x7.AuxInt != i-7 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x7.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x7.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if idx != x7.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
-               if mem != x7.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)) {
+               y := or.Args[1]
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVDloadidx, v.Type)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i - 7
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueS390X_OpS390XORW(v *Value) bool {
-       b := v.Block
-       _ = b
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (ORW x (MOVDconst [c]))
-       // cond:
-       // result: (ORWconst [c] x)
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XMOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               v.reset(OpS390XORWconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORW (MOVDconst [c]) x)
-       // cond:
-       // result: (ORWconst [c] x)
+       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) or:(ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) y))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XMOVDconst {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpS390XORWconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: ( ORW (SLWconst x [c]) (SRWconst x [32-c]))
-       // cond:
-       // result: (RLLconst [   c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XSLWconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSRWconst {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if x != v_1.Args[0] {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XRLLconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: ( ORW (SRWconst x [c]) (SLWconst x [32-c]))
-       // cond:
-       // result: (RLLconst [32-c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpS390XSRWconst {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpS390XSLWconst {
+               if idx != x0.Args[0] {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               if p != x0.Args[1] {
                        break
                }
-               if x != v_1.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpS390XRLLconst)
-               v.AuxInt = 32 - c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORW x x)
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x != v.Args[1] {
+               y := or.Args[1]
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORW <t> x g:(MOVWload [off] {sym} ptr mem))
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (ORWload <t> [off] {sym} x ptr mem)
+       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) or:(ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) y))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               t := v.Type
-               x := v.Args[0]
-               g := v.Args[1]
-               if g.Op != OpS390XMOVWload {
-                       break
-               }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               v.reset(OpS390XORWload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (ORW <t> g:(MOVWload [off] {sym} ptr mem) x)
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (ORWload <t> [off] {sym} x ptr mem)
-       for {
-               t := v.Type
-               g := v.Args[0]
-               if g.Op != OpS390XMOVWload {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               x := v.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               v.reset(OpS390XORWload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (ORW <t> x g:(MOVWZload [off] {sym} ptr mem))
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (ORWload <t> [off] {sym} x ptr mem)
-       for {
-               t := v.Type
-               x := v.Args[0]
-               g := v.Args[1]
-               if g.Op != OpS390XMOVWZload {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               v.reset(OpS390XORWload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (ORW <t> g:(MOVWZload [off] {sym} ptr mem) x)
-       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
-       // result: (ORWload <t> [off] {sym} x ptr mem)
-       for {
-               t := v.Type
-               g := v.Args[0]
-               if g.Op != OpS390XMOVWZload {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               off := g.AuxInt
-               sym := g.Aux
-               ptr := g.Args[0]
-               mem := g.Args[1]
-               x := v.Args[1]
-               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+               if idx != x0.Args[0] {
                        break
-               }
-               v.reset(OpS390XORWload)
-               v.Type = t
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (ORW x:(SLWconst _) y)
-       // cond: y.Op != OpS390XSLWconst
-       // result: (ORW y x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpS390XSLWconst {
+               }
+               if p != x0.Args[1] {
                        break
                }
-               y := v.Args[1]
-               if !(y.Op != OpS390XSLWconst) {
+               if mem != x0.Args[2] {
                        break
                }
-               v.reset(OpS390XORW)
-               v.AddArg(y)
-               v.AddArg(x)
+               y := or.Args[1]
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORW                 x0:(MOVBZload [i]   {s} p mem)     s0:(SLWconst [8] x1:(MOVBZload [i+1] {s} p mem)))
-       // cond: p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
-       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i] {s} p mem))
+       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) or:(ORW y s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpS390XMOVBZload {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != OpS390XSLWconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if s0.AuxInt != 8 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpS390XMOVBZload {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if x1.AuxInt != i+1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x1.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if mem != x1.Args[1] {
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
                        break
                }
-               if !(p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRload, types.UInt16)
-               v1.AuxInt = i
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORW o0:(ORW z0:(MOVHZreg x0:(MOVHBRload [i] {s} p mem))     s0:(SLWconst [16] x1:(MOVBZload [i+2] {s} p mem)))     s1:(SLWconst [24] x2:(MOVBZload [i+3] {s} p mem)))
-       // cond: p.Op != OpSB   && z0.Uses == 1   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(z0)   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWBRload [i] {s} p mem)
+       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) or:(ORW y s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               o0 := v.Args[0]
-               if o0.Op != OpS390XORW {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               z0 := o0.Args[0]
-               if z0.Op != OpS390XMOVHZreg {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x0 := z0.Args[0]
-               if x0.Op != OpS390XMOVHBRload {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := o0.Args[1]
+               y := or.Args[0]
+               s0 := or.Args[1]
                if s0.Op != OpS390XSLWconst {
                        break
                }
-               if s0.AuxInt != 16 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpS390XMOVBZload {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if x1.AuxInt != i+2 {
+               if p != x0.Args[0] {
                        break
                }
-               if x1.Aux != s {
+               if idx != x0.Args[1] {
                        break
                }
-               if p != x1.Args[0] {
+               if mem != x0.Args[2] {
                        break
                }
-               if mem != x1.Args[1] {
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               s1 := v.Args[1]
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)) or:(ORW y s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem))))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
+       for {
+               s1 := v.Args[0]
                if s1.Op != OpS390XSLWconst {
                        break
                }
-               if s1.AuxInt != 24 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpS390XMOVBZload {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               if x2.AuxInt != i+3 {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if x2.Aux != s {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x2.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if mem != x2.Args[1] {
+               if idx != x0.Args[0] {
                        break
                }
-               if !(p.Op != OpSB && z0.Uses == 1 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(z0) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+               if p != x0.Args[1] {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWBRload, types.UInt32)
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORW                 x0:(MOVBZloadidx [i]   {s} p idx mem)     s0:(SLWconst [8] x1:(MOVBZloadidx [i+1] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
-       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx <v.Type> [i] {s} p idx mem))
+       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)) or:(ORW y s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem))))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpS390XMOVBZloadidx {
+               s1 := v.Args[0]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               s0 := v.Args[1]
-               if s0.Op != OpS390XSLWconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if s0.AuxInt != 8 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpS390XORW {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpS390XMOVBZloadidx {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if x1.AuxInt != i+1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x1.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, v.Type)
-               v1.AuxInt = i
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORW o0:(ORW z0:(MOVHZreg x0:(MOVHBRloadidx [i] {s} p idx mem))     s0:(SLWconst [16] x1:(MOVBZloadidx [i+2] {s} p idx mem)))     s1:(SLWconst [24] x2:(MOVBZloadidx [i+3] {s} p idx mem)))
-       // cond: z0.Uses == 1   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(z0)   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWZreg (MOVWBRloadidx <v.Type> [i] {s} p idx mem))
+       // match: (ORW or:(ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) y) s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               o0 := v.Args[0]
-               if o0.Op != OpS390XORW {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               z0 := o0.Args[0]
-               if z0.Op != OpS390XMOVHZreg {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               x0 := z0.Args[0]
-               if x0.Op != OpS390XMOVHBRloadidx {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                idx := x0.Args[1]
                mem := x0.Args[2]
-               s0 := o0.Args[1]
-               if s0.Op != OpS390XSLWconst {
-                       break
-               }
-               if s0.AuxInt != 16 {
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               x1 := s0.Args[0]
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
                if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x1.AuxInt != i+2 {
-                       break
-               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -16305,305 +32019,495 @@ func rewriteValueS390X_OpS390XORW(v *Value) bool {
                if mem != x1.Args[2] {
                        break
                }
-               s1 := v.Args[1]
-               if s1.Op != OpS390XSLWconst {
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW or:(ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) y) s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               if s1.AuxInt != 24 {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpS390XMOVBZloadidx {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x2.AuxInt != i+3 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if x2.Aux != s {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x2.Args[0] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if idx != x2.Args[1] {
+               if p != x1.Args[0] {
                        break
                }
-               if mem != x2.Args[2] {
+               if idx != x1.Args[1] {
                        break
                }
-               if !(z0.Uses == 1 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(z0) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+               if mem != x1.Args[2] {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, types.UInt64)
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, v.Type)
-               v1.AuxInt = i
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORW                  x0:(MOVBZload [i]   {s} p mem)     s0:(SLWconst [8] x1:(MOVBZload [i-1] {s} p mem)))
-       // cond: p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
-       // result: @mergePoint(b,x0,x1) (MOVHZload [i-1] {s} p mem)
+       // match: (ORW or:(ORW y s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))) s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpS390XMOVBZload {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := v.Args[1]
+               y := or.Args[0]
+               s0 := or.Args[1]
                if s0.Op != OpS390XSLWconst {
                        break
                }
-               if s0.AuxInt != 8 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpS390XMOVBZload {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if x1.AuxInt != i-1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
                if p != x1.Args[0] {
                        break
                }
-               if mem != x1.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
-               if !(p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHZload, types.UInt16)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i - 1
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORW o0:(ORW x0:(MOVHZload [i] {s} p mem)     s0:(SLWconst [16] x1:(MOVBZload [i-1] {s} p mem)))     s1:(SLWconst [24] x2:(MOVBZload [i-2] {s} p mem)))
-       // cond: p.Op != OpSB   && x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWZload [i-2] {s} p mem)
+       // match: (ORW or:(ORW y s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem))) s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               o0 := v.Args[0]
-               if o0.Op != OpS390XORW {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               x0 := o0.Args[0]
-               if x0.Op != OpS390XMOVHZload {
-                       break
-               }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s0 := o0.Args[1]
+               y := or.Args[0]
+               s0 := or.Args[1]
                if s0.Op != OpS390XSLWconst {
                        break
                }
-               if s0.AuxInt != 16 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpS390XMOVBZload {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if x1.AuxInt != i-1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
                if p != x1.Args[0] {
                        break
                }
-               if mem != x1.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
-               s1 := v.Args[1]
-               if s1.Op != OpS390XSLWconst {
+               if mem != x1.Args[2] {
                        break
                }
-               if s1.AuxInt != 24 {
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpS390XMOVBZload {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW or:(ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem)) y) s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               if x2.AuxInt != i-2 {
+               s0 := or.Args[0]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if x2.Aux != s {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if p != x2.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if mem != x2.Args[1] {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if !(p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWZload, types.UInt32)
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i - 2
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORW                 x0:(MOVBZloadidx [i]   {s} p idx mem)     s0:(SLWconst [8] x1:(MOVBZloadidx [i-1] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)
-       // result: @mergePoint(b,x0,x1) (MOVHZloadidx <v.Type> [i-1] {s} p idx mem)
+       // match: (ORW or:(ORW s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem)) y) s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpS390XMOVBZloadidx {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               i := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               s0 := v.Args[1]
+               s0 := or.Args[0]
                if s0.Op != OpS390XSLWconst {
                        break
                }
-               if s0.AuxInt != 8 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpS390XMOVBZloadidx {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if x1.AuxInt != i-1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) {
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVHZloadidx, v.Type)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i - 1
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORW o0:(ORW x0:(MOVHZloadidx [i] {s} p idx mem)     s0:(SLWconst [16] x1:(MOVBZloadidx [i-1] {s} p idx mem)))     s1:(SLWconst [24] x2:(MOVBZloadidx [i-2] {s} p idx mem)))
-       // cond: x0.Uses == 1   && x1.Uses == 1   && x2.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && o0.Uses == 1   && mergePoint(b,x0,x1,x2) != nil   && clobber(x0)   && clobber(x1)   && clobber(x2)   && clobber(s0)   && clobber(s1)   && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWZloadidx <v.Type> [i-2] {s} p idx mem)
+       // match: (ORW or:(ORW y s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))) s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
        for {
-               o0 := v.Args[0]
-               if o0.Op != OpS390XORW {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               x0 := o0.Args[0]
-               if x0.Op != OpS390XMOVHZloadidx {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLWconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               i := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
                idx := x0.Args[1]
                mem := x0.Args[2]
-               s0 := o0.Args[1]
-               if s0.Op != OpS390XSLWconst {
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if s0.AuxInt != 16 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x1 := s0.Args[0]
-               if x1.Op != OpS390XMOVBZloadidx {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x1.AuxInt != i-1 {
+               if idx != x1.Args[0] {
                        break
                }
-               if x1.Aux != s {
+               if p != x1.Args[1] {
                        break
                }
-               if p != x1.Args[0] {
+               if mem != x1.Args[2] {
                        break
                }
-               if idx != x1.Args[1] {
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               if mem != x1.Args[2] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORW or:(ORW y s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} idx p mem))) s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} idx p mem)))
+       // cond: p.Op != OpSB   && i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpS390XORW {
                        break
                }
-               s1 := v.Args[1]
-               if s1.Op != OpS390XSLWconst {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpS390XSLWconst {
                        break
                }
-               if s1.AuxInt != 24 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpS390XMOVBZloadidx {
                        break
                }
-               x2 := s1.Args[0]
-               if x2.Op != OpS390XMOVBZloadidx {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpS390XSLWconst {
                        break
                }
-               if x2.AuxInt != i-2 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpS390XMOVBZloadidx {
                        break
                }
-               if x2.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x2.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x2.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
-               if mem != x2.Args[2] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)) {
+               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpS390XMOVWZloadidx, v.Type)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpS390XORW, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i - 2
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpS390XMOVHZreg, types.UInt64)
+               v3 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, types.Int16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
        return false
@@ -17440,9 +33344,9 @@ func rewriteValueS390X_OpS390XXOR(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XOR (SLDconst x [c]) (SRDconst x [64-c]))
-       // cond:
-       // result: (RLLGconst [   c] x)
+       // match: (XOR (SLDconst x [c]) (SRDconst x [d]))
+       // cond: d == 64-c
+       // result: (RLLGconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XSLDconst {
@@ -17454,10 +33358,11 @@ func rewriteValueS390X_OpS390XXOR(v *Value) bool {
                if v_1.Op != OpS390XSRDconst {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 64-c) {
                        break
                }
                v.reset(OpS390XRLLGconst)
@@ -17465,28 +33370,29 @@ func rewriteValueS390X_OpS390XXOR(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XOR (SRDconst x [c]) (SLDconst x [64-c]))
-       // cond:
-       // result: (RLLGconst [64-c] x)
+       // match: (XOR (SRDconst x [d]) (SLDconst x [c]))
+       // cond: d == 64-c
+       // result: (RLLGconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XSRDconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpS390XSLDconst {
                        break
                }
-               if v_1.AuxInt != 64-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 64-c) {
                        break
                }
                v.reset(OpS390XRLLGconst)
-               v.AuxInt = 64 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
@@ -17508,6 +33414,24 @@ func rewriteValueS390X_OpS390XXOR(v *Value) bool {
                v.AuxInt = c ^ d
                return true
        }
+       // match: (XOR (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [c^d])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XMOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XMOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpS390XMOVDconst)
+               v.AuxInt = c ^ d
+               return true
+       }
        // match: (XOR x x)
        // cond:
        // result: (MOVDconst [0])
@@ -17572,6 +33496,58 @@ func rewriteValueS390X_OpS390XXOR(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (XOR <t> g:(MOVDload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (XORload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XXORload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (XOR <t> x g:(MOVDload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (XORload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVDload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XXORload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueS390X_OpS390XXORW(v *Value) bool {
@@ -17605,9 +33581,9 @@ func rewriteValueS390X_OpS390XXORW(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORW (SLWconst x [c]) (SRWconst x [32-c]))
-       // cond:
-       // result: (RLLconst [   c] x)
+       // match: (XORW (SLWconst x [c]) (SRWconst x [d]))
+       // cond: d == 32-c
+       // result: (RLLconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XSLWconst {
@@ -17619,10 +33595,11 @@ func rewriteValueS390X_OpS390XXORW(v *Value) bool {
                if v_1.Op != OpS390XSRWconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(OpS390XRLLconst)
@@ -17630,28 +33607,29 @@ func rewriteValueS390X_OpS390XXORW(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (XORW (SRWconst x [c]) (SLWconst x [32-c]))
-       // cond:
-       // result: (RLLconst [32-c] x)
+       // match: (XORW (SRWconst x [d]) (SLWconst x [c]))
+       // cond: d == 32-c
+       // result: (RLLconst [c] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpS390XSRWconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpS390XSLWconst {
                        break
                }
-               if v_1.AuxInt != 32-c {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x != v_1.Args[0] {
+               if !(d == 32-c) {
                        break
                }
                v.reset(OpS390XRLLconst)
-               v.AuxInt = 32 - c
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
@@ -17719,6 +33697,58 @@ func rewriteValueS390X_OpS390XXORW(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (XORW <t> g:(MOVWload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (XORWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XXORWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (XORW <t> x g:(MOVWload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (XORWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XXORWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        // match: (XORW <t> x g:(MOVWZload [off] {sym} ptr mem))
        // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
        // result: (XORWload <t> [off] {sym} x ptr mem)
@@ -17771,6 +33801,58 @@ func rewriteValueS390X_OpS390XXORW(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (XORW <t> g:(MOVWZload [off] {sym} ptr mem) x)
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (XORWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               g := v.Args[0]
+               if g.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               x := v.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XXORWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (XORW <t> x g:(MOVWZload [off] {sym} ptr mem))
+       // cond: ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
+       // result: (XORWload <t> [off] {sym} x ptr mem)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               g := v.Args[1]
+               if g.Op != OpS390XMOVWZload {
+                       break
+               }
+               off := g.AuxInt
+               sym := g.Aux
+               ptr := g.Args[0]
+               mem := g.Args[1]
+               if !(ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)) {
+                       break
+               }
+               v.reset(OpS390XXORWload)
+               v.Type = t
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueS390X_OpS390XXORWconst(v *Value) bool {
@@ -17876,7 +33958,7 @@ func rewriteValueS390X_OpSelect0(v *Value) bool {
        return false
 }
 func rewriteValueS390X_OpSelect1(v *Value) bool {
-       // match: (Select1     (AddTupleFirst32 tuple _  ))
+       // match: (Select1 (AddTupleFirst32 tuple _))
        // cond:
        // result: (Select1 tuple)
        for {
@@ -17889,7 +33971,7 @@ func rewriteValueS390X_OpSelect1(v *Value) bool {
                v.AddArg(tuple)
                return true
        }
-       // match: (Select1     (AddTupleFirst64 tuple _  ))
+       // match: (Select1 (AddTupleFirst64 tuple _))
        // cond:
        // result: (Select1 tuple)
        for {
@@ -17938,7 +34020,7 @@ func rewriteValueS390X_OpSignExt32to64(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpSignExt8to16(v *Value) bool {
-       // match: (SignExt8to16  x)
+       // match: (SignExt8to16 x)
        // cond:
        // result: (MOVBreg x)
        for {
@@ -17949,7 +34031,7 @@ func rewriteValueS390X_OpSignExt8to16(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpSignExt8to32(v *Value) bool {
-       // match: (SignExt8to32  x)
+       // match: (SignExt8to32 x)
        // cond:
        // result: (MOVBreg x)
        for {
@@ -17960,7 +34042,7 @@ func rewriteValueS390X_OpSignExt8to32(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpSignExt8to64(v *Value) bool {
-       // match: (SignExt8to64  x)
+       // match: (SignExt8to64 x)
        // cond:
        // result: (MOVBreg x)
        for {
@@ -18119,7 +34201,7 @@ func rewriteValueS390X_OpStore(v *Value) bool {
        return false
 }
 func rewriteValueS390X_OpSub16(v *Value) bool {
-       // match: (Sub16  x y)
+       // match: (Sub16 x y)
        // cond:
        // result: (SUBW  x y)
        for {
@@ -18132,7 +34214,7 @@ func rewriteValueS390X_OpSub16(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpSub32(v *Value) bool {
-       // match: (Sub32  x y)
+       // match: (Sub32 x y)
        // cond:
        // result: (SUBW  x y)
        for {
@@ -18158,7 +34240,7 @@ func rewriteValueS390X_OpSub32F(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpSub64(v *Value) bool {
-       // match: (Sub64  x y)
+       // match: (Sub64 x y)
        // cond:
        // result: (SUB  x y)
        for {
@@ -18184,7 +34266,7 @@ func rewriteValueS390X_OpSub64F(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpSub8(v *Value) bool {
-       // match: (Sub8   x y)
+       // match: (Sub8 x y)
        // cond:
        // result: (SUBW  x y)
        for {
@@ -18210,7 +34292,7 @@ func rewriteValueS390X_OpSubPtr(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpTrunc16to8(v *Value) bool {
-       // match: (Trunc16to8  x)
+       // match: (Trunc16to8 x)
        // cond:
        // result: x
        for {
@@ -18234,7 +34316,7 @@ func rewriteValueS390X_OpTrunc32to16(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpTrunc32to8(v *Value) bool {
-       // match: (Trunc32to8  x)
+       // match: (Trunc32to8 x)
        // cond:
        // result: x
        for {
@@ -18270,7 +34352,7 @@ func rewriteValueS390X_OpTrunc64to32(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpTrunc64to8(v *Value) bool {
-       // match: (Trunc64to8  x)
+       // match: (Trunc64to8 x)
        // cond:
        // result: x
        for {
@@ -18321,7 +34403,7 @@ func rewriteValueS390X_OpXor64(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpXor8(v *Value) bool {
-       // match: (Xor8  x y)
+       // match: (Xor8 x y)
        // cond:
        // result: (XORW x y)
        for {
@@ -18557,7 +34639,7 @@ func rewriteValueS390X_OpZeroExt32to64(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpZeroExt8to16(v *Value) bool {
-       // match: (ZeroExt8to16  x)
+       // match: (ZeroExt8to16 x)
        // cond:
        // result: (MOVBZreg x)
        for {
@@ -18568,7 +34650,7 @@ func rewriteValueS390X_OpZeroExt8to16(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpZeroExt8to32(v *Value) bool {
-       // match: (ZeroExt8to32  x)
+       // match: (ZeroExt8to32 x)
        // cond:
        // result: (MOVBZreg x)
        for {
@@ -18579,7 +34661,7 @@ func rewriteValueS390X_OpZeroExt8to32(v *Value) bool {
        }
 }
 func rewriteValueS390X_OpZeroExt8to64(v *Value) bool {
-       // match: (ZeroExt8to64  x)
+       // match: (ZeroExt8to64 x)
        // cond:
        // result: (MOVBZreg x)
        for {
index 539c05d4d9f91c5be3ffd0d0fe5442f2c9e99610..9c99b83070ccc403874b6a847c8137abacd8c2a9 100644 (file)
@@ -36,7 +36,7 @@ func rewriteValuedec(v *Value) bool {
        return false
 }
 func rewriteValuedec_OpComplexImag(v *Value) bool {
-       // match: (ComplexImag (ComplexMake _ imag ))
+       // match: (ComplexImag (ComplexMake _ imag))
        // cond:
        // result: imag
        for {
@@ -53,7 +53,7 @@ func rewriteValuedec_OpComplexImag(v *Value) bool {
        return false
 }
 func rewriteValuedec_OpComplexReal(v *Value) bool {
-       // match: (ComplexReal (ComplexMake real _  ))
+       // match: (ComplexReal (ComplexMake real _))
        // cond:
        // result: real
        for {
@@ -276,7 +276,7 @@ func rewriteValuedec_OpSliceLen(v *Value) bool {
        return false
 }
 func rewriteValuedec_OpSlicePtr(v *Value) bool {
-       // match: (SlicePtr (SliceMake ptr _ _ ))
+       // match: (SlicePtr (SliceMake ptr _ _))
        // cond:
        // result: ptr
        for {
index eb4761ae94ed69f709961ecbc33c7aef3278d6cc..a602279d1d9a7fc2fb9945709f9fd74275df1816 100644 (file)
@@ -424,7 +424,7 @@ func rewriteValuegeneric(v *Value) bool {
 func rewriteValuegeneric_OpAdd16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Add16  (Const16 [c])  (Const16 [d]))
+       // match: (Add16 (Const16 [c]) (Const16 [d]))
        // cond:
        // result: (Const16 [int64(int16(c+d))])
        for {
@@ -442,25 +442,22 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
                v.AuxInt = int64(int16(c + d))
                return true
        }
-       // match: (Add16 x (Const16 <t> [c]))
-       // cond: x.Op != OpConst16
-       // result: (Add16 (Const16 <t> [c]) x)
+       // match: (Add16 (Const16 [d]) (Const16 [c]))
+       // cond:
+       // result: (Const16 [int64(int16(c+d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst16 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst16) {
-                       break
-               }
-               v.reset(OpAdd16)
-               v0 := b.NewValue0(v.Pos, OpConst16, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst16)
+               v.AuxInt = int64(int16(c + d))
                return true
        }
        // match: (Add16 (Const16 [0]) x)
@@ -480,6 +477,23 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Add16 x (Const16 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Add16 (Const16 [1]) (Com16 x))
        // cond:
        // result: (Neg16 x)
@@ -500,20 +514,23 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Add16 x l:(Add16 _ _))
-       // cond: (x.Op != OpAdd16 && x.Op != OpConst16)
-       // result: (Add16 l x)
+       // match: (Add16 (Com16 x) (Const16 [1]))
+       // cond:
+       // result: (Neg16 x)
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAdd16 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpCom16 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
                        break
                }
-               if !(x.Op != OpAdd16 && x.Op != OpConst16) {
+               if v_1.AuxInt != 1 {
                        break
                }
-               v.reset(OpAdd16)
-               v.AddArg(l)
+               v.reset(OpNeg16)
                v.AddArg(x)
                return true
        }
@@ -543,6 +560,84 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (Add16 (Add16 z i:(Const16 <t>)) x)
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Add16 i (Add16 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd16 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpAdd16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAdd16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add16 x (Add16 i:(Const16 <t>) z))
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Add16 i (Add16 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd16 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpAdd16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAdd16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add16 x (Add16 z i:(Const16 <t>)))
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Add16 i (Add16 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd16 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpAdd16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAdd16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (Add16 (Sub16 i:(Const16 <t>) z) x)
        // cond: (z.Op != OpConst16 && x.Op != OpConst16)
        // result: (Add16 i (Sub16 <t> x z))
@@ -595,6 +690,58 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (Add16 x (Sub16 i:(Const16 <t>) z))
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Add16 i (Sub16 <t> x z))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub16 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpAdd16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpSub16, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add16 (Sub16 i:(Const16 <t>) z) x)
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Add16 i (Sub16 <t> x z))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub16 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpAdd16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpSub16, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
        // match: (Add16 (Sub16 z i:(Const16 <t>)) x)
        // cond: (z.Op != OpConst16 && x.Op != OpConst16)
        // result: (Sub16 (Add16 <t> x z) i)
@@ -647,39 +794,61 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
                v.AddArg(i)
                return true
        }
-       // match: (Add16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x))
-       // cond:
-       // result: (Add16 (Const16 <t> [int64(int16(c+d))]) x)
+       // match: (Add16 x (Sub16 z i:(Const16 <t>)))
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Sub16 (Add16 <t> x z) i)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub16 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpAdd16 {
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst16 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst16 {
+               t := i.Type
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
                        break
                }
-               if v_1_0.Type != t {
+               v.reset(OpSub16)
+               v0 := b.NewValue0(v.Pos, OpAdd16, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               v.AddArg(i)
+               return true
+       }
+       // match: (Add16 (Sub16 z i:(Const16 <t>)) x)
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Sub16 (Add16 <t> x z) i)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub16 {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
-               v.reset(OpAdd16)
-               v0 := b.NewValue0(v.Pos, OpConst16, t)
-               v0.AuxInt = int64(int16(c + d))
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpSub16)
+               v0 := b.NewValue0(v.Pos, OpAdd16, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
                v.AddArg(v0)
-               v.AddArg(x)
+               v.AddArg(i)
                return true
        }
-       // match: (Add16 (Const16 <t> [c]) (Sub16 (Const16 <t> [d]) x))
+       // match: (Add16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x))
        // cond:
-       // result: (Sub16 (Const16 <t> [int64(int16(c+d))]) x)
+       // result: (Add16 (Const16 <t> [int64(int16(c+d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst16 {
@@ -688,7 +857,7 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
                t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpSub16 {
+               if v_1.Op != OpAdd16 {
                        break
                }
                v_1_0 := v_1.Args[0]
@@ -700,16 +869,16 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
                }
                d := v_1_0.AuxInt
                x := v_1.Args[1]
-               v.reset(OpSub16)
+               v.reset(OpAdd16)
                v0 := b.NewValue0(v.Pos, OpConst16, t)
                v0.AuxInt = int64(int16(c + d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Add16 (Const16 <t> [c]) (Sub16 x (Const16 <t> [d])))
+       // match: (Add16 (Const16 <t> [c]) (Add16 x (Const16 <t> [d])))
        // cond:
-       // result: (Add16 (Const16 <t> [int64(int16(c-d))]) x)
+       // result: (Add16 (Const16 <t> [int64(int16(c+d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst16 {
@@ -718,7 +887,7 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
                t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpSub16 {
+               if v_1.Op != OpAdd16 {
                        break
                }
                x := v_1.Args[0]
@@ -732,25 +901,205 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
                d := v_1_1.AuxInt
                v.reset(OpAdd16)
                v0 := b.NewValue0(v.Pos, OpConst16, t)
-               v0.AuxInt = int64(int16(c - d))
+               v0.AuxInt = int64(int16(c + d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpAdd32(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (Add32  (Const32 [c])  (Const32 [d]))
+       // match: (Add16 (Add16 (Const16 <t> [d]) x) (Const16 <t> [c]))
        // cond:
-       // result: (Const32 [int64(int32(c+d))])
+       // result: (Add16 (Const16 <t> [int64(int16(c+d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               if v_0.Op != OpAdd16 {
                        break
                }
-               c := v_0.AuxInt
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c + d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add16 (Add16 x (Const16 <t> [d])) (Const16 <t> [c]))
+       // cond:
+       // result: (Add16 (Const16 <t> [int64(int16(c+d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd16 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst16 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c + d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add16 (Const16 <t> [c]) (Sub16 (Const16 <t> [d]) x))
+       // cond:
+       // result: (Sub16 (Const16 <t> [int64(int16(c+d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub16 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst16 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpSub16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c + d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add16 (Sub16 (Const16 <t> [d]) x) (Const16 <t> [c]))
+       // cond:
+       // result: (Sub16 (Const16 <t> [int64(int16(c+d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub16 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpSub16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c + d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add16 (Const16 <t> [c]) (Sub16 x (Const16 <t> [d])))
+       // cond:
+       // result: (Add16 (Const16 <t> [int64(int16(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub16 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst16 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpAdd16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add16 (Sub16 x (Const16 <t> [d])) (Const16 <t> [c]))
+       // cond:
+       // result: (Add16 (Const16 <t> [int64(int16(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub16 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst16 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpAdd32(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (Add32 (Const32 [c]) (Const32 [d]))
+       // cond:
+       // result: (Const32 [int64(int32(c+d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               c := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
@@ -760,25 +1109,22 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
                v.AuxInt = int64(int32(c + d))
                return true
        }
-       // match: (Add32 x (Const32 <t> [c]))
-       // cond: x.Op != OpConst32
-       // result: (Add32 (Const32 <t> [c]) x)
+       // match: (Add32 (Const32 [d]) (Const32 [c]))
+       // cond:
+       // result: (Const32 [int64(int32(c+d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst32) {
-                       break
-               }
-               v.reset(OpAdd32)
-               v0 := b.NewValue0(v.Pos, OpConst32, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst32)
+               v.AuxInt = int64(int32(c + d))
                return true
        }
        // match: (Add32 (Const32 [0]) x)
@@ -798,6 +1144,23 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Add32 x (Const32 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Add32 (Const32 [1]) (Com32 x))
        // cond:
        // result: (Neg32 x)
@@ -818,20 +1181,23 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Add32 x l:(Add32 _ _))
-       // cond: (x.Op != OpAdd32 && x.Op != OpConst32)
-       // result: (Add32 l x)
+       // match: (Add32 (Com32 x) (Const32 [1]))
+       // cond:
+       // result: (Neg32 x)
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAdd32 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpCom32 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
                        break
                }
-               if !(x.Op != OpAdd32 && x.Op != OpConst32) {
+               if v_1.AuxInt != 1 {
                        break
                }
-               v.reset(OpAdd32)
-               v.AddArg(l)
+               v.reset(OpNeg32)
                v.AddArg(x)
                return true
        }
@@ -861,39 +1227,39 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Add32 (Sub32 i:(Const32 <t>) z) x)
+       // match: (Add32 (Add32 z i:(Const32 <t>)) x)
        // cond: (z.Op != OpConst32 && x.Op != OpConst32)
-       // result: (Add32 i (Sub32 <t> x z))
+       // result: (Add32 i (Add32 <t> z x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpSub32 {
+               if v_0.Op != OpAdd32 {
                        break
                }
-               i := v_0.Args[0]
+               z := v_0.Args[0]
+               i := v_0.Args[1]
                if i.Op != OpConst32 {
                        break
                }
                t := i.Type
-               z := v_0.Args[1]
                x := v.Args[1]
                if !(z.Op != OpConst32 && x.Op != OpConst32) {
                        break
                }
                v.reset(OpAdd32)
                v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpSub32, t)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAdd32, t)
                v0.AddArg(z)
+               v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (Add32 x (Sub32 i:(Const32 <t>) z))
+       // match: (Add32 x (Add32 i:(Const32 <t>) z))
        // cond: (z.Op != OpConst32 && x.Op != OpConst32)
-       // result: (Add32 i (Sub32 <t> x z))
+       // result: (Add32 i (Add32 <t> z x))
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpSub32 {
+               if v_1.Op != OpAdd32 {
                        break
                }
                i := v_1.Args[0]
@@ -907,22 +1273,152 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
                }
                v.reset(OpAdd32)
                v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpSub32, t)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAdd32, t)
                v0.AddArg(z)
+               v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (Add32 (Sub32 z i:(Const32 <t>)) x)
+       // match: (Add32 x (Add32 z i:(Const32 <t>)))
        // cond: (z.Op != OpConst32 && x.Op != OpConst32)
-       // result: (Sub32 (Add32 <t> x z) i)
+       // result: (Add32 i (Add32 <t> z x))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpSub32 {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd32 {
                        break
                }
-               z := v_0.Args[0]
-               i := v_0.Args[1]
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpAdd32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAdd32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add32 (Sub32 i:(Const32 <t>) z) x)
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Add32 i (Sub32 <t> x z))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub32 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpAdd32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpSub32, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add32 x (Sub32 i:(Const32 <t>) z))
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Add32 i (Sub32 <t> x z))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub32 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpAdd32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpSub32, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add32 x (Sub32 i:(Const32 <t>) z))
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Add32 i (Sub32 <t> x z))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub32 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpAdd32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpSub32, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add32 (Sub32 i:(Const32 <t>) z) x)
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Add32 i (Sub32 <t> x z))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub32 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpAdd32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpSub32, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add32 (Sub32 z i:(Const32 <t>)) x)
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Sub32 (Add32 <t> x z) i)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub32 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
                if i.Op != OpConst32 {
                        break
                }
@@ -965,39 +1461,61 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
                v.AddArg(i)
                return true
        }
-       // match: (Add32 (Const32 <t> [c]) (Add32 (Const32 <t> [d]) x))
-       // cond:
-       // result: (Add32 (Const32 <t> [int64(int32(c+d))]) x)
+       // match: (Add32 x (Sub32 z i:(Const32 <t>)))
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Sub32 (Add32 <t> x z) i)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub32 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpAdd32 {
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst32 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst32 {
+               t := i.Type
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
                        break
                }
-               if v_1_0.Type != t {
+               v.reset(OpSub32)
+               v0 := b.NewValue0(v.Pos, OpAdd32, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               v.AddArg(i)
+               return true
+       }
+       // match: (Add32 (Sub32 z i:(Const32 <t>)) x)
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Sub32 (Add32 <t> x z) i)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub32 {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
-               v.reset(OpAdd32)
-               v0 := b.NewValue0(v.Pos, OpConst32, t)
-               v0.AuxInt = int64(int32(c + d))
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpSub32)
+               v0 := b.NewValue0(v.Pos, OpAdd32, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
                v.AddArg(v0)
-               v.AddArg(x)
+               v.AddArg(i)
                return true
        }
-       // match: (Add32 (Const32 <t> [c]) (Sub32 (Const32 <t> [d]) x))
+       // match: (Add32 (Const32 <t> [c]) (Add32 (Const32 <t> [d]) x))
        // cond:
-       // result: (Sub32 (Const32 <t> [int64(int32(c+d))]) x)
+       // result: (Add32 (Const32 <t> [int64(int32(c+d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst32 {
@@ -1006,7 +1524,7 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
                t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpSub32 {
+               if v_1.Op != OpAdd32 {
                        break
                }
                v_1_0 := v_1.Args[0]
@@ -1018,16 +1536,16 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
                }
                d := v_1_0.AuxInt
                x := v_1.Args[1]
-               v.reset(OpSub32)
+               v.reset(OpAdd32)
                v0 := b.NewValue0(v.Pos, OpConst32, t)
                v0.AuxInt = int64(int32(c + d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Add32 (Const32 <t> [c]) (Sub32 x (Const32 <t> [d])))
+       // match: (Add32 (Const32 <t> [c]) (Add32 x (Const32 <t> [d])))
        // cond:
-       // result: (Add32 (Const32 <t> [int64(int32(c-d))]) x)
+       // result: (Add32 (Const32 <t> [int64(int32(c+d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst32 {
@@ -1036,7 +1554,7 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
                t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpSub32 {
+               if v_1.Op != OpAdd32 {
                        break
                }
                x := v_1.Args[0]
@@ -1050,72 +1568,270 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
                d := v_1_1.AuxInt
                v.reset(OpAdd32)
                v0 := b.NewValue0(v.Pos, OpConst32, t)
-               v0.AuxInt = int64(int32(c - d))
+               v0.AuxInt = int64(int32(c + d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpAdd32F(v *Value) bool {
-       // match: (Add32F (Const32F [c]) (Const32F [d]))
+       // match: (Add32 (Add32 (Const32 <t> [d]) x) (Const32 <t> [c]))
        // cond:
-       // result: (Const32F [f2i(float64(i2f32(c) + i2f32(d)))])
+       // result: (Add32 (Const32 <t> [int64(int32(c+d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32F {
+               if v_0.Op != OpAdd32 {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst32F {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpConst32F)
-               v.AuxInt = f2i(float64(i2f32(c) + i2f32(d)))
-               return true
-       }
-       // match: (Add32F x (Const32F [0]))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
                v_1 := v.Args[1]
-               if v_1.Op != OpConst32F {
+               if v_1.Op != OpConst32 {
                        break
                }
-               if v_1.AuxInt != 0 {
+               if v_1.Type != t {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               c := v_1.AuxInt
+               v.reset(OpAdd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c + d))
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Add32F (Const32F [0]) x)
+       // match: (Add32 (Add32 x (Const32 <t> [d])) (Const32 <t> [c]))
        // cond:
-       // result: x
+       // result: (Add32 (Const32 <t> [int64(int32(c+d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32F {
+               if v_0.Op != OpAdd32 {
                        break
                }
-               if v_0.AuxInt != 0 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValuegeneric_OpAdd64(v *Value) bool {
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c + d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add32 (Const32 <t> [c]) (Sub32 (Const32 <t> [d]) x))
+       // cond:
+       // result: (Sub32 (Const32 <t> [int64(int32(c+d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub32 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst32 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpSub32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c + d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add32 (Sub32 (Const32 <t> [d]) x) (Const32 <t> [c]))
+       // cond:
+       // result: (Sub32 (Const32 <t> [int64(int32(c+d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub32 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpSub32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c + d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add32 (Const32 <t> [c]) (Sub32 x (Const32 <t> [d])))
+       // cond:
+       // result: (Add32 (Const32 <t> [int64(int32(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub32 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst32 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpAdd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add32 (Sub32 x (Const32 <t> [d])) (Const32 <t> [c]))
+       // cond:
+       // result: (Add32 (Const32 <t> [int64(int32(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub32 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpAdd32F(v *Value) bool {
+       // match: (Add32F (Const32F [c]) (Const32F [d]))
+       // cond:
+       // result: (Const32F [f2i(float64(i2f32(c) + i2f32(d)))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32F {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32F {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConst32F)
+               v.AuxInt = f2i(float64(i2f32(c) + i2f32(d)))
+               return true
+       }
+       // match: (Add32F (Const32F [d]) (Const32F [c]))
+       // cond:
+       // result: (Const32F [f2i(float64(i2f32(c) + i2f32(d)))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32F {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32F {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConst32F)
+               v.AuxInt = f2i(float64(i2f32(c) + i2f32(d)))
+               return true
+       }
+       // match: (Add32F x (Const32F [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32F {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add32F (Const32F [0]) x)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32F {
+                       break
+               }
+               if v_0.AuxInt != 0 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpAdd64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Add64  (Const64 [c])  (Const64 [d]))
+       // match: (Add64 (Const64 [c]) (Const64 [d]))
        // cond:
        // result: (Const64 [c+d])
        for {
@@ -1133,25 +1849,22 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
                v.AuxInt = c + d
                return true
        }
-       // match: (Add64 x (Const64 <t> [c]))
-       // cond: x.Op != OpConst64
-       // result: (Add64 (Const64 <t> [c]) x)
+       // match: (Add64 (Const64 [d]) (Const64 [c]))
+       // cond:
+       // result: (Const64 [c+d])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst64) {
-                       break
-               }
-               v.reset(OpAdd64)
-               v0 := b.NewValue0(v.Pos, OpConst64, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst64)
+               v.AuxInt = c + d
                return true
        }
        // match: (Add64 (Const64 [0]) x)
@@ -1171,6 +1884,23 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Add64 x (Const64 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Add64 (Const64 [1]) (Com64 x))
        // cond:
        // result: (Neg64 x)
@@ -1191,20 +1921,23 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Add64 x l:(Add64 _ _))
-       // cond: (x.Op != OpAdd64 && x.Op != OpConst64)
-       // result: (Add64 l x)
+       // match: (Add64 (Com64 x) (Const64 [1]))
+       // cond:
+       // result: (Neg64 x)
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAdd64 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpCom64 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
                        break
                }
-               if !(x.Op != OpAdd64 && x.Op != OpConst64) {
+               if v_1.AuxInt != 1 {
                        break
                }
-               v.reset(OpAdd64)
-               v.AddArg(l)
+               v.reset(OpNeg64)
                v.AddArg(x)
                return true
        }
@@ -1234,12 +1967,90 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Add64 (Sub64 i:(Const64 <t>) z) x)
+       // match: (Add64 (Add64 z i:(Const64 <t>)) x)
        // cond: (z.Op != OpConst64 && x.Op != OpConst64)
-       // result: (Add64 i (Sub64 <t> x z))
+       // result: (Add64 i (Add64 <t> z x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpSub64 {
+               if v_0.Op != OpAdd64 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpAdd64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAdd64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add64 x (Add64 i:(Const64 <t>) z))
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Add64 i (Add64 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd64 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpAdd64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAdd64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add64 x (Add64 z i:(Const64 <t>)))
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Add64 i (Add64 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd64 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpAdd64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAdd64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add64 (Sub64 i:(Const64 <t>) z) x)
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Add64 i (Sub64 <t> x z))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub64 {
                        break
                }
                i := v_0.Args[0]
@@ -1286,6 +2097,58 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (Add64 x (Sub64 i:(Const64 <t>) z))
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Add64 i (Sub64 <t> x z))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub64 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpAdd64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpSub64, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add64 (Sub64 i:(Const64 <t>) z) x)
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Add64 i (Sub64 <t> x z))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub64 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpAdd64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpSub64, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
        // match: (Add64 (Sub64 z i:(Const64 <t>)) x)
        // cond: (z.Op != OpConst64 && x.Op != OpConst64)
        // result: (Sub64 (Add64 <t> x z) i)
@@ -1338,39 +2201,61 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
                v.AddArg(i)
                return true
        }
-       // match: (Add64 (Const64 <t> [c]) (Add64 (Const64 <t> [d]) x))
-       // cond:
-       // result: (Add64 (Const64 <t> [c+d]) x)
+       // match: (Add64 x (Sub64 z i:(Const64 <t>)))
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Sub64 (Add64 <t> x z) i)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpConst64 {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub64 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpAdd64 {
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst64 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst64 {
+               t := i.Type
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
                        break
                }
-               if v_1_0.Type != t {
+               v.reset(OpSub64)
+               v0 := b.NewValue0(v.Pos, OpAdd64, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               v.AddArg(i)
+               return true
+       }
+       // match: (Add64 (Sub64 z i:(Const64 <t>)) x)
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Sub64 (Add64 <t> x z) i)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub64 {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
-               v.reset(OpAdd64)
-               v0 := b.NewValue0(v.Pos, OpConst64, t)
-               v0.AuxInt = c + d
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpSub64)
+               v0 := b.NewValue0(v.Pos, OpAdd64, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
                v.AddArg(v0)
-               v.AddArg(x)
+               v.AddArg(i)
                return true
        }
-       // match: (Add64 (Const64 <t> [c]) (Sub64 (Const64 <t> [d]) x))
+       // match: (Add64 (Const64 <t> [c]) (Add64 (Const64 <t> [d]) x))
        // cond:
-       // result: (Sub64 (Const64 <t> [c+d]) x)
+       // result: (Add64 (Const64 <t> [c+d]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst64 {
@@ -1379,7 +2264,7 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
                t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpSub64 {
+               if v_1.Op != OpAdd64 {
                        break
                }
                v_1_0 := v_1.Args[0]
@@ -1391,16 +2276,16 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
                }
                d := v_1_0.AuxInt
                x := v_1.Args[1]
-               v.reset(OpSub64)
+               v.reset(OpAdd64)
                v0 := b.NewValue0(v.Pos, OpConst64, t)
                v0.AuxInt = c + d
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Add64 (Const64 <t> [c]) (Sub64 x (Const64 <t> [d])))
+       // match: (Add64 (Const64 <t> [c]) (Add64 x (Const64 <t> [d])))
        // cond:
-       // result: (Add64 (Const64 <t> [c-d]) x)
+       // result: (Add64 (Const64 <t> [c+d]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst64 {
@@ -1409,7 +2294,7 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
                t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpSub64 {
+               if v_1.Op != OpAdd64 {
                        break
                }
                x := v_1.Args[0]
@@ -1423,72 +2308,270 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
                d := v_1_1.AuxInt
                v.reset(OpAdd64)
                v0 := b.NewValue0(v.Pos, OpConst64, t)
-               v0.AuxInt = c - d
+               v0.AuxInt = c + d
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpAdd64F(v *Value) bool {
-       // match: (Add64F (Const64F [c]) (Const64F [d]))
+       // match: (Add64 (Add64 (Const64 <t> [d]) x) (Const64 <t> [c]))
        // cond:
-       // result: (Const64F [f2i(i2f(c) + i2f(d))])
+       // result: (Add64 (Const64 <t> [c+d]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst64F {
+               if v_0.Op != OpAdd64 {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst64F {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpConst64F)
-               v.AuxInt = f2i(i2f(c) + i2f(d))
-               return true
-       }
-       // match: (Add64F x (Const64F [0]))
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
                v_1 := v.Args[1]
-               if v_1.Op != OpConst64F {
+               if v_1.Op != OpConst64 {
                        break
                }
-               if v_1.AuxInt != 0 {
+               if v_1.Type != t {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               c := v_1.AuxInt
+               v.reset(OpAdd64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c + d
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Add64F (Const64F [0]) x)
+       // match: (Add64 (Add64 x (Const64 <t> [d])) (Const64 <t> [c]))
        // cond:
-       // result: x
+       // result: (Add64 (Const64 <t> [c+d]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst64F {
+               if v_0.Op != OpAdd64 {
                        break
                }
-               if v_0.AuxInt != 0 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValuegeneric_OpAdd8(v *Value) bool {
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c + d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add64 (Const64 <t> [c]) (Sub64 (Const64 <t> [d]) x))
+       // cond:
+       // result: (Sub64 (Const64 <t> [c+d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub64 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst64 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpSub64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c + d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add64 (Sub64 (Const64 <t> [d]) x) (Const64 <t> [c]))
+       // cond:
+       // result: (Sub64 (Const64 <t> [c+d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub64 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpSub64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c + d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add64 (Const64 <t> [c]) (Sub64 x (Const64 <t> [d])))
+       // cond:
+       // result: (Add64 (Const64 <t> [c-d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub64 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst64 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpAdd64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c - d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add64 (Sub64 x (Const64 <t> [d])) (Const64 <t> [c]))
+       // cond:
+       // result: (Add64 (Const64 <t> [c-d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub64 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c - d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpAdd64F(v *Value) bool {
+       // match: (Add64F (Const64F [c]) (Const64F [d]))
+       // cond:
+       // result: (Const64F [f2i(i2f(c) + i2f(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64F {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64F {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConst64F)
+               v.AuxInt = f2i(i2f(c) + i2f(d))
+               return true
+       }
+       // match: (Add64F (Const64F [d]) (Const64F [c]))
+       // cond:
+       // result: (Const64F [f2i(i2f(c) + i2f(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64F {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64F {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConst64F)
+               v.AuxInt = f2i(i2f(c) + i2f(d))
+               return true
+       }
+       // match: (Add64F x (Const64F [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64F {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add64F (Const64F [0]) x)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64F {
+                       break
+               }
+               if v_0.AuxInt != 0 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpAdd8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Add8   (Const8 [c])   (Const8 [d]))
+       // match: (Add8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (Const8  [int64(int8(c+d))])
        for {
@@ -1506,28 +2589,25 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                v.AuxInt = int64(int8(c + d))
                return true
        }
-       // match: (Add8  x (Const8  <t> [c]))
-       // cond: x.Op != OpConst8
-       // result: (Add8  (Const8  <t> [c]) x)
+       // match: (Add8 (Const8 [d]) (Const8 [c]))
+       // cond:
+       // result: (Const8  [int64(int8(c+d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst8 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst8) {
-                       break
-               }
-               v.reset(OpAdd8)
-               v0 := b.NewValue0(v.Pos, OpConst8, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst8)
+               v.AuxInt = int64(int8(c + d))
                return true
        }
-       // match: (Add8  (Const8  [0]) x)
+       // match: (Add8 (Const8 [0]) x)
        // cond:
        // result: x
        for {
@@ -1544,7 +2624,24 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Add8  (Const8  [1]) (Com8  x))
+       // match: (Add8 x (Const8 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add8 (Const8 [1]) (Com8 x))
        // cond:
        // result: (Neg8  x)
        for {
@@ -1564,24 +2661,27 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Add8  x l:(Add8  _ _))
-       // cond: (x.Op != OpAdd8  && x.Op != OpConst8)
-       // result: (Add8  l x)
+       // match: (Add8 (Com8 x) (Const8 [1]))
+       // cond:
+       // result: (Neg8  x)
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAdd8 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpCom8 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
                        break
                }
-               if !(x.Op != OpAdd8 && x.Op != OpConst8) {
+               if v_1.AuxInt != 1 {
                        break
                }
-               v.reset(OpAdd8)
-               v.AddArg(l)
+               v.reset(OpNeg8)
                v.AddArg(x)
                return true
        }
-       // match: (Add8  (Add8  i:(Const8  <t>) z) x)
+       // match: (Add8 (Add8 i:(Const8 <t>) z) x)
        // cond: (z.Op != OpConst8  && x.Op != OpConst8)
        // result: (Add8  i (Add8  <t> z x))
        for {
@@ -1607,12 +2707,90 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Add8  (Sub8  i:(Const8  <t>) z) x)
+       // match: (Add8 (Add8 z i:(Const8 <t>)) x)
        // cond: (z.Op != OpConst8  && x.Op != OpConst8)
-       // result: (Add8  i (Sub8  <t> x z))
+       // result: (Add8  i (Add8  <t> z x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpSub8 {
+               if v_0.Op != OpAdd8 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpAdd8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAdd8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add8 x (Add8 i:(Const8 <t>) z))
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Add8  i (Add8  <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd8 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpAdd8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAdd8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add8 x (Add8 z i:(Const8 <t>)))
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Add8  i (Add8  <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd8 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpAdd8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAdd8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add8 (Sub8 i:(Const8 <t>) z) x)
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Add8  i (Sub8  <t> x z))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub8 {
                        break
                }
                i := v_0.Args[0]
@@ -1633,7 +2811,33 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Add8  x (Sub8  i:(Const8  <t>) z))
+       // match: (Add8 x (Sub8 i:(Const8 <t>) z))
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Add8  i (Sub8  <t> x z))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub8 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpAdd8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpSub8, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add8 x (Sub8 i:(Const8 <t>) z))
        // cond: (z.Op != OpConst8  && x.Op != OpConst8)
        // result: (Add8  i (Sub8  <t> x z))
        for {
@@ -1659,7 +2863,33 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Add8  (Sub8  z i:(Const8  <t>)) x)
+       // match: (Add8 (Sub8 i:(Const8 <t>) z) x)
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Add8  i (Sub8  <t> x z))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub8 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpAdd8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpSub8, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Add8 (Sub8 z i:(Const8 <t>)) x)
        // cond: (z.Op != OpConst8  && x.Op != OpConst8)
        // result: (Sub8  (Add8  <t> x z) i)
        for {
@@ -1685,7 +2915,7 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                v.AddArg(i)
                return true
        }
-       // match: (Add8  x (Sub8  z i:(Const8  <t>)))
+       // match: (Add8 x (Sub8 z i:(Const8 <t>)))
        // cond: (z.Op != OpConst8  && x.Op != OpConst8)
        // result: (Sub8  (Add8  <t> x z) i)
        for {
@@ -1711,39 +2941,61 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                v.AddArg(i)
                return true
        }
-       // match: (Add8  (Const8  <t> [c]) (Add8  (Const8  <t> [d]) x))
-       // cond:
-       // result: (Add8  (Const8  <t> [int64(int8(c+d))]) x)
+       // match: (Add8 x (Sub8 z i:(Const8 <t>)))
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Sub8  (Add8  <t> x z) i)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpConst8 {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub8 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpAdd8 {
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst8 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst8 {
+               t := i.Type
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
                        break
                }
-               if v_1_0.Type != t {
+               v.reset(OpSub8)
+               v0 := b.NewValue0(v.Pos, OpAdd8, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               v.AddArg(i)
+               return true
+       }
+       // match: (Add8 (Sub8 z i:(Const8 <t>)) x)
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Sub8  (Add8  <t> x z) i)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub8 {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
-               v.reset(OpAdd8)
-               v0 := b.NewValue0(v.Pos, OpConst8, t)
-               v0.AuxInt = int64(int8(c + d))
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpSub8)
+               v0 := b.NewValue0(v.Pos, OpAdd8, t)
+               v0.AddArg(x)
+               v0.AddArg(z)
                v.AddArg(v0)
-               v.AddArg(x)
+               v.AddArg(i)
                return true
        }
-       // match: (Add8  (Const8  <t> [c]) (Sub8  (Const8  <t> [d]) x))
+       // match: (Add8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x))
        // cond:
-       // result: (Sub8  (Const8  <t> [int64(int8(c+d))]) x)
+       // result: (Add8  (Const8  <t> [int64(int8(c+d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst8 {
@@ -1752,7 +3004,7 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpSub8 {
+               if v_1.Op != OpAdd8 {
                        break
                }
                v_1_0 := v_1.Args[0]
@@ -1764,16 +3016,16 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                }
                d := v_1_0.AuxInt
                x := v_1.Args[1]
-               v.reset(OpSub8)
+               v.reset(OpAdd8)
                v0 := b.NewValue0(v.Pos, OpConst8, t)
                v0.AuxInt = int64(int8(c + d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Add8  (Const8  <t> [c]) (Sub8  x (Const8  <t> [d])))
+       // match: (Add8 (Const8 <t> [c]) (Add8 x (Const8 <t> [d])))
        // cond:
-       // result: (Add8  (Const8  <t> [int64(int8(c-d))]) x)
+       // result: (Add8  (Const8  <t> [int64(int8(c+d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst8 {
@@ -1782,7 +3034,7 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpSub8 {
+               if v_1.Op != OpAdd8 {
                        break
                }
                x := v_1.Args[0]
@@ -1796,59 +3048,239 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
                d := v_1_1.AuxInt
                v.reset(OpAdd8)
                v0 := b.NewValue0(v.Pos, OpConst8, t)
-               v0.AuxInt = int64(int8(c - d))
+               v0.AuxInt = int64(int8(c + d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpAddPtr(v *Value) bool {
-       // match: (AddPtr <t> x (Const64 [c]))
+       // match: (Add8 (Add8 (Const8 <t> [d]) x) (Const8 <t> [c]))
        // cond:
-       // result: (OffPtr <t> x [c])
+       // result: (Add8  (Const8  <t> [int64(int8(c+d))]) x)
        for {
-               t := v.Type
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
                v_1 := v.Args[1]
-               if v_1.Op != OpConst64 {
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpOffPtr)
-               v.Type = t
-               v.AuxInt = c
+               v.reset(OpAdd8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c + d))
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (AddPtr <t> x (Const32 [c]))
+       // match: (Add8 (Add8 x (Const8 <t> [d])) (Const8 <t> [c]))
        // cond:
-       // result: (OffPtr <t> x [c])
+       // result: (Add8  (Const8  <t> [int64(int8(c+d))]) x)
        for {
-               t := v.Type
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd8 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst8 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst32 {
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpOffPtr)
-               v.Type = t
-               v.AuxInt = c
+               v.reset(OpAdd8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c + d))
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpAnd16(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (And16  (Const16 [c])  (Const16 [d]))
+       // match: (Add8 (Const8 <t> [c]) (Sub8 (Const8 <t> [d]) x))
        // cond:
-       // result: (Const16 [int64(int16(c&d))])
+       // result: (Sub8  (Const8  <t> [int64(int8(c+d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub8 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst8 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpSub8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c + d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add8 (Sub8 (Const8 <t> [d]) x) (Const8 <t> [c]))
+       // cond:
+       // result: (Sub8  (Const8  <t> [int64(int8(c+d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpSub8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c + d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add8 (Const8 <t> [c]) (Sub8 x (Const8 <t> [d])))
+       // cond:
+       // result: (Add8  (Const8  <t> [int64(int8(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpSub8 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst8 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpAdd8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Add8 (Sub8 x (Const8 <t> [d])) (Const8 <t> [c]))
+       // cond:
+       // result: (Add8  (Const8  <t> [int64(int8(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub8 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst8 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpAddPtr(v *Value) bool {
+       // match: (AddPtr <t> x (Const64 [c]))
+       // cond:
+       // result: (OffPtr <t> x [c])
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpOffPtr)
+               v.Type = t
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (AddPtr <t> x (Const32 [c]))
+       // cond:
+       // result: (OffPtr <t> x [c])
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpOffPtr)
+               v.Type = t
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpAnd16(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (And16 (Const16 [c]) (Const16 [d]))
+       // cond:
+       // result: (Const16 [int64(int16(c&d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
                        break
                }
                c := v_0.AuxInt
@@ -1861,25 +3293,22 @@ func rewriteValuegeneric_OpAnd16(v *Value) bool {
                v.AuxInt = int64(int16(c & d))
                return true
        }
-       // match: (And16 x (Const16 <t> [c]))
-       // cond: x.Op != OpConst16
-       // result: (And16 (Const16 <t> [c]) x)
+       // match: (And16 (Const16 [d]) (Const16 [c]))
+       // cond:
+       // result: (Const16 [int64(int16(c&d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst16 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst16) {
-                       break
-               }
-               v.reset(OpAnd16)
-               v0 := b.NewValue0(v.Pos, OpConst16, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst16)
+               v.AuxInt = int64(int16(c & d))
                return true
        }
        // match: (And16 x x)
@@ -1912,6 +3341,23 @@ func rewriteValuegeneric_OpAnd16(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (And16 x (Const16 [-1]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (And16 (Const16 [0]) _)
        // cond:
        // result: (Const16 [0])
@@ -1927,6 +3373,21 @@ func rewriteValuegeneric_OpAnd16(v *Value) bool {
                v.AuxInt = 0
                return true
        }
+       // match: (And16 _ (Const16 [0]))
+       // cond:
+       // result: (Const16 [0])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpConst16)
+               v.AuxInt = 0
+               return true
+       }
        // match: (And16 x (And16 x y))
        // cond:
        // result: (And16 x y)
@@ -1981,7 +3442,7 @@ func rewriteValuegeneric_OpAnd16(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (And16 (And16 x y) y)
+       // match: (And16 (And16 y x) x)
        // cond:
        // result: (And16 x y)
        for {
@@ -1989,9 +3450,9 @@ func rewriteValuegeneric_OpAnd16(v *Value) bool {
                if v_0.Op != OpAnd16 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
                v.reset(OpAnd16)
@@ -1999,24 +3460,33 @@ func rewriteValuegeneric_OpAnd16(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (And16 x l:(And16 _ _))
-       // cond: (x.Op != OpAnd16 && x.Op != OpConst16)
-       // result: (And16 l x)
+       // match: (And16 (And16 i:(Const16 <t>) z) x)
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (And16 i (And16 <t> z x))
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAnd16 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd16 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst16 {
                        break
                }
-               if !(x.Op != OpAnd16 && x.Op != OpConst16) {
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
                        break
                }
                v.reset(OpAnd16)
-               v.AddArg(l)
-               v.AddArg(x)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (And16 (And16 i:(Const16 <t>) z) x)
+       // match: (And16 (And16 z i:(Const16 <t>)) x)
        // cond: (z.Op != OpConst16 && x.Op != OpConst16)
        // result: (And16 i (And16 <t> z x))
        for {
@@ -2024,12 +3494,12 @@ func rewriteValuegeneric_OpAnd16(v *Value) bool {
                if v_0.Op != OpAnd16 {
                        break
                }
-               i := v_0.Args[0]
+               z := v_0.Args[0]
+               i := v_0.Args[1]
                if i.Op != OpConst16 {
                        break
                }
                t := i.Type
-               z := v_0.Args[1]
                x := v.Args[1]
                if !(z.Op != OpConst16 && x.Op != OpConst16) {
                        break
@@ -2042,31 +3512,173 @@ func rewriteValuegeneric_OpAnd16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (And16 (Const16 <t> [c]) (And16 (Const16 <t> [d]) x))
-       // cond:
-       // result: (And16 (Const16 <t> [int64(int16(c&d))]) x)
+       // match: (And16 x (And16 i:(Const16 <t>) z))
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (And16 i (And16 <t> z x))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
-                       break
-               }
-               t := v_0.Type
-               c := v_0.AuxInt
+               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAnd16 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst16 {
+               i := v_1.Args[0]
+               if i.Op != OpConst16 {
                        break
                }
-               if v_1_0.Type != t {
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
                v.reset(OpAnd16)
-               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (And16 x (And16 z i:(Const16 <t>)))
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (And16 i (And16 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd16 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpAnd16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (And16 (Const16 <t> [c]) (And16 (Const16 <t> [d]) x))
+       // cond:
+       // result: (And16 (Const16 <t> [int64(int16(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd16 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst16 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpAnd16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c & d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (And16 (Const16 <t> [c]) (And16 x (Const16 <t> [d])))
+       // cond:
+       // result: (And16 (Const16 <t> [int64(int16(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd16 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst16 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpAnd16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c & d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (And16 (And16 (Const16 <t> [d]) x) (Const16 <t> [c]))
+       // cond:
+       // result: (And16 (Const16 <t> [int64(int16(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd16 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAnd16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c & d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (And16 (And16 x (Const16 <t> [d])) (Const16 <t> [c]))
+       // cond:
+       // result: (And16 (Const16 <t> [int64(int16(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd16 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst16 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAnd16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
                v0.AuxInt = int64(int16(c & d))
                v.AddArg(v0)
                v.AddArg(x)
@@ -2077,7 +3689,7 @@ func rewriteValuegeneric_OpAnd16(v *Value) bool {
 func rewriteValuegeneric_OpAnd32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (And32  (Const32 [c])  (Const32 [d]))
+       // match: (And32 (Const32 [c]) (Const32 [d]))
        // cond:
        // result: (Const32 [int64(int32(c&d))])
        for {
@@ -2095,25 +3707,22 @@ func rewriteValuegeneric_OpAnd32(v *Value) bool {
                v.AuxInt = int64(int32(c & d))
                return true
        }
-       // match: (And32 x (Const32 <t> [c]))
-       // cond: x.Op != OpConst32
-       // result: (And32 (Const32 <t> [c]) x)
+       // match: (And32 (Const32 [d]) (Const32 [c]))
+       // cond:
+       // result: (Const32 [int64(int32(c&d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst32) {
-                       break
-               }
-               v.reset(OpAnd32)
-               v0 := b.NewValue0(v.Pos, OpConst32, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst32)
+               v.AuxInt = int64(int32(c & d))
                return true
        }
        // match: (And32 x x)
@@ -2146,6 +3755,23 @@ func rewriteValuegeneric_OpAnd32(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (And32 x (Const32 [-1]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (And32 (Const32 [0]) _)
        // cond:
        // result: (Const32 [0])
@@ -2161,6 +3787,21 @@ func rewriteValuegeneric_OpAnd32(v *Value) bool {
                v.AuxInt = 0
                return true
        }
+       // match: (And32 _ (Const32 [0]))
+       // cond:
+       // result: (Const32 [0])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpConst32)
+               v.AuxInt = 0
+               return true
+       }
        // match: (And32 x (And32 x y))
        // cond:
        // result: (And32 x y)
@@ -2215,7 +3856,7 @@ func rewriteValuegeneric_OpAnd32(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (And32 (And32 x y) y)
+       // match: (And32 (And32 y x) x)
        // cond:
        // result: (And32 x y)
        for {
@@ -2223,9 +3864,9 @@ func rewriteValuegeneric_OpAnd32(v *Value) bool {
                if v_0.Op != OpAnd32 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
                v.reset(OpAnd32)
@@ -2233,24 +3874,33 @@ func rewriteValuegeneric_OpAnd32(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (And32 x l:(And32 _ _))
-       // cond: (x.Op != OpAnd32 && x.Op != OpConst32)
-       // result: (And32 l x)
+       // match: (And32 (And32 i:(Const32 <t>) z) x)
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (And32 i (And32 <t> z x))
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAnd32 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd32 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst32 {
                        break
                }
-               if !(x.Op != OpAnd32 && x.Op != OpConst32) {
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
                        break
                }
                v.reset(OpAnd32)
-               v.AddArg(l)
-               v.AddArg(x)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (And32 (And32 i:(Const32 <t>) z) x)
+       // match: (And32 (And32 z i:(Const32 <t>)) x)
        // cond: (z.Op != OpConst32 && x.Op != OpConst32)
        // result: (And32 i (And32 <t> z x))
        for {
@@ -2258,12 +3908,12 @@ func rewriteValuegeneric_OpAnd32(v *Value) bool {
                if v_0.Op != OpAnd32 {
                        break
                }
-               i := v_0.Args[0]
+               z := v_0.Args[0]
+               i := v_0.Args[1]
                if i.Op != OpConst32 {
                        break
                }
                t := i.Type
-               z := v_0.Args[1]
                x := v.Args[1]
                if !(z.Op != OpConst32 && x.Op != OpConst32) {
                        break
@@ -2276,44 +3926,186 @@ func rewriteValuegeneric_OpAnd32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (And32 (Const32 <t> [c]) (And32 (Const32 <t> [d]) x))
-       // cond:
-       // result: (And32 (Const32 <t> [int64(int32(c&d))]) x)
+       // match: (And32 x (And32 i:(Const32 <t>) z))
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (And32 i (And32 <t> z x))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
-                       break
-               }
-               t := v_0.Type
-               c := v_0.AuxInt
+               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAnd32 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst32 {
+               i := v_1.Args[0]
+               if i.Op != OpConst32 {
                        break
                }
-               if v_1_0.Type != t {
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
                v.reset(OpAnd32)
-               v0 := b.NewValue0(v.Pos, OpConst32, t)
-               v0.AuxInt = int64(int32(c & d))
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
                v.AddArg(v0)
-               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpAnd64(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (And64  (Const64 [c])  (Const64 [d]))
-       // cond:
-       // result: (Const64 [c&d])
+       // match: (And32 x (And32 z i:(Const32 <t>)))
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (And32 i (And32 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd32 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpAnd32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (And32 (Const32 <t> [c]) (And32 (Const32 <t> [d]) x))
+       // cond:
+       // result: (And32 (Const32 <t> [int64(int32(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd32 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst32 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpAnd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c & d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (And32 (Const32 <t> [c]) (And32 x (Const32 <t> [d])))
+       // cond:
+       // result: (And32 (Const32 <t> [int64(int32(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd32 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst32 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpAnd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c & d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (And32 (And32 (Const32 <t> [d]) x) (Const32 <t> [c]))
+       // cond:
+       // result: (And32 (Const32 <t> [int64(int32(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd32 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAnd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c & d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (And32 (And32 x (Const32 <t> [d])) (Const32 <t> [c]))
+       // cond:
+       // result: (And32 (Const32 <t> [int64(int32(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd32 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAnd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c & d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpAnd64(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (And64 (Const64 [c]) (Const64 [d]))
+       // cond:
+       // result: (Const64 [c&d])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst64 {
@@ -2329,25 +4121,22 @@ func rewriteValuegeneric_OpAnd64(v *Value) bool {
                v.AuxInt = c & d
                return true
        }
-       // match: (And64 x (Const64 <t> [c]))
-       // cond: x.Op != OpConst64
-       // result: (And64 (Const64 <t> [c]) x)
+       // match: (And64 (Const64 [d]) (Const64 [c]))
+       // cond:
+       // result: (Const64 [c&d])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst64) {
-                       break
-               }
-               v.reset(OpAnd64)
-               v0 := b.NewValue0(v.Pos, OpConst64, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst64)
+               v.AuxInt = c & d
                return true
        }
        // match: (And64 x x)
@@ -2380,6 +4169,23 @@ func rewriteValuegeneric_OpAnd64(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (And64 x (Const64 [-1]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (And64 (Const64 [0]) _)
        // cond:
        // result: (Const64 [0])
@@ -2395,6 +4201,21 @@ func rewriteValuegeneric_OpAnd64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
+       // match: (And64 _ (Const64 [0]))
+       // cond:
+       // result: (Const64 [0])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpConst64)
+               v.AuxInt = 0
+               return true
+       }
        // match: (And64 x (And64 x y))
        // cond:
        // result: (And64 x y)
@@ -2449,7 +4270,7 @@ func rewriteValuegeneric_OpAnd64(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (And64 (And64 x y) y)
+       // match: (And64 (And64 y x) x)
        // cond:
        // result: (And64 x y)
        for {
@@ -2457,9 +4278,9 @@ func rewriteValuegeneric_OpAnd64(v *Value) bool {
                if v_0.Op != OpAnd64 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
                v.reset(OpAnd64)
@@ -2493,6 +4314,32 @@ func rewriteValuegeneric_OpAnd64(v *Value) bool {
                v.AddArg(v2)
                return true
        }
+       // match: (And64 <t> x (Const64 [y]))
+       // cond: nlz(y) + nto(y) == 64 && nto(y) >= 32
+       // result: (Rsh64Ux64 (Lsh64x64 <t> x (Const64 <t> [nlz(y)])) (Const64 <t> [nlz(y)]))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               y := v_1.AuxInt
+               if !(nlz(y)+nto(y) == 64 && nto(y) >= 32) {
+                       break
+               }
+               v.reset(OpRsh64Ux64)
+               v0 := b.NewValue0(v.Pos, OpLsh64x64, t)
+               v0.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpConst64, t)
+               v1.AuxInt = nlz(y)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               v2 := b.NewValue0(v.Pos, OpConst64, t)
+               v2.AuxInt = nlz(y)
+               v.AddArg(v2)
+               return true
+       }
        // match: (And64 <t> (Const64 [y]) x)
        // cond: nlo(y) + ntz(y) == 64 && ntz(y) >= 32
        // result: (Lsh64x64 (Rsh64Ux64 <t> x (Const64 <t> [ntz(y)])) (Const64 <t> [ntz(y)]))
@@ -2519,21 +4366,30 @@ func rewriteValuegeneric_OpAnd64(v *Value) bool {
                v.AddArg(v2)
                return true
        }
-       // match: (And64 x l:(And64 _ _))
-       // cond: (x.Op != OpAnd64 && x.Op != OpConst64)
-       // result: (And64 l x)
+       // match: (And64 <t> x (Const64 [y]))
+       // cond: nlo(y) + ntz(y) == 64 && ntz(y) >= 32
+       // result: (Lsh64x64 (Rsh64Ux64 <t> x (Const64 <t> [ntz(y)])) (Const64 <t> [ntz(y)]))
        for {
+               t := v.Type
                x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAnd64 {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
                        break
                }
-               if !(x.Op != OpAnd64 && x.Op != OpConst64) {
+               y := v_1.AuxInt
+               if !(nlo(y)+ntz(y) == 64 && ntz(y) >= 32) {
                        break
                }
-               v.reset(OpAnd64)
-               v.AddArg(l)
-               v.AddArg(x)
+               v.reset(OpLsh64x64)
+               v0 := b.NewValue0(v.Pos, OpRsh64Ux64, t)
+               v0.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpConst64, t)
+               v1.AuxInt = ntz(y)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               v2 := b.NewValue0(v.Pos, OpConst64, t)
+               v2.AuxInt = ntz(y)
+               v.AddArg(v2)
                return true
        }
        // match: (And64 (And64 i:(Const64 <t>) z) x)
@@ -2562,23 +4418,101 @@ func rewriteValuegeneric_OpAnd64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (And64 (Const64 <t> [c]) (And64 (Const64 <t> [d]) x))
-       // cond:
-       // result: (And64 (Const64 <t> [c&d]) x)
+       // match: (And64 (And64 z i:(Const64 <t>)) x)
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (And64 i (And64 <t> z x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst64 {
+               if v_0.Op != OpAnd64 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpAnd64 {
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst64 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst64 {
-                       break
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpAnd64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (And64 x (And64 i:(Const64 <t>) z))
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (And64 i (And64 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd64 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpAnd64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (And64 x (And64 z i:(Const64 <t>)))
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (And64 i (And64 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd64 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpAnd64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (And64 (Const64 <t> [c]) (And64 (Const64 <t> [d]) x))
+       // cond:
+       // result: (And64 (Const64 <t> [c&d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd64 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst64 {
+                       break
                }
                if v_1_0.Type != t {
                        break
@@ -2592,12 +4526,102 @@ func rewriteValuegeneric_OpAnd64(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (And64 (Const64 <t> [c]) (And64 x (Const64 <t> [d])))
+       // cond:
+       // result: (And64 (Const64 <t> [c&d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd64 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst64 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpAnd64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c & d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (And64 (And64 (Const64 <t> [d]) x) (Const64 <t> [c]))
+       // cond:
+       // result: (And64 (Const64 <t> [c&d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd64 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAnd64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c & d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (And64 (And64 x (Const64 <t> [d])) (Const64 <t> [c]))
+       // cond:
+       // result: (And64 (Const64 <t> [c&d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd64 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAnd64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c & d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpAnd8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (And8   (Const8 [c])   (Const8 [d]))
+       // match: (And8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (Const8  [int64(int8(c&d))])
        for {
@@ -2615,28 +4639,25 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                v.AuxInt = int64(int8(c & d))
                return true
        }
-       // match: (And8  x (Const8  <t> [c]))
-       // cond: x.Op != OpConst8
-       // result: (And8  (Const8  <t> [c]) x)
+       // match: (And8 (Const8 [d]) (Const8 [c]))
+       // cond:
+       // result: (Const8  [int64(int8(c&d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst8 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst8) {
-                       break
-               }
-               v.reset(OpAnd8)
-               v0 := b.NewValue0(v.Pos, OpConst8, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst8)
+               v.AuxInt = int64(int8(c & d))
                return true
        }
-       // match: (And8  x x)
+       // match: (And8 x x)
        // cond:
        // result: x
        for {
@@ -2649,7 +4670,7 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (And8  (Const8  [-1]) x)
+       // match: (And8 (Const8 [-1]) x)
        // cond:
        // result: x
        for {
@@ -2666,7 +4687,24 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (And8  (Const8  [0]) _)
+       // match: (And8 x (Const8 [-1]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (And8 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -2681,7 +4719,22 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (And8  x (And8  x y))
+       // match: (And8 _ (Const8 [0]))
+       // cond:
+       // result: (Const8  [0])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpConst8)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (And8 x (And8 x y))
        // cond:
        // result: (And8  x y)
        for {
@@ -2699,7 +4752,7 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (And8  x (And8  y x))
+       // match: (And8 x (And8 y x))
        // cond:
        // result: (And8  x y)
        for {
@@ -2717,7 +4770,7 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (And8  (And8  x y) x)
+       // match: (And8 (And8 x y) x)
        // cond:
        // result: (And8  x y)
        for {
@@ -2735,7 +4788,7 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (And8  (And8  x y) y)
+       // match: (And8 (And8 y x) x)
        // cond:
        // result: (And8  x y)
        for {
@@ -2743,9 +4796,9 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                if v_0.Op != OpAnd8 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
                v.reset(OpAnd8)
@@ -2753,24 +4806,33 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (And8  x l:(And8  _ _))
-       // cond: (x.Op != OpAnd8  && x.Op != OpConst8)
-       // result: (And8  l x)
+       // match: (And8 (And8 i:(Const8 <t>) z) x)
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (And8  i (And8  <t> z x))
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAnd8 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd8 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst8 {
                        break
                }
-               if !(x.Op != OpAnd8 && x.Op != OpConst8) {
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
                        break
                }
                v.reset(OpAnd8)
-               v.AddArg(l)
-               v.AddArg(x)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (And8  (And8  i:(Const8  <t>) z) x)
+       // match: (And8 (And8 z i:(Const8 <t>)) x)
        // cond: (z.Op != OpConst8  && x.Op != OpConst8)
        // result: (And8  i (And8  <t> z x))
        for {
@@ -2778,12 +4840,12 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                if v_0.Op != OpAnd8 {
                        break
                }
-               i := v_0.Args[0]
+               z := v_0.Args[0]
+               i := v_0.Args[1]
                if i.Op != OpConst8 {
                        break
                }
                t := i.Type
-               z := v_0.Args[1]
                x := v.Args[1]
                if !(z.Op != OpConst8 && x.Op != OpConst8) {
                        break
@@ -2796,25 +4858,77 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (And8  (Const8  <t> [c]) (And8  (Const8  <t> [d]) x))
-       // cond:
-       // result: (And8  (Const8  <t> [int64(int8(c&d))]) x)
+       // match: (And8 x (And8 i:(Const8 <t>) z))
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (And8  i (And8  <t> z x))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpConst8 {
-                       break
-               }
-               t := v_0.Type
-               c := v_0.AuxInt
+               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAnd8 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst8 {
+               i := v_1.Args[0]
+               if i.Op != OpConst8 {
                        break
                }
-               if v_1_0.Type != t {
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpAnd8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (And8 x (And8 z i:(Const8 <t>)))
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (And8  i (And8  <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd8 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpAnd8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAnd8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (And8 (Const8 <t> [c]) (And8 (Const8 <t> [d]) x))
+       // cond:
+       // result: (And8  (Const8  <t> [int64(int8(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd8 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst8 {
+                       break
+               }
+               if v_1_0.Type != t {
                        break
                }
                d := v_1_0.AuxInt
@@ -2826,6 +4940,96 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (And8 (Const8 <t> [c]) (And8 x (Const8 <t> [d])))
+       // cond:
+       // result: (And8  (Const8  <t> [int64(int8(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAnd8 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst8 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpAnd8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c & d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (And8 (And8 (Const8 <t> [d]) x) (Const8 <t> [c]))
+       // cond:
+       // result: (And8  (Const8  <t> [int64(int8(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAnd8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c & d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (And8 (And8 x (Const8 <t> [d])) (Const8 <t> [c]))
+       // cond:
+       // result: (And8  (Const8  <t> [int64(int8(c&d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd8 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst8 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAnd8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c & d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpArg(v *Value) bool {
@@ -3177,7 +5381,7 @@ func rewriteValuegeneric_OpCom64(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpCom8(v *Value) bool {
-       // match: (Com8  (Com8  x))
+       // match: (Com8 (Com8 x))
        // cond:
        // result: x
        for {
@@ -3440,7 +5644,7 @@ func rewriteValuegeneric_OpDiv16(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div16  (Const16 [c])  (Const16 [d]))
+       // match: (Div16 (Const16 [c]) (Const16 [d]))
        // cond: d != 0
        // result: (Const16 [int64(int16(c)/int16(d))])
        for {
@@ -3591,7 +5795,7 @@ func rewriteValuegeneric_OpDiv16u(v *Value) bool {
        _ = config
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div16u (Const16 [c])  (Const16 [d]))
+       // match: (Div16u (Const16 [c]) (Const16 [d]))
        // cond: d != 0
        // result: (Const16 [int64(int16(uint16(c)/uint16(d)))])
        for {
@@ -3772,7 +5976,7 @@ func rewriteValuegeneric_OpDiv32(v *Value) bool {
        _ = config
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div32  (Const32 [c])  (Const32 [d]))
+       // match: (Div32 (Const32 [c]) (Const32 [d]))
        // cond: d != 0
        // result: (Const32 [int64(int32(c)/int32(d))])
        for {
@@ -4040,7 +6244,7 @@ func rewriteValuegeneric_OpDiv32u(v *Value) bool {
        _ = config
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div32u (Const32 [c])  (Const32 [d]))
+       // match: (Div32u (Const32 [c]) (Const32 [d]))
        // cond: d != 0
        // result: (Const32 [int64(int32(uint32(c)/uint32(d)))])
        for {
@@ -4276,7 +6480,7 @@ func rewriteValuegeneric_OpDiv64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div64  (Const64 [c])  (Const64 [d]))
+       // match: (Div64 (Const64 [c]) (Const64 [d]))
        // cond: d != 0
        // result: (Const64 [c/d])
        for {
@@ -4505,7 +6709,7 @@ func rewriteValuegeneric_OpDiv64u(v *Value) bool {
        _ = config
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div64u (Const64 [c])  (Const64 [d]))
+       // match: (Div64u (Const64 [c]) (Const64 [d]))
        // cond: d != 0
        // result: (Const64 [int64(uint64(c)/uint64(d))])
        for {
@@ -4639,7 +6843,7 @@ func rewriteValuegeneric_OpDiv8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div8   (Const8  [c])  (Const8  [d]))
+       // match: (Div8 (Const8 [c]) (Const8 [d]))
        // cond: d != 0
        // result: (Const8  [int64(int8(c)/int8(d))])
        for {
@@ -4660,7 +6864,7 @@ func rewriteValuegeneric_OpDiv8(v *Value) bool {
                v.AuxInt = int64(int8(c) / int8(d))
                return true
        }
-       // match: (Div8  <t> n (Const8  [c]))
+       // match: (Div8 <t> n (Const8 [c]))
        // cond: c < 0 && c != -1<<7
        // result: (Neg8  (Div8  <t> n (Const8  <t> [-c])))
        for {
@@ -4683,7 +6887,7 @@ func rewriteValuegeneric_OpDiv8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Div8  <t> x (Const8  [-1<<7 ]))
+       // match: (Div8 <t> x (Const8 [-1<<7 ]))
        // cond:
        // result: (Rsh8Ux64  (And8  <t> x (Neg8  <t> x)) (Const64 <types.UInt64> [7 ]))
        for {
@@ -4708,7 +6912,7 @@ func rewriteValuegeneric_OpDiv8(v *Value) bool {
                v.AddArg(v2)
                return true
        }
-       // match: (Div8  <t> n (Const8  [c]))
+       // match: (Div8 <t> n (Const8 [c]))
        // cond: isPowerOfTwo(c)
        // result: (Rsh8x64     (Add8  <t> n (Rsh8Ux64  <t> (Rsh8x64  <t> n (Const64 <types.UInt64> [ 7])) (Const64 <types.UInt64> [ 8-log2(c)])))     (Const64 <types.UInt64> [log2(c)]))
        for {
@@ -4788,7 +6992,7 @@ func rewriteValuegeneric_OpDiv8u(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Div8u  (Const8  [c])  (Const8  [d]))
+       // match: (Div8u (Const8 [c]) (Const8 [d]))
        // cond: d != 0
        // result: (Const8  [int64(int8(uint8(c)/uint8(d)))])
        for {
@@ -4809,7 +7013,7 @@ func rewriteValuegeneric_OpDiv8u(v *Value) bool {
                v.AuxInt = int64(int8(uint8(c) / uint8(d)))
                return true
        }
-       // match: (Div8u  n (Const8  [c]))
+       // match: (Div8u n (Const8 [c]))
        // cond: isPowerOfTwo(c&0xff)
        // result: (Rsh8Ux64 n  (Const64 <types.UInt64> [log2(c&0xff)]))
        for {
@@ -4905,23 +7109,92 @@ func rewriteValuegeneric_OpEq16(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Eq16 x (Const16 <t> [c]))
-       // cond: x.Op != OpConst16
-       // result: (Eq16 (Const16 <t> [c]) x)
+       // match: (Eq16 (Const16 <t> [c]) (Add16 x (Const16 <t> [d])))
+       // cond:
+       // result: (Eq16 (Const16 <t> [int64(int16(c-d))]) x)
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd16 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst16 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpEq16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Eq16 (Add16 (Const16 <t> [d]) x) (Const16 <t> [c]))
+       // cond:
+       // result: (Eq16 (Const16 <t> [int64(int16(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd16 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
                v_1 := v.Args[1]
                if v_1.Op != OpConst16 {
                        break
                }
-               t := v_1.Type
+               if v_1.Type != t {
+                       break
+               }
                c := v_1.AuxInt
-               if !(x.Op != OpConst16) {
+               v.reset(OpEq16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Eq16 (Add16 x (Const16 <t> [d])) (Const16 <t> [c]))
+       // cond:
+       // result: (Eq16 (Const16 <t> [int64(int16(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd16 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst16 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.Type != t {
                        break
                }
+               c := v_1.AuxInt
                v.reset(OpEq16)
                v0 := b.NewValue0(v.Pos, OpConst16, t)
-               v0.AuxInt = c
+               v0.AuxInt = int64(int16(c - d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
@@ -4944,6 +7217,24 @@ func rewriteValuegeneric_OpEq16(v *Value) bool {
                v.AuxInt = b2i(c == d)
                return true
        }
+       // match: (Eq16 (Const16 [d]) (Const16 [c]))
+       // cond:
+       // result: (ConstBool [b2i(c == d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c == d)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpEq32(v *Value) bool {
@@ -4991,30 +7282,99 @@ func rewriteValuegeneric_OpEq32(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Eq32 x (Const32 <t> [c]))
-       // cond: x.Op != OpConst32
-       // result: (Eq32 (Const32 <t> [c]) x)
+       // match: (Eq32 (Const32 <t> [c]) (Add32 x (Const32 <t> [d])))
+       // cond:
+       // result: (Eq32 (Const32 <t> [int64(int32(c-d))]) x)
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst32 {
+               if v_1.Op != OpAdd32 {
                        break
                }
-               t := v_1.Type
-               c := v_1.AuxInt
-               if !(x.Op != OpConst32) {
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst32 {
                        break
                }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
                v.reset(OpEq32)
                v0 := b.NewValue0(v.Pos, OpConst32, t)
-               v0.AuxInt = c
+               v0.AuxInt = int64(int32(c - d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Eq32 (Const32 [c]) (Const32 [d]))
+       // match: (Eq32 (Add32 (Const32 <t> [d]) x) (Const32 <t> [c]))
        // cond:
-       // result: (ConstBool [b2i(c == d)])
+       // result: (Eq32 (Const32 <t> [int64(int32(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd32 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpEq32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Eq32 (Add32 x (Const32 <t> [d])) (Const32 <t> [c]))
+       // cond:
+       // result: (Eq32 (Const32 <t> [int64(int32(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd32 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpEq32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Eq32 (Const32 [c]) (Const32 [d]))
+       // cond:
+       // result: (ConstBool [b2i(c == d)])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst32 {
@@ -5030,6 +7390,24 @@ func rewriteValuegeneric_OpEq32(v *Value) bool {
                v.AuxInt = b2i(c == d)
                return true
        }
+       // match: (Eq32 (Const32 [d]) (Const32 [c]))
+       // cond:
+       // result: (ConstBool [b2i(c == d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c == d)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpEq64(v *Value) bool {
@@ -5077,23 +7455,92 @@ func rewriteValuegeneric_OpEq64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Eq64 x (Const64 <t> [c]))
-       // cond: x.Op != OpConst64
-       // result: (Eq64 (Const64 <t> [c]) x)
+       // match: (Eq64 (Const64 <t> [c]) (Add64 x (Const64 <t> [d])))
+       // cond:
+       // result: (Eq64 (Const64 <t> [c-d]) x)
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd64 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst64 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpEq64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c - d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Eq64 (Add64 (Const64 <t> [d]) x) (Const64 <t> [c]))
+       // cond:
+       // result: (Eq64 (Const64 <t> [c-d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd64 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
                }
-               t := v_1.Type
+               if v_1.Type != t {
+                       break
+               }
                c := v_1.AuxInt
-               if !(x.Op != OpConst64) {
+               v.reset(OpEq64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c - d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Eq64 (Add64 x (Const64 <t> [d])) (Const64 <t> [c]))
+       // cond:
+       // result: (Eq64 (Const64 <t> [c-d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd64 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
                        break
                }
+               c := v_1.AuxInt
                v.reset(OpEq64)
                v0 := b.NewValue0(v.Pos, OpConst64, t)
-               v0.AuxInt = c
+               v0.AuxInt = c - d
                v.AddArg(v0)
                v.AddArg(x)
                return true
@@ -5116,12 +7563,30 @@ func rewriteValuegeneric_OpEq64(v *Value) bool {
                v.AuxInt = b2i(c == d)
                return true
        }
+       // match: (Eq64 (Const64 [d]) (Const64 [c]))
+       // cond:
+       // result: (ConstBool [b2i(c == d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c == d)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpEq8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Eq8  x x)
+       // match: (Eq8 x x)
        // cond:
        // result: (ConstBool [1])
        for {
@@ -5133,7 +7598,7 @@ func rewriteValuegeneric_OpEq8(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (Eq8  (Const8  <t> [c]) (Add8  (Const8  <t> [d]) x))
+       // match: (Eq8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x))
        // cond:
        // result: (Eq8  (Const8 <t> [int64(int8(c-d))]) x)
        for {
@@ -5163,28 +7628,97 @@ func rewriteValuegeneric_OpEq8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Eq8  x (Const8  <t> [c]))
-       // cond: x.Op != OpConst8
-       // result: (Eq8  (Const8  <t> [c]) x)
+       // match: (Eq8 (Const8 <t> [c]) (Add8 x (Const8 <t> [d])))
+       // cond:
+       // result: (Eq8  (Const8 <t> [int64(int8(c-d))]) x)
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd8 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst8 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpEq8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Eq8 (Add8 (Const8 <t> [d]) x) (Const8 <t> [c]))
+       // cond:
+       // result: (Eq8  (Const8 <t> [int64(int8(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
                v_1 := v.Args[1]
                if v_1.Op != OpConst8 {
                        break
                }
-               t := v_1.Type
+               if v_1.Type != t {
+                       break
+               }
                c := v_1.AuxInt
-               if !(x.Op != OpConst8) {
+               v.reset(OpEq8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Eq8 (Add8 x (Const8 <t> [d])) (Const8 <t> [c]))
+       // cond:
+       // result: (Eq8  (Const8 <t> [int64(int8(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd8 {
                        break
                }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst8 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
                v.reset(OpEq8)
                v0 := b.NewValue0(v.Pos, OpConst8, t)
-               v0.AuxInt = c
+               v0.AuxInt = int64(int8(c - d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Eq8  (Const8  [c]) (Const8  [d]))
+       // match: (Eq8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (ConstBool [b2i(c == d)])
        for {
@@ -5202,6 +7736,24 @@ func rewriteValuegeneric_OpEq8(v *Value) bool {
                v.AuxInt = b2i(c == d)
                return true
        }
+       // match: (Eq8 (Const8 [d]) (Const8 [c]))
+       // cond:
+       // result: (ConstBool [b2i(c == d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c == d)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpEqB(v *Value) bool {
@@ -5348,8 +7900,30 @@ func rewriteValuegeneric_OpEqPtr(v *Value) bool {
                v.AuxInt = b2i(a == b)
                return true
        }
-       return false
-}
+       // match: (EqPtr (Addr {b} x) (Addr {a} x))
+       // cond:
+       // result: (ConstBool [b2i(a == b)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAddr {
+                       break
+               }
+               b := v_0.Aux
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAddr {
+                       break
+               }
+               a := v_1.Aux
+               if x != v_1.Args[0] {
+                       break
+               }
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(a == b)
+               return true
+       }
+       return false
+}
 func rewriteValuegeneric_OpEqSlice(v *Value) bool {
        b := v.Block
        _ = b
@@ -5498,7 +8072,7 @@ func rewriteValuegeneric_OpGeq64U(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpGeq8(v *Value) bool {
-       // match: (Geq8  (Const8  [c]) (Const8  [d]))
+       // match: (Geq8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (ConstBool [b2i(c >= d)])
        for {
@@ -5519,7 +8093,7 @@ func rewriteValuegeneric_OpGeq8(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpGeq8U(v *Value) bool {
-       // match: (Geq8U  (Const8  [c]) (Const8  [d]))
+       // match: (Geq8U (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (ConstBool [b2i(uint8(c)  >= uint8(d))])
        for {
@@ -5666,7 +8240,7 @@ func rewriteValuegeneric_OpGreater64U(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpGreater8(v *Value) bool {
-       // match: (Greater8  (Const8  [c]) (Const8  [d]))
+       // match: (Greater8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (ConstBool [b2i(c > d)])
        for {
@@ -5687,7 +8261,7 @@ func rewriteValuegeneric_OpGreater8(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpGreater8U(v *Value) bool {
-       // match: (Greater8U  (Const8  [c]) (Const8  [d]))
+       // match: (Greater8U (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (ConstBool [b2i(uint8(c)  > uint8(d))])
        for {
@@ -5785,7 +8359,7 @@ func rewriteValuegeneric_OpInterCall(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
-       // match: (IsInBounds (ZeroExt8to32  _) (Const32 [c]))
+       // match: (IsInBounds (ZeroExt8to32 _) (Const32 [c]))
        // cond: (1 << 8)  <= c
        // result: (ConstBool [1])
        for {
@@ -5805,7 +8379,7 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (ZeroExt8to64  _) (Const64 [c]))
+       // match: (IsInBounds (ZeroExt8to64 _) (Const64 [c]))
        // cond: (1 << 8)  <= c
        // result: (ConstBool [1])
        for {
@@ -5877,7 +8451,7 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (IsInBounds                (And8  (Const8  [c]) _)  (Const8  [d]))
+       // match: (IsInBounds (And8 (Const8 [c]) _) (Const8 [d]))
        // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
@@ -5902,7 +8476,32 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (ZeroExt8to16  (And8  (Const8  [c]) _)) (Const16 [d]))
+       // match: (IsInBounds (And8 _ (Const8 [c])) (Const8 [d]))
+       // cond: 0 <= c && c < d
+       // result: (ConstBool [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd8 {
+                       break
+               }
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst8 {
+                       break
+               }
+               c := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               d := v_1.AuxInt
+               if !(0 <= c && c < d) {
+                       break
+               }
+               v.reset(OpConstBool)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (IsInBounds (ZeroExt8to16 (And8 (Const8 [c]) _)) (Const16 [d]))
        // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
@@ -5931,25 +8530,25 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (ZeroExt8to32  (And8  (Const8  [c]) _)) (Const32 [d]))
+       // match: (IsInBounds (ZeroExt8to16 (And8 _ (Const8 [c]))) (Const16 [d]))
        // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpZeroExt8to32 {
+               if v_0.Op != OpZeroExt8to16 {
                        break
                }
                v_0_0 := v_0.Args[0]
                if v_0_0.Op != OpAnd8 {
                        break
                }
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpConst8 {
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpConst8 {
                        break
                }
-               c := v_0_0_0.AuxInt
+               c := v_0_0_1.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst32 {
+               if v_1.Op != OpConst16 {
                        break
                }
                d := v_1.AuxInt
@@ -5960,12 +8559,12 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (ZeroExt8to64  (And8  (Const8  [c]) _)) (Const64 [d]))
+       // match: (IsInBounds (ZeroExt8to32 (And8 (Const8 [c]) _)) (Const32 [d]))
        // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpZeroExt8to64 {
+               if v_0.Op != OpZeroExt8to32 {
                        break
                }
                v_0_0 := v_0.Args[0]
@@ -5978,7 +8577,7 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                }
                c := v_0_0_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst64 {
+               if v_1.Op != OpConst32 {
                        break
                }
                d := v_1.AuxInt
@@ -5989,21 +8588,25 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds                (And16 (Const16 [c]) _)  (Const16 [d]))
+       // match: (IsInBounds (ZeroExt8to32 (And8 _ (Const8 [c]))) (Const32 [d]))
        // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAnd16 {
+               if v_0.Op != OpZeroExt8to32 {
                        break
                }
                v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpConst16 {
+               if v_0_0.Op != OpAnd8 {
                        break
                }
-               c := v_0_0.AuxInt
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpConst8 {
+                       break
+               }
+               c := v_0_0_1.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst16 {
+               if v_1.Op != OpConst32 {
                        break
                }
                d := v_1.AuxInt
@@ -6014,25 +8617,25 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (ZeroExt16to32 (And16 (Const16 [c]) _)) (Const32 [d]))
+       // match: (IsInBounds (ZeroExt8to64 (And8 (Const8 [c]) _)) (Const64 [d]))
        // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpZeroExt16to32 {
+               if v_0.Op != OpZeroExt8to64 {
                        break
                }
                v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAnd16 {
+               if v_0_0.Op != OpAnd8 {
                        break
                }
                v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpConst16 {
+               if v_0_0_0.Op != OpConst8 {
                        break
                }
                c := v_0_0_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst32 {
+               if v_1.Op != OpConst64 {
                        break
                }
                d := v_1.AuxInt
@@ -6043,23 +8646,23 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (ZeroExt16to64 (And16 (Const16 [c]) _)) (Const64 [d]))
+       // match: (IsInBounds (ZeroExt8to64 (And8 _ (Const8 [c]))) (Const64 [d]))
        // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpZeroExt16to64 {
+               if v_0.Op != OpZeroExt8to64 {
                        break
                }
                v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAnd16 {
+               if v_0_0.Op != OpAnd8 {
                        break
                }
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpConst16 {
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpConst8 {
                        break
                }
-               c := v_0_0_0.AuxInt
+               c := v_0_0_1.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
@@ -6072,21 +8675,21 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds                (And32 (Const32 [c]) _)  (Const32 [d]))
+       // match: (IsInBounds (And16 (Const16 [c]) _) (Const16 [d]))
        // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAnd32 {
+               if v_0.Op != OpAnd16 {
                        break
                }
                v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpConst32 {
+               if v_0_0.Op != OpConst16 {
                        break
                }
                c := v_0_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst32 {
+               if v_1.Op != OpConst16 {
                        break
                }
                d := v_1.AuxInt
@@ -6097,25 +8700,21 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (ZeroExt32to64 (And32 (Const32 [c]) _)) (Const64 [d]))
+       // match: (IsInBounds (And16 _ (Const16 [c])) (Const16 [d]))
        // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpZeroExt32to64 {
-                       break
-               }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAnd32 {
+               if v_0.Op != OpAnd16 {
                        break
                }
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpConst32 {
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst16 {
                        break
                }
-               c := v_0_0_0.AuxInt
+               c := v_0_1.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst64 {
+               if v_1.Op != OpConst16 {
                        break
                }
                d := v_1.AuxInt
@@ -6126,21 +8725,25 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds                (And64 (Const64 [c]) _)  (Const64 [d]))
+       // match: (IsInBounds (ZeroExt16to32 (And16 (Const16 [c]) _)) (Const32 [d]))
        // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAnd64 {
+               if v_0.Op != OpZeroExt16to32 {
                        break
                }
                v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpConst64 {
+               if v_0_0.Op != OpAnd16 {
                        break
                }
-               c := v_0_0.AuxInt
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpConst16 {
+                       break
+               }
+               c := v_0_0_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst64 {
+               if v_1.Op != OpConst32 {
                        break
                }
                d := v_1.AuxInt
@@ -6151,187 +8754,254 @@ func rewriteValuegeneric_OpIsInBounds(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (Const32 [c]) (Const32 [d]))
-       // cond:
-       // result: (ConstBool [b2i(0 <= c && c < d)])
+       // match: (IsInBounds (ZeroExt16to32 (And16 _ (Const16 [c]))) (Const32 [d]))
+       // cond: 0 <= c && c < d
+       // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               if v_0.Op != OpZeroExt16to32 {
                        break
                }
-               c := v_0.AuxInt
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAnd16 {
+                       break
+               }
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpConst16 {
+                       break
+               }
+               c := v_0_0_1.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
                d := v_1.AuxInt
+               if !(0 <= c && c < d) {
+                       break
+               }
                v.reset(OpConstBool)
-               v.AuxInt = b2i(0 <= c && c < d)
+               v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (Const64 [c]) (Const64 [d]))
-       // cond:
-       // result: (ConstBool [b2i(0 <= c && c < d)])
+       // match: (IsInBounds (ZeroExt16to64 (And16 (Const16 [c]) _)) (Const64 [d]))
+       // cond: 0 <= c && c < d
+       // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst64 {
+               if v_0.Op != OpZeroExt16to64 {
                        break
                }
-               c := v_0.AuxInt
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAnd16 {
+                       break
+               }
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpConst16 {
+                       break
+               }
+               c := v_0_0_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
                }
                d := v_1.AuxInt
+               if !(0 <= c && c < d) {
+                       break
+               }
                v.reset(OpConstBool)
-               v.AuxInt = b2i(0 <= c && c < d)
+               v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (Mod32u _ y) y)
-       // cond:
+       // match: (IsInBounds (ZeroExt16to64 (And16 _ (Const16 [c]))) (Const64 [d]))
+       // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpMod32u {
+               if v_0.Op != OpZeroExt16to64 {
                        break
                }
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAnd16 {
+                       break
+               }
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpConst16 {
+                       break
+               }
+               c := v_0_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               d := v_1.AuxInt
+               if !(0 <= c && c < d) {
                        break
                }
                v.reset(OpConstBool)
                v.AuxInt = 1
                return true
        }
-       // match: (IsInBounds (Mod64u _ y) y)
-       // cond:
+       // match: (IsInBounds (And32 (Const32 [c]) _) (Const32 [d]))
+       // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpMod64u {
+               if v_0.Op != OpAnd32 {
                        break
                }
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
                        break
                }
-               v.reset(OpConstBool)
-               v.AuxInt = 1
-               return true
-       }
-       return false
-}
-func rewriteValuegeneric_OpIsNonNil(v *Value) bool {
-       // match: (IsNonNil (ConstNil))
-       // cond:
-       // result: (ConstBool [0])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpConstNil {
+               c := v_0_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
                        break
                }
-               v.reset(OpConstBool)
-               v.AuxInt = 0
-               return true
-       }
-       return false
-}
-func rewriteValuegeneric_OpIsSliceInBounds(v *Value) bool {
-       // match: (IsSliceInBounds x x)
-       // cond:
-       // result: (ConstBool [1])
-       for {
-               x := v.Args[0]
-               if x != v.Args[1] {
+               d := v_1.AuxInt
+               if !(0 <= c && c < d) {
                        break
                }
                v.reset(OpConstBool)
                v.AuxInt = 1
                return true
        }
-       // match: (IsSliceInBounds (And32 (Const32 [c]) _) (Const32 [d]))
-       // cond: 0 <= c && c <= d
+       // match: (IsInBounds (And32 _ (Const32 [c])) (Const32 [d]))
+       // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAnd32 {
                        break
                }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpConst32 {
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
                        break
                }
-               c := v_0_0.AuxInt
+               c := v_0_1.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
                d := v_1.AuxInt
-               if !(0 <= c && c <= d) {
+               if !(0 <= c && c < d) {
                        break
                }
                v.reset(OpConstBool)
                v.AuxInt = 1
                return true
        }
-       // match: (IsSliceInBounds (And64 (Const64 [c]) _) (Const64 [d]))
-       // cond: 0 <= c && c <= d
+       // match: (IsInBounds (ZeroExt32to64 (And32 (Const32 [c]) _)) (Const64 [d]))
+       // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAnd64 {
+               if v_0.Op != OpZeroExt32to64 {
                        break
                }
                v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpConst64 {
+               if v_0_0.Op != OpAnd32 {
                        break
                }
-               c := v_0_0.AuxInt
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpConst32 {
+                       break
+               }
+               c := v_0_0_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
                }
                d := v_1.AuxInt
-               if !(0 <= c && c <= d) {
+               if !(0 <= c && c < d) {
                        break
                }
                v.reset(OpConstBool)
                v.AuxInt = 1
                return true
        }
-       // match: (IsSliceInBounds (Const32 [0]) _)
-       // cond:
+       // match: (IsInBounds (ZeroExt32to64 (And32 _ (Const32 [c]))) (Const64 [d]))
+       // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               if v_0.Op != OpZeroExt32to64 {
                        break
                }
-               if v_0.AuxInt != 0 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAnd32 {
                        break
                }
-               v.reset(OpConstBool)
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpConst32 {
+                       break
+               }
+               c := v_0_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               d := v_1.AuxInt
+               if !(0 <= c && c < d) {
+                       break
+               }
+               v.reset(OpConstBool)
                v.AuxInt = 1
                return true
        }
-       // match: (IsSliceInBounds (Const64 [0]) _)
-       // cond:
+       // match: (IsInBounds (And64 (Const64 [c]) _) (Const64 [d]))
+       // cond: 0 <= c && c < d
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst64 {
+               if v_0.Op != OpAnd64 {
                        break
                }
-               if v_0.AuxInt != 0 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
+                       break
+               }
+               c := v_0_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               d := v_1.AuxInt
+               if !(0 <= c && c < d) {
                        break
                }
                v.reset(OpConstBool)
                v.AuxInt = 1
                return true
        }
-       // match: (IsSliceInBounds (Const32 [c]) (Const32 [d]))
+       // match: (IsInBounds (And64 _ (Const64 [c])) (Const64 [d]))
+       // cond: 0 <= c && c < d
+       // result: (ConstBool [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd64 {
+                       break
+               }
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               c := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               d := v_1.AuxInt
+               if !(0 <= c && c < d) {
+                       break
+               }
+               v.reset(OpConstBool)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (IsInBounds (Const32 [c]) (Const32 [d]))
        // cond:
-       // result: (ConstBool [b2i(0 <= c && c <= d)])
+       // result: (ConstBool [b2i(0 <= c && c < d)])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst32 {
@@ -6344,12 +9014,12 @@ func rewriteValuegeneric_OpIsSliceInBounds(v *Value) bool {
                }
                d := v_1.AuxInt
                v.reset(OpConstBool)
-               v.AuxInt = b2i(0 <= c && c <= d)
+               v.AuxInt = b2i(0 <= c && c < d)
                return true
        }
-       // match: (IsSliceInBounds (Const64 [c]) (Const64 [d]))
+       // match: (IsInBounds (Const64 [c]) (Const64 [d]))
        // cond:
-       // result: (ConstBool [b2i(0 <= c && c <= d)])
+       // result: (ConstBool [b2i(0 <= c && c < d)])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst64 {
@@ -6362,227 +9032,455 @@ func rewriteValuegeneric_OpIsSliceInBounds(v *Value) bool {
                }
                d := v_1.AuxInt
                v.reset(OpConstBool)
-               v.AuxInt = b2i(0 <= c && c <= d)
+               v.AuxInt = b2i(0 <= c && c < d)
                return true
        }
-       // match: (IsSliceInBounds (SliceLen x) (SliceCap x))
+       // match: (IsInBounds (Mod32u _ y) y)
        // cond:
        // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpSliceLen {
-                       break
-               }
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpSliceCap {
+               if v_0.Op != OpMod32u {
                        break
                }
-               if x != v_1.Args[0] {
+               y := v_0.Args[1]
+               if y != v.Args[1] {
                        break
                }
                v.reset(OpConstBool)
                v.AuxInt = 1
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpLeq16(v *Value) bool {
-       // match: (Leq16 (Const16 [c]) (Const16 [d]))
+       // match: (IsInBounds (Mod64u _ y) y)
        // cond:
-       // result: (ConstBool [b2i(c <= d)])
+       // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               if v_0.Op != OpMod64u {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst16 {
+               y := v_0.Args[1]
+               if y != v.Args[1] {
                        break
                }
-               d := v_1.AuxInt
                v.reset(OpConstBool)
-               v.AuxInt = b2i(c <= d)
+               v.AuxInt = 1
                return true
        }
        return false
 }
-func rewriteValuegeneric_OpLeq16U(v *Value) bool {
-       // match: (Leq16U (Const16 [c]) (Const16 [d]))
+func rewriteValuegeneric_OpIsNonNil(v *Value) bool {
+       // match: (IsNonNil (ConstNil))
        // cond:
-       // result: (ConstBool [b2i(uint16(c) <= uint16(d))])
+       // result: (ConstBool [0])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
-                       break
-               }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst16 {
+               if v_0.Op != OpConstNil {
                        break
                }
-               d := v_1.AuxInt
                v.reset(OpConstBool)
-               v.AuxInt = b2i(uint16(c) <= uint16(d))
+               v.AuxInt = 0
                return true
        }
        return false
 }
-func rewriteValuegeneric_OpLeq32(v *Value) bool {
-       // match: (Leq32 (Const32 [c]) (Const32 [d]))
+func rewriteValuegeneric_OpIsSliceInBounds(v *Value) bool {
+       // match: (IsSliceInBounds x x)
        // cond:
-       // result: (ConstBool [b2i(c <= d)])
+       // result: (ConstBool [1])
+       for {
+               x := v.Args[0]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpConstBool)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (IsSliceInBounds (And32 (Const32 [c]) _) (Const32 [d]))
+       // cond: 0 <= c && c <= d
+       // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               if v_0.Op != OpAnd32 {
                        break
                }
-               c := v_0.AuxInt
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
+                       break
+               }
+               c := v_0_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
                d := v_1.AuxInt
+               if !(0 <= c && c <= d) {
+                       break
+               }
                v.reset(OpConstBool)
-               v.AuxInt = b2i(c <= d)
+               v.AuxInt = 1
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpLeq32U(v *Value) bool {
-       // match: (Leq32U (Const32 [c]) (Const32 [d]))
-       // cond:
-       // result: (ConstBool [b2i(uint32(c) <= uint32(d))])
+       // match: (IsSliceInBounds (And32 _ (Const32 [c])) (Const32 [d]))
+       // cond: 0 <= c && c <= d
+       // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               if v_0.Op != OpAnd32 {
                        break
                }
-               c := v_0.AuxInt
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
+                       break
+               }
+               c := v_0_1.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
                d := v_1.AuxInt
+               if !(0 <= c && c <= d) {
+                       break
+               }
                v.reset(OpConstBool)
-               v.AuxInt = b2i(uint32(c) <= uint32(d))
+               v.AuxInt = 1
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpLeq64(v *Value) bool {
-       // match: (Leq64 (Const64 [c]) (Const64 [d]))
-       // cond:
-       // result: (ConstBool [b2i(c <= d)])
+       // match: (IsSliceInBounds (And64 (Const64 [c]) _) (Const64 [d]))
+       // cond: 0 <= c && c <= d
+       // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst64 {
+               if v_0.Op != OpAnd64 {
                        break
                }
-               c := v_0.AuxInt
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
+                       break
+               }
+               c := v_0_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
                }
                d := v_1.AuxInt
+               if !(0 <= c && c <= d) {
+                       break
+               }
                v.reset(OpConstBool)
-               v.AuxInt = b2i(c <= d)
+               v.AuxInt = 1
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpLeq64U(v *Value) bool {
-       // match: (Leq64U (Const64 [c]) (Const64 [d]))
-       // cond:
-       // result: (ConstBool [b2i(uint64(c) <= uint64(d))])
+       // match: (IsSliceInBounds (And64 _ (Const64 [c])) (Const64 [d]))
+       // cond: 0 <= c && c <= d
+       // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst64 {
+               if v_0.Op != OpAnd64 {
                        break
                }
-               c := v_0.AuxInt
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               c := v_0_1.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
                }
                d := v_1.AuxInt
+               if !(0 <= c && c <= d) {
+                       break
+               }
                v.reset(OpConstBool)
-               v.AuxInt = b2i(uint64(c) <= uint64(d))
+               v.AuxInt = 1
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpLeq8(v *Value) bool {
-       // match: (Leq8  (Const8  [c]) (Const8  [d]))
+       // match: (IsSliceInBounds (Const32 [0]) _)
        // cond:
-       // result: (ConstBool [b2i(c <= d)])
+       // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst8 {
+               if v_0.Op != OpConst32 {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst8 {
+               if v_0.AuxInt != 0 {
                        break
                }
-               d := v_1.AuxInt
                v.reset(OpConstBool)
-               v.AuxInt = b2i(c <= d)
+               v.AuxInt = 1
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpLeq8U(v *Value) bool {
-       // match: (Leq8U  (Const8  [c]) (Const8  [d]))
+       // match: (IsSliceInBounds (Const64 [0]) _)
        // cond:
-       // result: (ConstBool [b2i(uint8(c)  <= uint8(d))])
+       // result: (ConstBool [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst8 {
+               if v_0.Op != OpConst64 {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst8 {
+               if v_0.AuxInt != 0 {
                        break
                }
-               d := v_1.AuxInt
                v.reset(OpConstBool)
-               v.AuxInt = b2i(uint8(c) <= uint8(d))
+               v.AuxInt = 1
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpLess16(v *Value) bool {
-       // match: (Less16 (Const16 [c]) (Const16 [d]))
+       // match: (IsSliceInBounds (Const32 [c]) (Const32 [d]))
        // cond:
-       // result: (ConstBool [b2i(c < d)])
+       // result: (ConstBool [b2i(0 <= c && c <= d)])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               if v_0.Op != OpConst32 {
                        break
                }
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst16 {
+               if v_1.Op != OpConst32 {
                        break
                }
                d := v_1.AuxInt
                v.reset(OpConstBool)
-               v.AuxInt = b2i(c < d)
+               v.AuxInt = b2i(0 <= c && c <= d)
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpLess16U(v *Value) bool {
-       // match: (Less16U (Const16 [c]) (Const16 [d]))
+       // match: (IsSliceInBounds (Const64 [c]) (Const64 [d]))
        // cond:
-       // result: (ConstBool [b2i(uint16(c) < uint16(d))])
+       // result: (ConstBool [b2i(0 <= c && c <= d)])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(0 <= c && c <= d)
+               return true
+       }
+       // match: (IsSliceInBounds (SliceLen x) (SliceCap x))
+       // cond:
+       // result: (ConstBool [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSliceLen {
+                       break
+               }
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpSliceCap {
+                       break
+               }
+               if x != v_1.Args[0] {
+                       break
+               }
+               v.reset(OpConstBool)
+               v.AuxInt = 1
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpLeq16(v *Value) bool {
+       // match: (Leq16 (Const16 [c]) (Const16 [d]))
+       // cond:
+       // result: (ConstBool [b2i(c <= d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c <= d)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpLeq16U(v *Value) bool {
+       // match: (Leq16U (Const16 [c]) (Const16 [d]))
+       // cond:
+       // result: (ConstBool [b2i(uint16(c) <= uint16(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(uint16(c) <= uint16(d))
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpLeq32(v *Value) bool {
+       // match: (Leq32 (Const32 [c]) (Const32 [d]))
+       // cond:
+       // result: (ConstBool [b2i(c <= d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c <= d)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpLeq32U(v *Value) bool {
+       // match: (Leq32U (Const32 [c]) (Const32 [d]))
+       // cond:
+       // result: (ConstBool [b2i(uint32(c) <= uint32(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(uint32(c) <= uint32(d))
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpLeq64(v *Value) bool {
+       // match: (Leq64 (Const64 [c]) (Const64 [d]))
+       // cond:
+       // result: (ConstBool [b2i(c <= d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c <= d)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpLeq64U(v *Value) bool {
+       // match: (Leq64U (Const64 [c]) (Const64 [d]))
+       // cond:
+       // result: (ConstBool [b2i(uint64(c) <= uint64(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(uint64(c) <= uint64(d))
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpLeq8(v *Value) bool {
+       // match: (Leq8 (Const8 [c]) (Const8 [d]))
+       // cond:
+       // result: (ConstBool [b2i(c <= d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c <= d)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpLeq8U(v *Value) bool {
+       // match: (Leq8U (Const8 [c]) (Const8 [d]))
+       // cond:
+       // result: (ConstBool [b2i(uint8(c)  <= uint8(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(uint8(c) <= uint8(d))
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpLess16(v *Value) bool {
+       // match: (Less16 (Const16 [c]) (Const16 [d]))
+       // cond:
+       // result: (ConstBool [b2i(c < d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c < d)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpLess16U(v *Value) bool {
+       // match: (Less16U (Const16 [c]) (Const16 [d]))
+       // cond:
+       // result: (ConstBool [b2i(uint16(c) < uint16(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
                        break
                }
                c := v_0.AuxInt
@@ -6682,7 +9580,7 @@ func rewriteValuegeneric_OpLess64U(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpLess8(v *Value) bool {
-       // match: (Less8  (Const8  [c]) (Const8  [d]))
+       // match: (Less8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (ConstBool [b2i(c < d)])
        for {
@@ -6703,7 +9601,7 @@ func rewriteValuegeneric_OpLess8(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpLess8U(v *Value) bool {
-       // match: (Less8U  (Const8  [c]) (Const8  [d]))
+       // match: (Less8U (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (ConstBool [b2i(uint8(c)  < uint8(d))])
        for {
@@ -6915,7 +9813,7 @@ func rewriteValuegeneric_OpLoad(v *Value) bool {
 func rewriteValuegeneric_OpLsh16x16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh16x16  <t> x (Const16 [c]))
+       // match: (Lsh16x16 <t> x (Const16 [c]))
        // cond:
        // result: (Lsh16x64  x (Const64 <t> [int64(uint16(c))]))
        for {
@@ -6933,7 +9831,7 @@ func rewriteValuegeneric_OpLsh16x16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh16x16  (Const16 [0]) _)
+       // match: (Lsh16x16 (Const16 [0]) _)
        // cond:
        // result: (Const16 [0])
        for {
@@ -6953,7 +9851,7 @@ func rewriteValuegeneric_OpLsh16x16(v *Value) bool {
 func rewriteValuegeneric_OpLsh16x32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh16x32  <t> x (Const32 [c]))
+       // match: (Lsh16x32 <t> x (Const32 [c]))
        // cond:
        // result: (Lsh16x64  x (Const64 <t> [int64(uint32(c))]))
        for {
@@ -6971,7 +9869,7 @@ func rewriteValuegeneric_OpLsh16x32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh16x32  (Const16 [0]) _)
+       // match: (Lsh16x32 (Const16 [0]) _)
        // cond:
        // result: (Const16 [0])
        for {
@@ -6993,7 +9891,7 @@ func rewriteValuegeneric_OpLsh16x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh16x64  (Const16 [c]) (Const64 [d]))
+       // match: (Lsh16x64 (Const16 [c]) (Const64 [d]))
        // cond:
        // result: (Const16 [int64(int16(c) << uint64(d))])
        for {
@@ -7011,7 +9909,7 @@ func rewriteValuegeneric_OpLsh16x64(v *Value) bool {
                v.AuxInt = int64(int16(c) << uint64(d))
                return true
        }
-       // match: (Lsh16x64  x (Const64 [0]))
+       // match: (Lsh16x64 x (Const64 [0]))
        // cond:
        // result: x
        for {
@@ -7028,7 +9926,7 @@ func rewriteValuegeneric_OpLsh16x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh16x64  (Const16 [0]) _)
+       // match: (Lsh16x64 (Const16 [0]) _)
        // cond:
        // result: (Const16 [0])
        for {
@@ -7043,7 +9941,7 @@ func rewriteValuegeneric_OpLsh16x64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Lsh16x64  _ (Const64 [c]))
+       // match: (Lsh16x64 _ (Const64 [c]))
        // cond: uint64(c) >= 16
        // result: (Const16 [0])
        for {
@@ -7132,7 +10030,7 @@ func rewriteValuegeneric_OpLsh16x64(v *Value) bool {
 func rewriteValuegeneric_OpLsh16x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh16x8   <t> x (Const8  [c]))
+       // match: (Lsh16x8 <t> x (Const8 [c]))
        // cond:
        // result: (Lsh16x64  x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -7150,7 +10048,7 @@ func rewriteValuegeneric_OpLsh16x8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh16x8  (Const16 [0]) _)
+       // match: (Lsh16x8 (Const16 [0]) _)
        // cond:
        // result: (Const16 [0])
        for {
@@ -7170,7 +10068,7 @@ func rewriteValuegeneric_OpLsh16x8(v *Value) bool {
 func rewriteValuegeneric_OpLsh32x16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh32x16  <t> x (Const16 [c]))
+       // match: (Lsh32x16 <t> x (Const16 [c]))
        // cond:
        // result: (Lsh32x64  x (Const64 <t> [int64(uint16(c))]))
        for {
@@ -7188,7 +10086,7 @@ func rewriteValuegeneric_OpLsh32x16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh32x16  (Const32 [0]) _)
+       // match: (Lsh32x16 (Const32 [0]) _)
        // cond:
        // result: (Const32 [0])
        for {
@@ -7208,7 +10106,7 @@ func rewriteValuegeneric_OpLsh32x16(v *Value) bool {
 func rewriteValuegeneric_OpLsh32x32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh32x32  <t> x (Const32 [c]))
+       // match: (Lsh32x32 <t> x (Const32 [c]))
        // cond:
        // result: (Lsh32x64  x (Const64 <t> [int64(uint32(c))]))
        for {
@@ -7226,7 +10124,7 @@ func rewriteValuegeneric_OpLsh32x32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh32x32  (Const32 [0]) _)
+       // match: (Lsh32x32 (Const32 [0]) _)
        // cond:
        // result: (Const32 [0])
        for {
@@ -7248,7 +10146,7 @@ func rewriteValuegeneric_OpLsh32x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh32x64  (Const32 [c]) (Const64 [d]))
+       // match: (Lsh32x64 (Const32 [c]) (Const64 [d]))
        // cond:
        // result: (Const32 [int64(int32(c) << uint64(d))])
        for {
@@ -7266,7 +10164,7 @@ func rewriteValuegeneric_OpLsh32x64(v *Value) bool {
                v.AuxInt = int64(int32(c) << uint64(d))
                return true
        }
-       // match: (Lsh32x64  x (Const64 [0]))
+       // match: (Lsh32x64 x (Const64 [0]))
        // cond:
        // result: x
        for {
@@ -7283,7 +10181,7 @@ func rewriteValuegeneric_OpLsh32x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh32x64  (Const32 [0]) _)
+       // match: (Lsh32x64 (Const32 [0]) _)
        // cond:
        // result: (Const32 [0])
        for {
@@ -7298,7 +10196,7 @@ func rewriteValuegeneric_OpLsh32x64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Lsh32x64  _ (Const64 [c]))
+       // match: (Lsh32x64 _ (Const64 [c]))
        // cond: uint64(c) >= 32
        // result: (Const32 [0])
        for {
@@ -7387,7 +10285,7 @@ func rewriteValuegeneric_OpLsh32x64(v *Value) bool {
 func rewriteValuegeneric_OpLsh32x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh32x8   <t> x (Const8  [c]))
+       // match: (Lsh32x8 <t> x (Const8 [c]))
        // cond:
        // result: (Lsh32x64  x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -7405,7 +10303,7 @@ func rewriteValuegeneric_OpLsh32x8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh32x8  (Const32 [0]) _)
+       // match: (Lsh32x8 (Const32 [0]) _)
        // cond:
        // result: (Const32 [0])
        for {
@@ -7425,7 +10323,7 @@ func rewriteValuegeneric_OpLsh32x8(v *Value) bool {
 func rewriteValuegeneric_OpLsh64x16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh64x16  <t> x (Const16 [c]))
+       // match: (Lsh64x16 <t> x (Const16 [c]))
        // cond:
        // result: (Lsh64x64  x (Const64 <t> [int64(uint16(c))]))
        for {
@@ -7443,7 +10341,7 @@ func rewriteValuegeneric_OpLsh64x16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh64x16  (Const64 [0]) _)
+       // match: (Lsh64x16 (Const64 [0]) _)
        // cond:
        // result: (Const64 [0])
        for {
@@ -7463,7 +10361,7 @@ func rewriteValuegeneric_OpLsh64x16(v *Value) bool {
 func rewriteValuegeneric_OpLsh64x32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh64x32  <t> x (Const32 [c]))
+       // match: (Lsh64x32 <t> x (Const32 [c]))
        // cond:
        // result: (Lsh64x64  x (Const64 <t> [int64(uint32(c))]))
        for {
@@ -7481,7 +10379,7 @@ func rewriteValuegeneric_OpLsh64x32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh64x32  (Const64 [0]) _)
+       // match: (Lsh64x32 (Const64 [0]) _)
        // cond:
        // result: (Const64 [0])
        for {
@@ -7503,7 +10401,7 @@ func rewriteValuegeneric_OpLsh64x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh64x64  (Const64 [c]) (Const64 [d]))
+       // match: (Lsh64x64 (Const64 [c]) (Const64 [d]))
        // cond:
        // result: (Const64 [c << uint64(d)])
        for {
@@ -7521,7 +10419,7 @@ func rewriteValuegeneric_OpLsh64x64(v *Value) bool {
                v.AuxInt = c << uint64(d)
                return true
        }
-       // match: (Lsh64x64  x (Const64 [0]))
+       // match: (Lsh64x64 x (Const64 [0]))
        // cond:
        // result: x
        for {
@@ -7538,7 +10436,7 @@ func rewriteValuegeneric_OpLsh64x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh64x64  (Const64 [0]) _)
+       // match: (Lsh64x64 (Const64 [0]) _)
        // cond:
        // result: (Const64 [0])
        for {
@@ -7553,7 +10451,7 @@ func rewriteValuegeneric_OpLsh64x64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Lsh64x64  _ (Const64 [c]))
+       // match: (Lsh64x64 _ (Const64 [c]))
        // cond: uint64(c) >= 64
        // result: (Const64 [0])
        for {
@@ -7642,7 +10540,7 @@ func rewriteValuegeneric_OpLsh64x64(v *Value) bool {
 func rewriteValuegeneric_OpLsh64x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh64x8   <t> x (Const8  [c]))
+       // match: (Lsh64x8 <t> x (Const8 [c]))
        // cond:
        // result: (Lsh64x64  x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -7660,7 +10558,7 @@ func rewriteValuegeneric_OpLsh64x8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh64x8  (Const64 [0]) _)
+       // match: (Lsh64x8 (Const64 [0]) _)
        // cond:
        // result: (Const64 [0])
        for {
@@ -7680,7 +10578,7 @@ func rewriteValuegeneric_OpLsh64x8(v *Value) bool {
 func rewriteValuegeneric_OpLsh8x16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh8x16  <t> x (Const16 [c]))
+       // match: (Lsh8x16 <t> x (Const16 [c]))
        // cond:
        // result: (Lsh8x64  x (Const64 <t> [int64(uint16(c))]))
        for {
@@ -7698,7 +10596,7 @@ func rewriteValuegeneric_OpLsh8x16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh8x16   (Const8 [0]) _)
+       // match: (Lsh8x16 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -7718,7 +10616,7 @@ func rewriteValuegeneric_OpLsh8x16(v *Value) bool {
 func rewriteValuegeneric_OpLsh8x32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh8x32  <t> x (Const32 [c]))
+       // match: (Lsh8x32 <t> x (Const32 [c]))
        // cond:
        // result: (Lsh8x64  x (Const64 <t> [int64(uint32(c))]))
        for {
@@ -7736,7 +10634,7 @@ func rewriteValuegeneric_OpLsh8x32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh8x32   (Const8 [0]) _)
+       // match: (Lsh8x32 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -7758,7 +10656,7 @@ func rewriteValuegeneric_OpLsh8x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Lsh8x64   (Const8  [c]) (Const64 [d]))
+       // match: (Lsh8x64 (Const8 [c]) (Const64 [d]))
        // cond:
        // result: (Const8  [int64(int8(c) << uint64(d))])
        for {
@@ -7776,7 +10674,7 @@ func rewriteValuegeneric_OpLsh8x64(v *Value) bool {
                v.AuxInt = int64(int8(c) << uint64(d))
                return true
        }
-       // match: (Lsh8x64   x (Const64 [0]))
+       // match: (Lsh8x64 x (Const64 [0]))
        // cond:
        // result: x
        for {
@@ -7793,7 +10691,7 @@ func rewriteValuegeneric_OpLsh8x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Lsh8x64   (Const8 [0]) _)
+       // match: (Lsh8x64 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -7808,7 +10706,7 @@ func rewriteValuegeneric_OpLsh8x64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Lsh8x64   _ (Const64 [c]))
+       // match: (Lsh8x64 _ (Const64 [c]))
        // cond: uint64(c) >= 8
        // result: (Const8  [0])
        for {
@@ -7824,7 +10722,7 @@ func rewriteValuegeneric_OpLsh8x64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Lsh8x64  <t> (Lsh8x64  x (Const64 [c])) (Const64 [d]))
+       // match: (Lsh8x64 <t> (Lsh8x64 x (Const64 [c])) (Const64 [d]))
        // cond: !uaddOvf(c,d)
        // result: (Lsh8x64  x (Const64 <t> [c+d]))
        for {
@@ -7897,7 +10795,7 @@ func rewriteValuegeneric_OpLsh8x64(v *Value) bool {
 func rewriteValuegeneric_OpLsh8x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Lsh8x8   <t> x (Const8  [c]))
+       // match: (Lsh8x8 <t> x (Const8 [c]))
        // cond:
        // result: (Lsh8x64  x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -7915,7 +10813,7 @@ func rewriteValuegeneric_OpLsh8x8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Lsh8x8   (Const8 [0]) _)
+       // match: (Lsh8x8 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -7978,7 +10876,7 @@ func rewriteValuegeneric_OpMod16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Mod16  <t> x (Const16 [c]))
+       // match: (Mod16 <t> x (Const16 [c]))
        // cond: x.Op != OpConst16 && (c > 0 || c == -1<<15)
        // result: (Sub16 x (Mul16 <t> (Div16  <t> x (Const16 <t> [c])) (Const16 <t> [c])))
        for {
@@ -8131,7 +11029,7 @@ func rewriteValuegeneric_OpMod32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Mod32  <t> x (Const32 [c]))
+       // match: (Mod32 <t> x (Const32 [c]))
        // cond: x.Op != OpConst32 && (c > 0 || c == -1<<31)
        // result: (Sub32 x (Mul32 <t> (Div32  <t> x (Const32 <t> [c])) (Const32 <t> [c])))
        for {
@@ -8284,7 +11182,7 @@ func rewriteValuegeneric_OpMod64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Mod64  <t> x (Const64 [c]))
+       // match: (Mod64 <t> x (Const64 [c]))
        // cond: x.Op != OpConst64 && (c > 0 || c == -1<<63)
        // result: (Sub64 x (Mul64 <t> (Div64  <t> x (Const64 <t> [c])) (Const64 <t> [c])))
        for {
@@ -8394,7 +11292,7 @@ func rewriteValuegeneric_OpMod64u(v *Value) bool {
 func rewriteValuegeneric_OpMod8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Mod8  (Const8  [c]) (Const8  [d]))
+       // match: (Mod8 (Const8 [c]) (Const8 [d]))
        // cond: d != 0
        // result: (Const8  [int64(int8(c % d))])
        for {
@@ -8415,7 +11313,7 @@ func rewriteValuegeneric_OpMod8(v *Value) bool {
                v.AuxInt = int64(int8(c % d))
                return true
        }
-       // match: (Mod8  <t> n (Const8  [c]))
+       // match: (Mod8 <t> n (Const8 [c]))
        // cond: c < 0 && c != -1<<7
        // result: (Mod8  <t> n (Const8  <t> [-c]))
        for {
@@ -8437,7 +11335,7 @@ func rewriteValuegeneric_OpMod8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Mod8   <t> x (Const8  [c]))
+       // match: (Mod8 <t> x (Const8 [c]))
        // cond: x.Op != OpConst8  && (c > 0 || c == -1<<7)
        // result: (Sub8  x (Mul8  <t> (Div8   <t> x (Const8  <t> [c])) (Const8  <t> [c])))
        for {
@@ -8471,7 +11369,7 @@ func rewriteValuegeneric_OpMod8(v *Value) bool {
 func rewriteValuegeneric_OpMod8u(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Mod8u  (Const8 [c])  (Const8  [d]))
+       // match: (Mod8u (Const8 [c]) (Const8 [d]))
        // cond: d != 0
        // result: (Const8  [int64(uint8(c) % uint8(d))])
        for {
@@ -8492,7 +11390,7 @@ func rewriteValuegeneric_OpMod8u(v *Value) bool {
                v.AuxInt = int64(uint8(c) % uint8(d))
                return true
        }
-       // match: (Mod8u  <t> n (Const8  [c]))
+       // match: (Mod8u <t> n (Const8 [c]))
        // cond: isPowerOfTwo(c&0xff)
        // result: (And8 n (Const8 <t> [(c&0xff)-1]))
        for {
@@ -8513,7 +11411,7 @@ func rewriteValuegeneric_OpMod8u(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Mod8u  <t> x (Const8  [c]))
+       // match: (Mod8u <t> x (Const8 [c]))
        // cond: x.Op != OpConst8  && c > 0 && umagicOK(8 ,c)
        // result: (Sub8  x (Mul8  <t> (Div8u  <t> x (Const8  <t> [c])) (Const8  <t> [c])))
        for {
@@ -8544,35 +11442,402 @@ func rewriteValuegeneric_OpMod8u(v *Value) bool {
        }
        return false
 }
-func rewriteValuegeneric_OpMul16(v *Value) bool {
+func rewriteValuegeneric_OpMul16(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (Mul16 (Const16 [c]) (Const16 [d]))
+       // cond:
+       // result: (Const16 [int64(int16(c*d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConst16)
+               v.AuxInt = int64(int16(c * d))
+               return true
+       }
+       // match: (Mul16 (Const16 [d]) (Const16 [c]))
+       // cond:
+       // result: (Const16 [int64(int16(c*d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConst16)
+               v.AuxInt = int64(int16(c * d))
+               return true
+       }
+       // match: (Mul16 (Const16 [1]) x)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul16 x (Const16 [1]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul16 (Const16 [-1]) x)
+       // cond:
+       // result: (Neg16 x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               if v_0.AuxInt != -1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpNeg16)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul16 x (Const16 [-1]))
+       // cond:
+       // result: (Neg16 x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpNeg16)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul16 <t> n (Const16 [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (Lsh16x64 <t> n (Const64 <types.UInt64> [log2(c)]))
+       for {
+               t := v.Type
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpLsh16x64)
+               v.Type = t
+               v.AddArg(n)
+               v0 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v0.AuxInt = log2(c)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Mul16 <t> (Const16 [c]) n)
+       // cond: isPowerOfTwo(c)
+       // result: (Lsh16x64 <t> n (Const64 <types.UInt64> [log2(c)]))
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               c := v_0.AuxInt
+               n := v.Args[1]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpLsh16x64)
+               v.Type = t
+               v.AddArg(n)
+               v0 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v0.AuxInt = log2(c)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Mul16 <t> n (Const16 [c]))
+       // cond: t.IsSigned() && isPowerOfTwo(-c)
+       // result: (Neg16 (Lsh16x64 <t> n (Const64 <types.UInt64> [log2(-c)])))
+       for {
+               t := v.Type
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(t.IsSigned() && isPowerOfTwo(-c)) {
+                       break
+               }
+               v.reset(OpNeg16)
+               v0 := b.NewValue0(v.Pos, OpLsh16x64, t)
+               v0.AddArg(n)
+               v1 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v1.AuxInt = log2(-c)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Mul16 <t> (Const16 [c]) n)
+       // cond: t.IsSigned() && isPowerOfTwo(-c)
+       // result: (Neg16 (Lsh16x64 <t> n (Const64 <types.UInt64> [log2(-c)])))
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               c := v_0.AuxInt
+               n := v.Args[1]
+               if !(t.IsSigned() && isPowerOfTwo(-c)) {
+                       break
+               }
+               v.reset(OpNeg16)
+               v0 := b.NewValue0(v.Pos, OpLsh16x64, t)
+               v0.AddArg(n)
+               v1 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v1.AuxInt = log2(-c)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Mul16 (Const16 [0]) _)
+       // cond:
+       // result: (Const16 [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               if v_0.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpConst16)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (Mul16 _ (Const16 [0]))
+       // cond:
+       // result: (Const16 [0])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpConst16)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (Mul16 (Const16 <t> [c]) (Mul16 (Const16 <t> [d]) x))
+       // cond:
+       // result: (Mul16 (Const16 <t> [int64(int16(c*d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul16 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst16 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpMul16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c * d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul16 (Const16 <t> [c]) (Mul16 x (Const16 <t> [d])))
+       // cond:
+       // result: (Mul16 (Const16 <t> [int64(int16(c*d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul16 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst16 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpMul16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c * d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul16 (Mul16 (Const16 <t> [d]) x) (Const16 <t> [c]))
+       // cond:
+       // result: (Mul16 (Const16 <t> [int64(int16(c*d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul16 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpMul16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c * d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul16 (Mul16 x (Const16 <t> [d])) (Const16 <t> [c]))
+       // cond:
+       // result: (Mul16 (Const16 <t> [int64(int16(c*d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul16 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst16 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpMul16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c * d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValuegeneric_OpMul32(v *Value) bool {
        b := v.Block
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mul16  (Const16 [c])  (Const16 [d]))
+       // match: (Mul32 (Const32 [c]) (Const32 [d]))
        // cond:
-       // result: (Const16 [int64(int16(c*d))])
+       // result: (Const32 [int64(int32(c*d))])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               if v_0.Op != OpConst32 {
                        break
                }
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst16 {
+               if v_1.Op != OpConst32 {
                        break
                }
                d := v_1.AuxInt
-               v.reset(OpConst16)
-               v.AuxInt = int64(int16(c * d))
+               v.reset(OpConst32)
+               v.AuxInt = int64(int32(c * d))
                return true
        }
-       // match: (Mul16 (Const16 [1]) x)
+       // match: (Mul32 (Const32 [d]) (Const32 [c]))
+       // cond:
+       // result: (Const32 [int64(int32(c*d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConst32)
+               v.AuxInt = int64(int32(c * d))
+               return true
+       }
+       // match: (Mul32 (Const32 [1]) x)
        // cond:
        // result: x
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               if v_0.Op != OpConst32 {
                        break
                }
                if v_0.AuxInt != 1 {
@@ -8584,37 +11849,70 @@ func rewriteValuegeneric_OpMul16(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Mul16 (Const16 [-1]) x)
+       // match: (Mul32 x (Const32 [1]))
        // cond:
-       // result: (Neg16 x)
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul32 (Const32 [-1]) x)
+       // cond:
+       // result: (Neg32 x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               if v_0.Op != OpConst32 {
                        break
                }
                if v_0.AuxInt != -1 {
                        break
                }
                x := v.Args[1]
-               v.reset(OpNeg16)
+               v.reset(OpNeg32)
                v.AddArg(x)
                return true
        }
-       // match: (Mul16 <t> n (Const16 [c]))
+       // match: (Mul32 x (Const32 [-1]))
+       // cond:
+       // result: (Neg32 x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpNeg32)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul32 <t> n (Const32 [c]))
        // cond: isPowerOfTwo(c)
-       // result: (Lsh16x64 <t> n (Const64 <types.UInt64> [log2(c)]))
+       // result: (Lsh32x64 <t> n (Const64 <types.UInt64> [log2(c)]))
        for {
                t := v.Type
                n := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpConst16 {
+               if v_1.Op != OpConst32 {
                        break
                }
                c := v_1.AuxInt
                if !(isPowerOfTwo(c)) {
                        break
                }
-               v.reset(OpLsh16x64)
+               v.reset(OpLsh32x64)
                v.Type = t
                v.AddArg(n)
                v0 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
@@ -8622,98 +11920,93 @@ func rewriteValuegeneric_OpMul16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Mul16 <t> n (Const16 [c]))
-       // cond: t.IsSigned() && isPowerOfTwo(-c)
-       // result: (Neg16 (Lsh16x64 <t> n (Const64 <types.UInt64> [log2(-c)])))
+       // match: (Mul32 <t> (Const32 [c]) n)
+       // cond: isPowerOfTwo(c)
+       // result: (Lsh32x64 <t> n (Const64 <types.UInt64> [log2(c)]))
        for {
                t := v.Type
-               n := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst16 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
                        break
                }
-               c := v_1.AuxInt
-               if !(t.IsSigned() && isPowerOfTwo(-c)) {
+               c := v_0.AuxInt
+               n := v.Args[1]
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               v.reset(OpNeg16)
-               v0 := b.NewValue0(v.Pos, OpLsh16x64, t)
-               v0.AddArg(n)
-               v1 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
-               v1.AuxInt = log2(-c)
-               v0.AddArg(v1)
+               v.reset(OpLsh32x64)
+               v.Type = t
+               v.AddArg(n)
+               v0 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v0.AuxInt = log2(c)
                v.AddArg(v0)
                return true
        }
-       // match: (Mul16 x (Const16 <t> [c]))
-       // cond: x.Op != OpConst16
-       // result: (Mul16 (Const16 <t> [c]) x)
+       // match: (Mul32 <t> n (Const32 [c]))
+       // cond: t.IsSigned() && isPowerOfTwo(-c)
+       // result: (Neg32 (Lsh32x64 <t> n (Const64 <types.UInt64> [log2(-c)])))
        for {
-               x := v.Args[0]
+               t := v.Type
+               n := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpConst16 {
+               if v_1.Op != OpConst32 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst16) {
+               if !(t.IsSigned() && isPowerOfTwo(-c)) {
                        break
                }
-               v.reset(OpMul16)
-               v0 := b.NewValue0(v.Pos, OpConst16, t)
-               v0.AuxInt = c
+               v.reset(OpNeg32)
+               v0 := b.NewValue0(v.Pos, OpLsh32x64, t)
+               v0.AddArg(n)
+               v1 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v1.AuxInt = log2(-c)
+               v0.AddArg(v1)
                v.AddArg(v0)
-               v.AddArg(x)
                return true
        }
-       // match: (Mul16 (Const16 [0]) _)
-       // cond:
-       // result: (Const16 [0])
+       // match: (Mul32 <t> (Const32 [c]) n)
+       // cond: t.IsSigned() && isPowerOfTwo(-c)
+       // result: (Neg32 (Lsh32x64 <t> n (Const64 <types.UInt64> [log2(-c)])))
        for {
+               t := v.Type
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
-                       break
-               }
-               if v_0.AuxInt != 0 {
-                       break
-               }
-               v.reset(OpConst16)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (Mul16 x l:(Mul16 _ _))
-       // cond: (x.Op != OpMul16 && x.Op != OpConst16)
-       // result: (Mul16 l x)
-       for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpMul16 {
+               if v_0.Op != OpConst32 {
                        break
                }
-               if !(x.Op != OpMul16 && x.Op != OpConst16) {
+               c := v_0.AuxInt
+               n := v.Args[1]
+               if !(t.IsSigned() && isPowerOfTwo(-c)) {
                        break
                }
-               v.reset(OpMul16)
-               v.AddArg(l)
-               v.AddArg(x)
+               v.reset(OpNeg32)
+               v0 := b.NewValue0(v.Pos, OpLsh32x64, t)
+               v0.AddArg(n)
+               v1 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v1.AuxInt = log2(-c)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (Mul16 (Const16 <t> [c]) (Mul16 (Const16 <t> [d]) x))
+       // match: (Mul32 (Const32 <t> [c]) (Add32 <t> (Const32 <t> [d]) x))
        // cond:
-       // result: (Mul16 (Const16 <t> [int64(int16(c*d))]) x)
+       // result: (Add32 (Const32 <t> [int64(int32(c*d))]) (Mul32 <t> (Const32 <t> [c]) x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               if v_0.Op != OpConst32 {
                        break
                }
                t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpMul16 {
+               if v_1.Op != OpAdd32 {
+                       break
+               }
+               if v_1.Type != t {
                        break
                }
                v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst16 {
+               if v_1_0.Op != OpConst32 {
                        break
                }
                if v_1_0.Type != t {
@@ -8721,140 +12014,165 @@ func rewriteValuegeneric_OpMul16(v *Value) bool {
                }
                d := v_1_0.AuxInt
                x := v_1.Args[1]
-               v.reset(OpMul16)
-               v0 := b.NewValue0(v.Pos, OpConst16, t)
-               v0.AuxInt = int64(int16(c * d))
+               v.reset(OpAdd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c * d))
                v.AddArg(v0)
-               v.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpMul32, t)
+               v2 := b.NewValue0(v.Pos, OpConst32, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v.AddArg(v1)
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpMul32(v *Value) bool {
-       b := v.Block
-       _ = b
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (Mul32  (Const32 [c])  (Const32 [d]))
+       // match: (Mul32 (Const32 <t> [c]) (Add32 <t> x (Const32 <t> [d])))
        // cond:
-       // result: (Const32 [int64(int32(c*d))])
+       // result: (Add32 (Const32 <t> [int64(int32(c*d))]) (Mul32 <t> (Const32 <t> [c]) x))
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst32 {
                        break
                }
+               t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst32 {
+               if v_1.Op != OpAdd32 {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpConst32)
-               v.AuxInt = int64(int32(c * d))
+               if v_1.Type != t {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst32 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpAdd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c * d))
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpMul32, t)
+               v2 := b.NewValue0(v.Pos, OpConst32, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v.AddArg(v1)
                return true
        }
-       // match: (Mul32 (Const32 [1]) x)
+       // match: (Mul32 (Add32 <t> (Const32 <t> [d]) x) (Const32 <t> [c]))
        // cond:
-       // result: x
+       // result: (Add32 (Const32 <t> [int64(int32(c*d))]) (Mul32 <t> (Const32 <t> [c]) x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               if v_0.Op != OpAdd32 {
                        break
                }
-               if v_0.AuxInt != 1 {
+               t := v_0.Type
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               if v_0_0.Type != t {
+                       break
+               }
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c * d))
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpMul32, t)
+               v2 := b.NewValue0(v.Pos, OpConst32, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v.AddArg(v1)
                return true
        }
-       // match: (Mul32 (Const32 [-1]) x)
+       // match: (Mul32 (Add32 <t> x (Const32 <t> [d])) (Const32 <t> [c]))
        // cond:
-       // result: (Neg32 x)
+       // result: (Add32 (Const32 <t> [int64(int32(c*d))]) (Mul32 <t> (Const32 <t> [c]) x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               if v_0.Op != OpAdd32 {
                        break
                }
-               if v_0.AuxInt != -1 {
+               t := v_0.Type
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpNeg32)
-               v.AddArg(x)
-               return true
-       }
-       // match: (Mul32 <t> n (Const32 [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (Lsh32x64 <t> n (Const64 <types.UInt64> [log2(c)]))
-       for {
-               t := v.Type
-               n := v.Args[0]
+               if v_0_1.Type != t {
+                       break
+               }
+               d := v_0_1.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c)) {
+               if v_1.Type != t {
                        break
                }
-               v.reset(OpLsh32x64)
-               v.Type = t
-               v.AddArg(n)
-               v0 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
-               v0.AuxInt = log2(c)
+               c := v_1.AuxInt
+               v.reset(OpAdd32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c * d))
                v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpMul32, t)
+               v2 := b.NewValue0(v.Pos, OpConst32, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v.AddArg(v1)
                return true
        }
-       // match: (Mul32 <t> n (Const32 [c]))
-       // cond: t.IsSigned() && isPowerOfTwo(-c)
-       // result: (Neg32 (Lsh32x64 <t> n (Const64 <types.UInt64> [log2(-c)])))
+       // match: (Mul32 (Const32 [0]) _)
+       // cond:
+       // result: (Const32 [0])
        for {
-               t := v.Type
-               n := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst32 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
                        break
                }
-               c := v_1.AuxInt
-               if !(t.IsSigned() && isPowerOfTwo(-c)) {
+               if v_0.AuxInt != 0 {
                        break
                }
-               v.reset(OpNeg32)
-               v0 := b.NewValue0(v.Pos, OpLsh32x64, t)
-               v0.AddArg(n)
-               v1 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
-               v1.AuxInt = log2(-c)
-               v0.AddArg(v1)
-               v.AddArg(v0)
+               v.reset(OpConst32)
+               v.AuxInt = 0
                return true
        }
-       // match: (Mul32 x (Const32 <t> [c]))
-       // cond: x.Op != OpConst32
-       // result: (Mul32 (Const32 <t> [c]) x)
+       // match: (Mul32 _ (Const32 [0]))
+       // cond:
+       // result: (Const32 [0])
        for {
-               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
-               t := v_1.Type
-               c := v_1.AuxInt
-               if !(x.Op != OpConst32) {
+               if v_1.AuxInt != 0 {
                        break
                }
-               v.reset(OpMul32)
-               v0 := b.NewValue0(v.Pos, OpConst32, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst32)
+               v.AuxInt = 0
                return true
        }
-       // match: (Mul32 (Const32 <t> [c]) (Add32 <t> (Const32 <t> [d]) x))
+       // match: (Mul32 (Const32 <t> [c]) (Mul32 (Const32 <t> [d]) x))
        // cond:
-       // result: (Add32 (Const32 <t> [int64(int32(c*d))]) (Mul32 <t> (Const32 <t> [c]) x))
+       // result: (Mul32 (Const32 <t> [int64(int32(c*d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst32 {
@@ -8863,10 +12181,7 @@ func rewriteValuegeneric_OpMul32(v *Value) bool {
                t := v_0.Type
                c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpAdd32 {
-                       break
-               }
-               if v_1.Type != t {
+               if v_1.Op != OpMul32 {
                        break
                }
                v_1_0 := v_1.Args[0]
@@ -8878,73 +12193,96 @@ func rewriteValuegeneric_OpMul32(v *Value) bool {
                }
                d := v_1_0.AuxInt
                x := v_1.Args[1]
-               v.reset(OpAdd32)
+               v.reset(OpMul32)
                v0 := b.NewValue0(v.Pos, OpConst32, t)
                v0.AuxInt = int64(int32(c * d))
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpMul32, t)
-               v2 := b.NewValue0(v.Pos, OpConst32, t)
-               v2.AuxInt = c
-               v1.AddArg(v2)
-               v1.AddArg(x)
-               v.AddArg(v1)
+               v.AddArg(x)
                return true
        }
-       // match: (Mul32 (Const32 [0]) _)
+       // match: (Mul32 (Const32 <t> [c]) (Mul32 x (Const32 <t> [d])))
        // cond:
-       // result: (Const32 [0])
+       // result: (Mul32 (Const32 <t> [int64(int32(c*d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpConst32 {
                        break
                }
-               if v_0.AuxInt != 0 {
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul32 {
                        break
                }
-               v.reset(OpConst32)
-               v.AuxInt = 0
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst32 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpMul32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c * d))
+               v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
-       // match: (Mul32 x l:(Mul32 _ _))
-       // cond: (x.Op != OpMul32 && x.Op != OpConst32)
-       // result: (Mul32 l x)
+       // match: (Mul32 (Mul32 (Const32 <t> [d]) x) (Const32 <t> [c]))
+       // cond:
+       // result: (Mul32 (Const32 <t> [int64(int32(c*d))]) x)
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpMul32 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul32 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
                        break
                }
-               if !(x.Op != OpMul32 && x.Op != OpConst32) {
+               if v_1.Type != t {
                        break
                }
+               c := v_1.AuxInt
                v.reset(OpMul32)
-               v.AddArg(l)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c * d))
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Mul32 (Const32 <t> [c]) (Mul32 (Const32 <t> [d]) x))
+       // match: (Mul32 (Mul32 x (Const32 <t> [d])) (Const32 <t> [c]))
        // cond:
        // result: (Mul32 (Const32 <t> [int64(int32(c*d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               if v_0.Op != OpMul32 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpMul32 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst32 {
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               c := v_1.AuxInt
                v.reset(OpMul32)
                v0 := b.NewValue0(v.Pos, OpConst32, t)
                v0.AuxInt = int64(int32(c * d))
@@ -8955,8 +12293,6 @@ func rewriteValuegeneric_OpMul32(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpMul32F(v *Value) bool {
-       b := v.Block
-       _ = b
        // match: (Mul32F (Const32F [c]) (Const32F [d]))
        // cond:
        // result: (Const32F [f2i(float64(i2f32(c) * i2f32(d)))])
@@ -8975,6 +12311,24 @@ func rewriteValuegeneric_OpMul32F(v *Value) bool {
                v.AuxInt = f2i(float64(i2f32(c) * i2f32(d)))
                return true
        }
+       // match: (Mul32F (Const32F [d]) (Const32F [c]))
+       // cond:
+       // result: (Const32F [f2i(float64(i2f32(c) * i2f32(d)))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32F {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32F {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConst32F)
+               v.AuxInt = f2i(float64(i2f32(c) * i2f32(d)))
+               return true
+       }
        // match: (Mul32F x (Const32F [f2i(1)]))
        // cond:
        // result: x
@@ -9058,23 +12412,21 @@ func rewriteValuegeneric_OpMul32F(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Mul32F x (Const32F [f2i(-2)]))
+       // match: (Mul32F (Const32F [f2i(2)]) x)
        // cond:
-       // result: (Neg32F (Add32F <v.Type> x x))
+       // result: (Add32F x x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst32F {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32F {
                        break
                }
-               if v_1.AuxInt != f2i(-2) {
+               if v_0.AuxInt != f2i(2) {
                        break
                }
-               v.reset(OpNeg32F)
-               v0 := b.NewValue0(v.Pos, OpAdd32F, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               x := v.Args[1]
+               v.reset(OpAdd32F)
+               v.AddArg(x)
+               v.AddArg(x)
                return true
        }
        return false
@@ -9084,7 +12436,7 @@ func rewriteValuegeneric_OpMul64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mul64  (Const64 [c])  (Const64 [d]))
+       // match: (Mul64 (Const64 [c]) (Const64 [d]))
        // cond:
        // result: (Const64 [c*d])
        for {
@@ -9102,6 +12454,24 @@ func rewriteValuegeneric_OpMul64(v *Value) bool {
                v.AuxInt = c * d
                return true
        }
+       // match: (Mul64 (Const64 [d]) (Const64 [c]))
+       // cond:
+       // result: (Const64 [c*d])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConst64)
+               v.AuxInt = c * d
+               return true
+       }
        // match: (Mul64 (Const64 [1]) x)
        // cond:
        // result: x
@@ -9119,6 +12489,23 @@ func rewriteValuegeneric_OpMul64(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Mul64 x (Const64 [1]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Mul64 (Const64 [-1]) x)
        // cond:
        // result: (Neg64 x)
@@ -9130,22 +12517,60 @@ func rewriteValuegeneric_OpMul64(v *Value) bool {
                if v_0.AuxInt != -1 {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpNeg64)
-               v.AddArg(x)
+               x := v.Args[1]
+               v.reset(OpNeg64)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul64 x (Const64 [-1]))
+       // cond:
+       // result: (Neg64 x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpNeg64)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul64 <t> n (Const64 [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (Lsh64x64 <t> n (Const64 <types.UInt64> [log2(c)]))
+       for {
+               t := v.Type
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpLsh64x64)
+               v.Type = t
+               v.AddArg(n)
+               v0 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v0.AuxInt = log2(c)
+               v.AddArg(v0)
                return true
        }
-       // match: (Mul64 <t> n (Const64 [c]))
+       // match: (Mul64 <t> (Const64 [c]) n)
        // cond: isPowerOfTwo(c)
        // result: (Lsh64x64 <t> n (Const64 <types.UInt64> [log2(c)]))
        for {
                t := v.Type
-               n := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst64 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               n := v.Args[1]
                if !(isPowerOfTwo(c)) {
                        break
                }
@@ -9180,25 +12605,27 @@ func rewriteValuegeneric_OpMul64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Mul64 x (Const64 <t> [c]))
-       // cond: x.Op != OpConst64
-       // result: (Mul64 (Const64 <t> [c]) x)
+       // match: (Mul64 <t> (Const64 [c]) n)
+       // cond: t.IsSigned() && isPowerOfTwo(-c)
+       // result: (Neg64 (Lsh64x64 <t> n (Const64 <types.UInt64> [log2(-c)])))
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst64 {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
                        break
                }
-               t := v_1.Type
-               c := v_1.AuxInt
-               if !(x.Op != OpConst64) {
+               c := v_0.AuxInt
+               n := v.Args[1]
+               if !(t.IsSigned() && isPowerOfTwo(-c)) {
                        break
                }
-               v.reset(OpMul64)
-               v0 := b.NewValue0(v.Pos, OpConst64, t)
-               v0.AuxInt = c
+               v.reset(OpNeg64)
+               v0 := b.NewValue0(v.Pos, OpLsh64x64, t)
+               v0.AddArg(n)
+               v1 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v1.AuxInt = log2(-c)
+               v0.AddArg(v1)
                v.AddArg(v0)
-               v.AddArg(x)
                return true
        }
        // match: (Mul64 (Const64 <t> [c]) (Add64 <t> (Const64 <t> [d]) x))
@@ -9239,6 +12666,120 @@ func rewriteValuegeneric_OpMul64(v *Value) bool {
                v.AddArg(v1)
                return true
        }
+       // match: (Mul64 (Const64 <t> [c]) (Add64 <t> x (Const64 <t> [d])))
+       // cond:
+       // result: (Add64 (Const64 <t> [c*d]) (Mul64 <t> (Const64 <t> [c]) x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd64 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst64 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpAdd64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c * d
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpMul64, t)
+               v2 := b.NewValue0(v.Pos, OpConst64, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v.AddArg(v1)
+               return true
+       }
+       // match: (Mul64 (Add64 <t> (Const64 <t> [d]) x) (Const64 <t> [c]))
+       // cond:
+       // result: (Add64 (Const64 <t> [c*d]) (Mul64 <t> (Const64 <t> [c]) x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd64 {
+                       break
+               }
+               t := v_0.Type
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
+                       break
+               }
+               if v_0_0.Type != t {
+                       break
+               }
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c * d
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpMul64, t)
+               v2 := b.NewValue0(v.Pos, OpConst64, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v.AddArg(v1)
+               return true
+       }
+       // match: (Mul64 (Add64 <t> x (Const64 <t> [d])) (Const64 <t> [c]))
+       // cond:
+       // result: (Add64 (Const64 <t> [c*d]) (Mul64 <t> (Const64 <t> [c]) x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd64 {
+                       break
+               }
+               t := v_0.Type
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               if v_0_1.Type != t {
+                       break
+               }
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAdd64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c * d
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpMul64, t)
+               v2 := b.NewValue0(v.Pos, OpConst64, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v.AddArg(v1)
+               return true
+       }
        // match: (Mul64 (Const64 [0]) _)
        // cond:
        // result: (Const64 [0])
@@ -9254,21 +12795,19 @@ func rewriteValuegeneric_OpMul64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Mul64 x l:(Mul64 _ _))
-       // cond: (x.Op != OpMul64 && x.Op != OpConst64)
-       // result: (Mul64 l x)
+       // match: (Mul64 _ (Const64 [0]))
+       // cond:
+       // result: (Const64 [0])
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpMul64 {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
                        break
                }
-               if !(x.Op != OpMul64 && x.Op != OpConst64) {
+               if v_1.AuxInt != 0 {
                        break
                }
-               v.reset(OpMul64)
-               v.AddArg(l)
-               v.AddArg(x)
+               v.reset(OpConst64)
+               v.AuxInt = 0
                return true
        }
        // match: (Mul64 (Const64 <t> [c]) (Mul64 (Const64 <t> [d]) x))
@@ -9301,11 +12840,99 @@ func rewriteValuegeneric_OpMul64(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Mul64 (Const64 <t> [c]) (Mul64 x (Const64 <t> [d])))
+       // cond:
+       // result: (Mul64 (Const64 <t> [c*d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul64 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst64 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpMul64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c * d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul64 (Mul64 (Const64 <t> [d]) x) (Const64 <t> [c]))
+       // cond:
+       // result: (Mul64 (Const64 <t> [c*d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul64 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpMul64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c * d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul64 (Mul64 x (Const64 <t> [d])) (Const64 <t> [c]))
+       // cond:
+       // result: (Mul64 (Const64 <t> [c*d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul64 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpMul64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c * d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpMul64F(v *Value) bool {
-       b := v.Block
-       _ = b
        // match: (Mul64F (Const64F [c]) (Const64F [d]))
        // cond:
        // result: (Const64F [f2i(i2f(c) * i2f(d))])
@@ -9314,12 +12941,30 @@ func rewriteValuegeneric_OpMul64F(v *Value) bool {
                if v_0.Op != OpConst64F {
                        break
                }
-               c := v_0.AuxInt
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64F {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConst64F)
+               v.AuxInt = f2i(i2f(c) * i2f(d))
+               return true
+       }
+       // match: (Mul64F (Const64F [d]) (Const64F [c]))
+       // cond:
+       // result: (Const64F [f2i(i2f(c) * i2f(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64F {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst64F {
                        break
                }
-               d := v_1.AuxInt
+               c := v_1.AuxInt
                v.reset(OpConst64F)
                v.AuxInt = f2i(i2f(c) * i2f(d))
                return true
@@ -9407,23 +13052,21 @@ func rewriteValuegeneric_OpMul64F(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Mul64F x (Const64F [f2i(-2)]))
+       // match: (Mul64F (Const64F [f2i(2)]) x)
        // cond:
-       // result: (Neg64F (Add64F <v.Type> x x))
+       // result: (Add64F x x)
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst64F {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64F {
                        break
                }
-               if v_1.AuxInt != f2i(-2) {
+               if v_0.AuxInt != f2i(2) {
                        break
                }
-               v.reset(OpNeg64F)
-               v0 := b.NewValue0(v.Pos, OpAdd64F, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               x := v.Args[1]
+               v.reset(OpAdd64F)
+               v.AddArg(x)
+               v.AddArg(x)
                return true
        }
        return false
@@ -9433,7 +13076,7 @@ func rewriteValuegeneric_OpMul8(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Mul8   (Const8 [c])   (Const8 [d]))
+       // match: (Mul8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (Const8  [int64(int8(c*d))])
        for {
@@ -9451,7 +13094,25 @@ func rewriteValuegeneric_OpMul8(v *Value) bool {
                v.AuxInt = int64(int8(c * d))
                return true
        }
-       // match: (Mul8  (Const8  [1]) x)
+       // match: (Mul8 (Const8 [d]) (Const8 [c]))
+       // cond:
+       // result: (Const8  [int64(int8(c*d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConst8)
+               v.AuxInt = int64(int8(c * d))
+               return true
+       }
+       // match: (Mul8 (Const8 [1]) x)
        // cond:
        // result: x
        for {
@@ -9468,7 +13129,24 @@ func rewriteValuegeneric_OpMul8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Mul8  (Const8  [-1]) x)
+       // match: (Mul8 x (Const8 [1]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul8 (Const8 [-1]) x)
        // cond:
        // result: (Neg8  x)
        for {
@@ -9484,7 +13162,23 @@ func rewriteValuegeneric_OpMul8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Mul8  <t> n (Const8  [c]))
+       // match: (Mul8 x (Const8 [-1]))
+       // cond:
+       // result: (Neg8  x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpNeg8)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul8 <t> n (Const8 [c]))
        // cond: isPowerOfTwo(c)
        // result: (Lsh8x64  <t> n (Const64 <types.UInt64> [log2(c)]))
        for {
@@ -9506,7 +13200,29 @@ func rewriteValuegeneric_OpMul8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Mul8  <t> n (Const8  [c]))
+       // match: (Mul8 <t> (Const8 [c]) n)
+       // cond: isPowerOfTwo(c)
+       // result: (Lsh8x64  <t> n (Const64 <types.UInt64> [log2(c)]))
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               c := v_0.AuxInt
+               n := v.Args[1]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpLsh8x64)
+               v.Type = t
+               v.AddArg(n)
+               v0 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v0.AuxInt = log2(c)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Mul8 <t> n (Const8 [c]))
        // cond: t.IsSigned() && isPowerOfTwo(-c)
        // result: (Neg8  (Lsh8x64  <t> n (Const64 <types.UInt64> [log2(-c)])))
        for {
@@ -9529,28 +13245,30 @@ func rewriteValuegeneric_OpMul8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Mul8  x (Const8  <t> [c]))
-       // cond: x.Op != OpConst8
-       // result: (Mul8  (Const8  <t> [c]) x)
+       // match: (Mul8 <t> (Const8 [c]) n)
+       // cond: t.IsSigned() && isPowerOfTwo(-c)
+       // result: (Neg8  (Lsh8x64  <t> n (Const64 <types.UInt64> [log2(-c)])))
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst8 {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
                        break
                }
-               t := v_1.Type
-               c := v_1.AuxInt
-               if !(x.Op != OpConst8) {
+               c := v_0.AuxInt
+               n := v.Args[1]
+               if !(t.IsSigned() && isPowerOfTwo(-c)) {
                        break
                }
-               v.reset(OpMul8)
-               v0 := b.NewValue0(v.Pos, OpConst8, t)
-               v0.AuxInt = c
+               v.reset(OpNeg8)
+               v0 := b.NewValue0(v.Pos, OpLsh8x64, t)
+               v0.AddArg(n)
+               v1 := b.NewValue0(v.Pos, OpConst64, types.UInt64)
+               v1.AuxInt = log2(-c)
+               v0.AddArg(v1)
                v.AddArg(v0)
-               v.AddArg(x)
                return true
        }
-       // match: (Mul8  (Const8  [0]) _)
+       // match: (Mul8 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -9565,24 +13283,22 @@ func rewriteValuegeneric_OpMul8(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Mul8  x l:(Mul8  _ _))
-       // cond: (x.Op != OpMul8  && x.Op != OpConst8)
-       // result: (Mul8  l x)
+       // match: (Mul8 _ (Const8 [0]))
+       // cond:
+       // result: (Const8  [0])
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpMul8 {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
                        break
                }
-               if !(x.Op != OpMul8 && x.Op != OpConst8) {
+               if v_1.AuxInt != 0 {
                        break
                }
-               v.reset(OpMul8)
-               v.AddArg(l)
-               v.AddArg(x)
+               v.reset(OpConst8)
+               v.AuxInt = 0
                return true
        }
-       // match: (Mul8  (Const8  <t> [c]) (Mul8  (Const8  <t> [d]) x))
+       // match: (Mul8 (Const8 <t> [c]) (Mul8 (Const8 <t> [d]) x))
        // cond:
        // result: (Mul8  (Const8  <t> [int64(int8(c*d))]) x)
        for {
@@ -9612,10 +13328,100 @@ func rewriteValuegeneric_OpMul8(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Mul8 (Const8 <t> [c]) (Mul8 x (Const8 <t> [d])))
+       // cond:
+       // result: (Mul8  (Const8  <t> [int64(int8(c*d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul8 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst8 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpMul8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c * d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul8 (Mul8 (Const8 <t> [d]) x) (Const8 <t> [c]))
+       // cond:
+       // result: (Mul8  (Const8  <t> [int64(int8(c*d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpMul8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c * d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Mul8 (Mul8 x (Const8 <t> [d])) (Const8 <t> [c]))
+       // cond:
+       // result: (Mul8  (Const8  <t> [int64(int8(c*d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul8 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst8 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpMul8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c * d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpNeg16(v *Value) bool {
-       // match: (Neg16  (Const16  [c]))
+       // match: (Neg16 (Const16 [c]))
        // cond:
        // result: (Const16  [int64(-int16(c))])
        for {
@@ -9646,7 +13452,7 @@ func rewriteValuegeneric_OpNeg16(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpNeg32(v *Value) bool {
-       // match: (Neg32  (Const32  [c]))
+       // match: (Neg32 (Const32 [c]))
        // cond:
        // result: (Const32  [int64(-int32(c))])
        for {
@@ -9696,7 +13502,7 @@ func rewriteValuegeneric_OpNeg32F(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpNeg64(v *Value) bool {
-       // match: (Neg64  (Const64  [c]))
+       // match: (Neg64 (Const64 [c]))
        // cond:
        // result: (Const64  [-c])
        for {
@@ -9746,7 +13552,7 @@ func rewriteValuegeneric_OpNeg64F(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpNeg8(v *Value) bool {
-       // match: (Neg8   (Const8   [c]))
+       // match: (Neg8 (Const8 [c]))
        // cond:
        // result: (Const8   [int64( -int8(c))])
        for {
@@ -9759,7 +13565,7 @@ func rewriteValuegeneric_OpNeg8(v *Value) bool {
                v.AuxInt = int64(-int8(c))
                return true
        }
-       // match: (Neg8  (Sub8  x y))
+       // match: (Neg8 (Sub8 x y))
        // cond:
        // result: (Sub8  y x)
        for {
@@ -9805,15 +13611,75 @@ func rewriteValuegeneric_OpNeq16(v *Value) bool {
                if v_1.Op != OpAdd16 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst16 {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst16 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpNeq16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Neq16 (Const16 <t> [c]) (Add16 x (Const16 <t> [d])))
+       // cond:
+       // result: (Neq16 (Const16 <t> [int64(int16(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd16 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst16 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpNeq16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Neq16 (Add16 (Const16 <t> [d]) x) (Const16 <t> [c]))
+       // cond:
+       // result: (Neq16 (Const16 <t> [int64(int16(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd16 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               c := v_1.AuxInt
                v.reset(OpNeq16)
                v0 := b.NewValue0(v.Pos, OpConst16, t)
                v0.AuxInt = int64(int16(c - d))
@@ -9821,23 +13687,32 @@ func rewriteValuegeneric_OpNeq16(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Neq16 x (Const16 <t> [c]))
-       // cond: x.Op != OpConst16
-       // result: (Neq16 (Const16 <t> [c]) x)
+       // match: (Neq16 (Add16 x (Const16 <t> [d])) (Const16 <t> [c]))
+       // cond:
+       // result: (Neq16 (Const16 <t> [int64(int16(c-d))]) x)
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd16 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst16 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst16 {
                        break
                }
-               t := v_1.Type
-               c := v_1.AuxInt
-               if !(x.Op != OpConst16) {
+               if v_1.Type != t {
                        break
                }
+               c := v_1.AuxInt
                v.reset(OpNeq16)
                v0 := b.NewValue0(v.Pos, OpConst16, t)
-               v0.AuxInt = c
+               v0.AuxInt = int64(int16(c - d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
@@ -9860,6 +13735,24 @@ func rewriteValuegeneric_OpNeq16(v *Value) bool {
                v.AuxInt = b2i(c != d)
                return true
        }
+       // match: (Neq16 (Const16 [d]) (Const16 [c]))
+       // cond:
+       // result: (ConstBool [b2i(c != d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c != d)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpNeq32(v *Value) bool {
@@ -9907,23 +13800,92 @@ func rewriteValuegeneric_OpNeq32(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Neq32 x (Const32 <t> [c]))
-       // cond: x.Op != OpConst32
-       // result: (Neq32 (Const32 <t> [c]) x)
+       // match: (Neq32 (Const32 <t> [c]) (Add32 x (Const32 <t> [d])))
+       // cond:
+       // result: (Neq32 (Const32 <t> [int64(int32(c-d))]) x)
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd32 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst32 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpNeq32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Neq32 (Add32 (Const32 <t> [d]) x) (Const32 <t> [c]))
+       // cond:
+       // result: (Neq32 (Const32 <t> [int64(int32(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd32 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
-               t := v_1.Type
+               if v_1.Type != t {
+                       break
+               }
                c := v_1.AuxInt
-               if !(x.Op != OpConst32) {
+               v.reset(OpNeq32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Neq32 (Add32 x (Const32 <t> [d])) (Const32 <t> [c]))
+       // cond:
+       // result: (Neq32 (Const32 <t> [int64(int32(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd32 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
                        break
                }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
                v.reset(OpNeq32)
                v0 := b.NewValue0(v.Pos, OpConst32, t)
-               v0.AuxInt = c
+               v0.AuxInt = int64(int32(c - d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
@@ -9946,6 +13908,24 @@ func rewriteValuegeneric_OpNeq32(v *Value) bool {
                v.AuxInt = b2i(c != d)
                return true
        }
+       // match: (Neq32 (Const32 [d]) (Const32 [c]))
+       // cond:
+       // result: (ConstBool [b2i(c != d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c != d)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpNeq64(v *Value) bool {
@@ -9993,23 +13973,92 @@ func rewriteValuegeneric_OpNeq64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Neq64 x (Const64 <t> [c]))
-       // cond: x.Op != OpConst64
-       // result: (Neq64 (Const64 <t> [c]) x)
+       // match: (Neq64 (Const64 <t> [c]) (Add64 x (Const64 <t> [d])))
+       // cond:
+       // result: (Neq64 (Const64 <t> [c-d]) x)
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd64 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst64 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpNeq64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c - d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Neq64 (Add64 (Const64 <t> [d]) x) (Const64 <t> [c]))
+       // cond:
+       // result: (Neq64 (Const64 <t> [c-d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd64 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
                }
-               t := v_1.Type
+               if v_1.Type != t {
+                       break
+               }
                c := v_1.AuxInt
-               if !(x.Op != OpConst64) {
+               v.reset(OpNeq64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c - d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Neq64 (Add64 x (Const64 <t> [d])) (Const64 <t> [c]))
+       // cond:
+       // result: (Neq64 (Const64 <t> [c-d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd64 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
                        break
                }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
                v.reset(OpNeq64)
                v0 := b.NewValue0(v.Pos, OpConst64, t)
-               v0.AuxInt = c
+               v0.AuxInt = c - d
                v.AddArg(v0)
                v.AddArg(x)
                return true
@@ -10032,12 +14081,30 @@ func rewriteValuegeneric_OpNeq64(v *Value) bool {
                v.AuxInt = b2i(c != d)
                return true
        }
+       // match: (Neq64 (Const64 [d]) (Const64 [c]))
+       // cond:
+       // result: (ConstBool [b2i(c != d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c != d)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpNeq8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Neq8  x x)
+       // match: (Neq8 x x)
        // cond:
        // result: (ConstBool [0])
        for {
@@ -10049,7 +14116,37 @@ func rewriteValuegeneric_OpNeq8(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Neq8  (Const8  <t> [c]) (Add8  (Const8  <t> [d]) x))
+       // match: (Neq8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x))
+       // cond:
+       // result: (Neq8 (Const8 <t> [int64(int8(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpAdd8 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst8 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpNeq8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Neq8 (Const8 <t> [c]) (Add8 x (Const8 <t> [d])))
        // cond:
        // result: (Neq8 (Const8 <t> [int64(int8(c-d))]) x)
        for {
@@ -10063,15 +14160,15 @@ func rewriteValuegeneric_OpNeq8(v *Value) bool {
                if v_1.Op != OpAdd8 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst8 {
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst8 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               d := v_1_1.AuxInt
                v.reset(OpNeq8)
                v0 := b.NewValue0(v.Pos, OpConst8, t)
                v0.AuxInt = int64(int8(c - d))
@@ -10079,28 +14176,67 @@ func rewriteValuegeneric_OpNeq8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Neq8  x (Const8 <t>  [c]))
-       // cond: x.Op != OpConst8
-       // result: (Neq8  (Const8  <t> [c]) x)
+       // match: (Neq8 (Add8 (Const8 <t> [d]) x) (Const8 <t> [c]))
+       // cond:
+       // result: (Neq8 (Const8 <t> [int64(int8(c-d))]) x)
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
                v_1 := v.Args[1]
                if v_1.Op != OpConst8 {
                        break
                }
-               t := v_1.Type
+               if v_1.Type != t {
+                       break
+               }
                c := v_1.AuxInt
-               if !(x.Op != OpConst8) {
+               v.reset(OpNeq8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c - d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Neq8 (Add8 x (Const8 <t> [d])) (Const8 <t> [c]))
+       // cond:
+       // result: (Neq8 (Const8 <t> [int64(int8(c-d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd8 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst8 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
                        break
                }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
                v.reset(OpNeq8)
                v0 := b.NewValue0(v.Pos, OpConst8, t)
-               v0.AuxInt = c
+               v0.AuxInt = int64(int8(c - d))
                v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Neq8  (Const8  [c]) (Const8  [d]))
+       // match: (Neq8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (ConstBool [b2i(c != d)])
        for {
@@ -10118,6 +14254,24 @@ func rewriteValuegeneric_OpNeq8(v *Value) bool {
                v.AuxInt = b2i(c != d)
                return true
        }
+       // match: (Neq8 (Const8 [d]) (Const8 [c]))
+       // cond:
+       // result: (ConstBool [b2i(c != d)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpConstBool)
+               v.AuxInt = b2i(c != d)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpNeqB(v *Value) bool {
@@ -10375,7 +14529,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (Eq8  x y))
+       // match: (Not (Eq8 x y))
        // cond:
        // result: (Neq8  x y)
        for {
@@ -10390,7 +14544,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (EqB  x y))
+       // match: (Not (EqB x y))
        // cond:
        // result: (NeqB  x y)
        for {
@@ -10450,7 +14604,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (Neq8  x y))
+       // match: (Not (Neq8 x y))
        // cond:
        // result: (Eq8  x y)
        for {
@@ -10465,7 +14619,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (NeqB  x y))
+       // match: (Not (NeqB x y))
        // cond:
        // result: (EqB  x y)
        for {
@@ -10525,7 +14679,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (Greater8  x y))
+       // match: (Not (Greater8 x y))
        // cond:
        // result: (Leq8  x y)
        for {
@@ -10585,7 +14739,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (Greater8U  x y))
+       // match: (Not (Greater8U x y))
        // cond:
        // result: (Leq8U  x y)
        for {
@@ -10645,7 +14799,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (Geq8  x y))
+       // match: (Not (Geq8 x y))
        // cond:
        // result: (Less8  x y)
        for {
@@ -10705,7 +14859,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (Geq8U  x y))
+       // match: (Not (Geq8U x y))
        // cond:
        // result: (Less8U  x y)
        for {
@@ -10765,7 +14919,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (Less8  x y))
+       // match: (Not (Less8 x y))
        // cond:
        // result: (Geq8  x y)
        for {
@@ -10825,7 +14979,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (Less8U  x y))
+       // match: (Not (Less8U x y))
        // cond:
        // result: (Geq8U  x y)
        for {
@@ -10885,7 +15039,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (Leq8  x y))
+       // match: (Not (Leq8 x y))
        // cond:
        // result: (Greater8 x y)
        for {
@@ -10945,7 +15099,7 @@ func rewriteValuegeneric_OpNot(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Not (Leq8U  x y))
+       // match: (Not (Leq8U x y))
        // cond:
        // result: (Greater8U  x y)
        for {
@@ -11000,7 +15154,7 @@ func rewriteValuegeneric_OpOffPtr(v *Value) bool {
 func rewriteValuegeneric_OpOr16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Or16  (Const16 [c])  (Const16 [d]))
+       // match: (Or16 (Const16 [c]) (Const16 [d]))
        // cond:
        // result: (Const16 [int64(int16(c|d))])
        for {
@@ -11018,25 +15172,22 @@ func rewriteValuegeneric_OpOr16(v *Value) bool {
                v.AuxInt = int64(int16(c | d))
                return true
        }
-       // match: (Or16 x (Const16 <t> [c]))
-       // cond: x.Op != OpConst16
-       // result: (Or16 (Const16 <t> [c]) x)
+       // match: (Or16 (Const16 [d]) (Const16 [c]))
+       // cond:
+       // result: (Const16 [int64(int16(c|d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst16 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst16) {
-                       break
-               }
-               v.reset(OpOr16)
-               v0 := b.NewValue0(v.Pos, OpConst16, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst16)
+               v.AuxInt = int64(int16(c | d))
                return true
        }
        // match: (Or16 x x)
@@ -11069,6 +15220,23 @@ func rewriteValuegeneric_OpOr16(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Or16 x (Const16 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Or16 (Const16 [-1]) _)
        // cond:
        // result: (Const16 [-1])
@@ -11084,6 +15252,21 @@ func rewriteValuegeneric_OpOr16(v *Value) bool {
                v.AuxInt = -1
                return true
        }
+       // match: (Or16 _ (Const16 [-1]))
+       // cond:
+       // result: (Const16 [-1])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpConst16)
+               v.AuxInt = -1
+               return true
+       }
        // match: (Or16 x (Or16 x y))
        // cond:
        // result: (Or16 x y)
@@ -11138,7 +15321,7 @@ func rewriteValuegeneric_OpOr16(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Or16 (Or16 x y) y)
+       // match: (Or16 (Or16 y x) x)
        // cond:
        // result: (Or16 x y)
        for {
@@ -11146,9 +15329,9 @@ func rewriteValuegeneric_OpOr16(v *Value) bool {
                if v_0.Op != OpOr16 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
                v.reset(OpOr16)
@@ -11156,72 +15339,223 @@ func rewriteValuegeneric_OpOr16(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Or16 x l:(Or16 _ _))
-       // cond: (x.Op != OpOr16 && x.Op != OpConst16)
-       // result: (Or16 l x)
+       // match: (Or16 (Or16 i:(Const16 <t>) z) x)
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Or16 i (Or16 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpOr16 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpOr16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or16 (Or16 z i:(Const16 <t>)) x)
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Or16 i (Or16 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpOr16 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpOr16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or16 x (Or16 i:(Const16 <t>) z))
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Or16 i (Or16 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr16 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpOr16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or16 x (Or16 z i:(Const16 <t>)))
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Or16 i (Or16 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr16 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpOr16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or16 (Const16 <t> [c]) (Or16 (Const16 <t> [d]) x))
+       // cond:
+       // result: (Or16 (Const16 <t> [int64(int16(c|d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr16 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst16 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpOr16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c | d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Or16 (Const16 <t> [c]) (Or16 x (Const16 <t> [d])))
+       // cond:
+       // result: (Or16 (Const16 <t> [int64(int16(c|d))]) x)
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpOr16 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr16 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst16 {
                        break
                }
-               if !(x.Op != OpOr16 && x.Op != OpConst16) {
+               if v_1_1.Type != t {
                        break
                }
+               d := v_1_1.AuxInt
                v.reset(OpOr16)
-               v.AddArg(l)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c | d))
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Or16 (Or16 i:(Const16 <t>) z) x)
-       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
-       // result: (Or16 i (Or16 <t> z x))
+       // match: (Or16 (Or16 (Const16 <t> [d]) x) (Const16 <t> [c]))
+       // cond:
+       // result: (Or16 (Const16 <t> [int64(int16(c|d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpOr16 {
                        break
                }
-               i := v_0.Args[0]
-               if i.Op != OpConst16 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst16 {
                        break
                }
-               t := i.Type
-               z := v_0.Args[1]
-               x := v.Args[1]
-               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.Type != t {
                        break
                }
+               c := v_1.AuxInt
                v.reset(OpOr16)
-               v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpOr16, t)
-               v0.AddArg(z)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c | d))
                v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
-       // match: (Or16 (Const16 <t> [c]) (Or16 (Const16 <t> [d]) x))
+       // match: (Or16 (Or16 x (Const16 <t> [d])) (Const16 <t> [c]))
        // cond:
        // result: (Or16 (Const16 <t> [int64(int16(c|d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               if v_0.Op != OpOr16 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpOr16 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst16 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst16 {
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               c := v_1.AuxInt
                v.reset(OpOr16)
                v0 := b.NewValue0(v.Pos, OpConst16, t)
                v0.AuxInt = int64(int16(c | d))
@@ -11234,7 +15568,7 @@ func rewriteValuegeneric_OpOr16(v *Value) bool {
 func rewriteValuegeneric_OpOr32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Or32  (Const32 [c])  (Const32 [d]))
+       // match: (Or32 (Const32 [c]) (Const32 [d]))
        // cond:
        // result: (Const32 [int64(int32(c|d))])
        for {
@@ -11252,25 +15586,22 @@ func rewriteValuegeneric_OpOr32(v *Value) bool {
                v.AuxInt = int64(int32(c | d))
                return true
        }
-       // match: (Or32 x (Const32 <t> [c]))
-       // cond: x.Op != OpConst32
-       // result: (Or32 (Const32 <t> [c]) x)
+       // match: (Or32 (Const32 [d]) (Const32 [c]))
+       // cond:
+       // result: (Const32 [int64(int32(c|d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst32) {
-                       break
-               }
-               v.reset(OpOr32)
-               v0 := b.NewValue0(v.Pos, OpConst32, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst32)
+               v.AuxInt = int64(int32(c | d))
                return true
        }
        // match: (Or32 x x)
@@ -11303,6 +15634,23 @@ func rewriteValuegeneric_OpOr32(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Or32 x (Const32 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Or32 (Const32 [-1]) _)
        // cond:
        // result: (Const32 [-1])
@@ -11318,6 +15666,21 @@ func rewriteValuegeneric_OpOr32(v *Value) bool {
                v.AuxInt = -1
                return true
        }
+       // match: (Or32 _ (Const32 [-1]))
+       // cond:
+       // result: (Const32 [-1])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpConst32)
+               v.AuxInt = -1
+               return true
+       }
        // match: (Or32 x (Or32 x y))
        // cond:
        // result: (Or32 x y)
@@ -11372,7 +15735,7 @@ func rewriteValuegeneric_OpOr32(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Or32 (Or32 x y) y)
+       // match: (Or32 (Or32 y x) x)
        // cond:
        // result: (Or32 x y)
        for {
@@ -11380,9 +15743,9 @@ func rewriteValuegeneric_OpOr32(v *Value) bool {
                if v_0.Op != OpOr32 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
                v.reset(OpOr32)
@@ -11390,72 +15753,223 @@ func rewriteValuegeneric_OpOr32(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Or32 x l:(Or32 _ _))
-       // cond: (x.Op != OpOr32 && x.Op != OpConst32)
-       // result: (Or32 l x)
+       // match: (Or32 (Or32 i:(Const32 <t>) z) x)
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Or32 i (Or32 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpOr32 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpOr32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or32 (Or32 z i:(Const32 <t>)) x)
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Or32 i (Or32 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpOr32 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpOr32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or32 x (Or32 i:(Const32 <t>) z))
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Or32 i (Or32 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr32 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpOr32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or32 x (Or32 z i:(Const32 <t>)))
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Or32 i (Or32 <t> z x))
        for {
                x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpOr32 {
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr32 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpOr32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or32 (Const32 <t> [c]) (Or32 (Const32 <t> [d]) x))
+       // cond:
+       // result: (Or32 (Const32 <t> [int64(int32(c|d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr32 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst32 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpOr32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c | d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Or32 (Const32 <t> [c]) (Or32 x (Const32 <t> [d])))
+       // cond:
+       // result: (Or32 (Const32 <t> [int64(int32(c|d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr32 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst32 {
                        break
                }
-               if !(x.Op != OpOr32 && x.Op != OpConst32) {
+               if v_1_1.Type != t {
                        break
                }
+               d := v_1_1.AuxInt
                v.reset(OpOr32)
-               v.AddArg(l)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c | d))
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Or32 (Or32 i:(Const32 <t>) z) x)
-       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
-       // result: (Or32 i (Or32 <t> z x))
+       // match: (Or32 (Or32 (Const32 <t> [d]) x) (Const32 <t> [c]))
+       // cond:
+       // result: (Or32 (Const32 <t> [int64(int32(c|d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpOr32 {
                        break
                }
-               i := v_0.Args[0]
-               if i.Op != OpConst32 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
                        break
                }
-               t := i.Type
-               z := v_0.Args[1]
-               x := v.Args[1]
-               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.Type != t {
                        break
                }
+               c := v_1.AuxInt
                v.reset(OpOr32)
-               v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpOr32, t)
-               v0.AddArg(z)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c | d))
                v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
-       // match: (Or32 (Const32 <t> [c]) (Or32 (Const32 <t> [d]) x))
+       // match: (Or32 (Or32 x (Const32 <t> [d])) (Const32 <t> [c]))
        // cond:
        // result: (Or32 (Const32 <t> [int64(int32(c|d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               if v_0.Op != OpOr32 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpOr32 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst32 {
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               c := v_1.AuxInt
                v.reset(OpOr32)
                v0 := b.NewValue0(v.Pos, OpConst32, t)
                v0.AuxInt = int64(int32(c | d))
@@ -11468,7 +15982,7 @@ func rewriteValuegeneric_OpOr32(v *Value) bool {
 func rewriteValuegeneric_OpOr64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Or64  (Const64 [c])  (Const64 [d]))
+       // match: (Or64 (Const64 [c]) (Const64 [d]))
        // cond:
        // result: (Const64 [c|d])
        for {
@@ -11486,25 +16000,22 @@ func rewriteValuegeneric_OpOr64(v *Value) bool {
                v.AuxInt = c | d
                return true
        }
-       // match: (Or64 x (Const64 <t> [c]))
-       // cond: x.Op != OpConst64
-       // result: (Or64 (Const64 <t> [c]) x)
+       // match: (Or64 (Const64 [d]) (Const64 [c]))
+       // cond:
+       // result: (Const64 [c|d])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst64) {
-                       break
-               }
-               v.reset(OpOr64)
-               v0 := b.NewValue0(v.Pos, OpConst64, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst64)
+               v.AuxInt = c | d
                return true
        }
        // match: (Or64 x x)
@@ -11537,6 +16048,23 @@ func rewriteValuegeneric_OpOr64(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Or64 x (Const64 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Or64 (Const64 [-1]) _)
        // cond:
        // result: (Const64 [-1])
@@ -11552,6 +16080,21 @@ func rewriteValuegeneric_OpOr64(v *Value) bool {
                v.AuxInt = -1
                return true
        }
+       // match: (Or64 _ (Const64 [-1]))
+       // cond:
+       // result: (Const64 [-1])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpConst64)
+               v.AuxInt = -1
+               return true
+       }
        // match: (Or64 x (Or64 x y))
        // cond:
        // result: (Or64 x y)
@@ -11606,90 +16149,241 @@ func rewriteValuegeneric_OpOr64(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Or64 (Or64 x y) y)
+       // match: (Or64 (Or64 y x) x)
+       // cond:
+       // result: (Or64 x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpOr64 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpOr64)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (Or64 (Or64 i:(Const64 <t>) z) x)
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Or64 i (Or64 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpOr64 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpOr64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or64 (Or64 z i:(Const64 <t>)) x)
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Or64 i (Or64 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpOr64 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpOr64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or64 x (Or64 i:(Const64 <t>) z))
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Or64 i (Or64 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr64 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpOr64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or64 x (Or64 z i:(Const64 <t>)))
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Or64 i (Or64 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr64 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpOr64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or64 (Const64 <t> [c]) (Or64 (Const64 <t> [d]) x))
+       // cond:
+       // result: (Or64 (Const64 <t> [c|d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr64 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst64 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpOr64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c | d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Or64 (Const64 <t> [c]) (Or64 x (Const64 <t> [d])))
        // cond:
-       // result: (Or64 x y)
+       // result: (Or64 (Const64 <t> [c|d]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpOr64 {
+               if v_0.Op != OpConst64 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr64 {
                        break
                }
-               v.reset(OpOr64)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (Or64 x l:(Or64 _ _))
-       // cond: (x.Op != OpOr64 && x.Op != OpConst64)
-       // result: (Or64 l x)
-       for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpOr64 {
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst64 {
                        break
                }
-               if !(x.Op != OpOr64 && x.Op != OpConst64) {
+               if v_1_1.Type != t {
                        break
                }
+               d := v_1_1.AuxInt
                v.reset(OpOr64)
-               v.AddArg(l)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c | d
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Or64 (Or64 i:(Const64 <t>) z) x)
-       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
-       // result: (Or64 i (Or64 <t> z x))
+       // match: (Or64 (Or64 (Const64 <t> [d]) x) (Const64 <t> [c]))
+       // cond:
+       // result: (Or64 (Const64 <t> [c|d]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpOr64 {
                        break
                }
-               i := v_0.Args[0]
-               if i.Op != OpConst64 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
                        break
                }
-               t := i.Type
-               z := v_0.Args[1]
-               x := v.Args[1]
-               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
                        break
                }
+               c := v_1.AuxInt
                v.reset(OpOr64)
-               v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpOr64, t)
-               v0.AddArg(z)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c | d
                v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
-       // match: (Or64 (Const64 <t> [c]) (Or64 (Const64 <t> [d]) x))
+       // match: (Or64 (Or64 x (Const64 <t> [d])) (Const64 <t> [c]))
        // cond:
        // result: (Or64 (Const64 <t> [c|d]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst64 {
+               if v_0.Op != OpOr64 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpOr64 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst64 {
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               c := v_1.AuxInt
                v.reset(OpOr64)
                v0 := b.NewValue0(v.Pos, OpConst64, t)
                v0.AuxInt = c | d
@@ -11702,7 +16396,7 @@ func rewriteValuegeneric_OpOr64(v *Value) bool {
 func rewriteValuegeneric_OpOr8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Or8   (Const8 [c])   (Const8 [d]))
+       // match: (Or8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (Const8  [int64(int8(c|d))])
        for {
@@ -11720,28 +16414,25 @@ func rewriteValuegeneric_OpOr8(v *Value) bool {
                v.AuxInt = int64(int8(c | d))
                return true
        }
-       // match: (Or8  x (Const8  <t> [c]))
-       // cond: x.Op != OpConst8
-       // result: (Or8  (Const8  <t> [c]) x)
+       // match: (Or8 (Const8 [d]) (Const8 [c]))
+       // cond:
+       // result: (Const8  [int64(int8(c|d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst8 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst8) {
-                       break
-               }
-               v.reset(OpOr8)
-               v0 := b.NewValue0(v.Pos, OpConst8, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst8)
+               v.AuxInt = int64(int8(c | d))
                return true
        }
-       // match: (Or8  x x)
+       // match: (Or8 x x)
        // cond:
        // result: x
        for {
@@ -11754,7 +16445,7 @@ func rewriteValuegeneric_OpOr8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Or8  (Const8  [0]) x)
+       // match: (Or8 (Const8 [0]) x)
        // cond:
        // result: x
        for {
@@ -11771,7 +16462,24 @@ func rewriteValuegeneric_OpOr8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Or8  (Const8  [-1]) _)
+       // match: (Or8 x (Const8 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Or8 (Const8 [-1]) _)
        // cond:
        // result: (Const8  [-1])
        for {
@@ -11786,7 +16494,22 @@ func rewriteValuegeneric_OpOr8(v *Value) bool {
                v.AuxInt = -1
                return true
        }
-       // match: (Or8  x (Or8  x y))
+       // match: (Or8 _ (Const8 [-1]))
+       // cond:
+       // result: (Const8  [-1])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpConst8)
+               v.AuxInt = -1
+               return true
+       }
+       // match: (Or8 x (Or8 x y))
        // cond:
        // result: (Or8  x y)
        for {
@@ -11804,7 +16527,7 @@ func rewriteValuegeneric_OpOr8(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Or8  x (Or8  y x))
+       // match: (Or8 x (Or8 y x))
        // cond:
        // result: (Or8  x y)
        for {
@@ -11822,7 +16545,25 @@ func rewriteValuegeneric_OpOr8(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Or8  (Or8  x y) x)
+       // match: (Or8 (Or8 x y) x)
+       // cond:
+       // result: (Or8  x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpOr8 {
+                       break
+               }
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpOr8)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (Or8 (Or8 y x) x)
        // cond:
        // result: (Or8  x y)
        for {
@@ -11830,100 +16571,233 @@ func rewriteValuegeneric_OpOr8(v *Value) bool {
                if v_0.Op != OpOr8 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if x != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpOr8)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (Or8 (Or8 i:(Const8 <t>) z) x)
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Or8  i (Or8  <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpOr8 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpOr8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or8 (Or8 z i:(Const8 <t>)) x)
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Or8  i (Or8  <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpOr8 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpOr8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or8 x (Or8 i:(Const8 <t>) z))
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Or8  i (Or8  <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr8 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpOr8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or8 x (Or8 z i:(Const8 <t>)))
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Or8  i (Or8  <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr8 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpOr8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpOr8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Or8 (Const8 <t> [c]) (Or8 (Const8 <t> [d]) x))
+       // cond:
+       // result: (Or8  (Const8  <t> [int64(int8(c|d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr8 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst8 {
+                       break
+               }
+               if v_1_0.Type != t {
                        break
                }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
                v.reset(OpOr8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c | d))
+               v.AddArg(v0)
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (Or8  (Or8  x y) y)
+       // match: (Or8 (Const8 <t> [c]) (Or8 x (Const8 <t> [d])))
        // cond:
-       // result: (Or8  x y)
+       // result: (Or8  (Const8  <t> [int64(int8(c|d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpOr8 {
+               if v_0.Op != OpConst8 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpOr8 {
                        break
                }
-               v.reset(OpOr8)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (Or8  x l:(Or8  _ _))
-       // cond: (x.Op != OpOr8  && x.Op != OpConst8)
-       // result: (Or8  l x)
-       for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpOr8 {
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst8 {
                        break
                }
-               if !(x.Op != OpOr8 && x.Op != OpConst8) {
+               if v_1_1.Type != t {
                        break
                }
+               d := v_1_1.AuxInt
                v.reset(OpOr8)
-               v.AddArg(l)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c | d))
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Or8  (Or8  i:(Const8  <t>) z) x)
-       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
-       // result: (Or8  i (Or8  <t> z x))
+       // match: (Or8 (Or8 (Const8 <t> [d]) x) (Const8 <t> [c]))
+       // cond:
+       // result: (Or8  (Const8  <t> [int64(int8(c|d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpOr8 {
                        break
                }
-               i := v_0.Args[0]
-               if i.Op != OpConst8 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst8 {
                        break
                }
-               t := i.Type
-               z := v_0.Args[1]
-               x := v.Args[1]
-               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
                        break
                }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
                v.reset(OpOr8)
-               v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpOr8, t)
-               v0.AddArg(z)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c | d))
                v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
-       // match: (Or8  (Const8  <t> [c]) (Or8  (Const8  <t> [d]) x))
+       // match: (Or8 (Or8 x (Const8 <t> [d])) (Const8 <t> [c]))
        // cond:
        // result: (Or8  (Const8  <t> [int64(int8(c|d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst8 {
+               if v_0.Op != OpOr8 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpOr8 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst8 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst8 {
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               c := v_1.AuxInt
                v.reset(OpOr8)
                v0 := b.NewValue0(v.Pos, OpConst8, t)
                v0.AuxInt = int64(int8(c | d))
@@ -11934,7 +16808,7 @@ func rewriteValuegeneric_OpOr8(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpPhi(v *Value) bool {
-       // match: (Phi (Const8  [c]) (Const8  [c]))
+       // match: (Phi (Const8 [c]) (Const8 [c]))
        // cond:
        // result: (Const8  [c])
        for {
@@ -12324,7 +17198,7 @@ func rewriteValuegeneric_OpRsh16Ux64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh16Ux64 (Lsh16x64 x (Const64  [8])) (Const64  [8]))
+       // match: (Rsh16Ux64 (Lsh16x64 x (Const64 [8])) (Const64 [8]))
        // cond:
        // result: (ZeroExt8to16  (Trunc16to8  <types.UInt8>  x))
        for {
@@ -12358,7 +17232,7 @@ func rewriteValuegeneric_OpRsh16Ux64(v *Value) bool {
 func rewriteValuegeneric_OpRsh16Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh16Ux8  <t> x (Const8  [c]))
+       // match: (Rsh16Ux8 <t> x (Const8 [c]))
        // cond:
        // result: (Rsh16Ux64 x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -12396,7 +17270,7 @@ func rewriteValuegeneric_OpRsh16Ux8(v *Value) bool {
 func rewriteValuegeneric_OpRsh16x16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh16x16  <t> x (Const16 [c]))
+       // match: (Rsh16x16 <t> x (Const16 [c]))
        // cond:
        // result: (Rsh16x64  x (Const64 <t> [int64(uint16(c))]))
        for {
@@ -12414,7 +17288,7 @@ func rewriteValuegeneric_OpRsh16x16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh16x16  (Const16 [0]) _)
+       // match: (Rsh16x16 (Const16 [0]) _)
        // cond:
        // result: (Const16 [0])
        for {
@@ -12434,7 +17308,7 @@ func rewriteValuegeneric_OpRsh16x16(v *Value) bool {
 func rewriteValuegeneric_OpRsh16x32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh16x32  <t> x (Const32 [c]))
+       // match: (Rsh16x32 <t> x (Const32 [c]))
        // cond:
        // result: (Rsh16x64  x (Const64 <t> [int64(uint32(c))]))
        for {
@@ -12452,7 +17326,7 @@ func rewriteValuegeneric_OpRsh16x32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh16x32  (Const16 [0]) _)
+       // match: (Rsh16x32 (Const16 [0]) _)
        // cond:
        // result: (Const16 [0])
        for {
@@ -12474,7 +17348,7 @@ func rewriteValuegeneric_OpRsh16x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh16x64  (Const16 [c]) (Const64 [d]))
+       // match: (Rsh16x64 (Const16 [c]) (Const64 [d]))
        // cond:
        // result: (Const16 [int64(int16(c) >> uint64(d))])
        for {
@@ -12492,7 +17366,7 @@ func rewriteValuegeneric_OpRsh16x64(v *Value) bool {
                v.AuxInt = int64(int16(c) >> uint64(d))
                return true
        }
-       // match: (Rsh16x64  x (Const64 [0]))
+       // match: (Rsh16x64 x (Const64 [0]))
        // cond:
        // result: x
        for {
@@ -12509,7 +17383,7 @@ func rewriteValuegeneric_OpRsh16x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Rsh16x64  (Const16 [0]) _)
+       // match: (Rsh16x64 (Const16 [0]) _)
        // cond:
        // result: (Const16 [0])
        for {
@@ -12554,7 +17428,7 @@ func rewriteValuegeneric_OpRsh16x64(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh16x64 (Lsh16x64 x (Const64  [8])) (Const64  [8]))
+       // match: (Rsh16x64 (Lsh16x64 x (Const64 [8])) (Const64 [8]))
        // cond:
        // result: (SignExt8to16  (Trunc16to8  <types.Int8>  x))
        for {
@@ -12588,7 +17462,7 @@ func rewriteValuegeneric_OpRsh16x64(v *Value) bool {
 func rewriteValuegeneric_OpRsh16x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh16x8   <t> x (Const8  [c]))
+       // match: (Rsh16x8 <t> x (Const8 [c]))
        // cond:
        // result: (Rsh16x64  x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -12606,7 +17480,7 @@ func rewriteValuegeneric_OpRsh16x8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh16x8  (Const16 [0]) _)
+       // match: (Rsh16x8 (Const16 [0]) _)
        // cond:
        // result: (Const16 [0])
        for {
@@ -12901,7 +17775,7 @@ func rewriteValuegeneric_OpRsh32Ux64(v *Value) bool {
 func rewriteValuegeneric_OpRsh32Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh32Ux8  <t> x (Const8  [c]))
+       // match: (Rsh32Ux8 <t> x (Const8 [c]))
        // cond:
        // result: (Rsh32Ux64 x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -12939,7 +17813,7 @@ func rewriteValuegeneric_OpRsh32Ux8(v *Value) bool {
 func rewriteValuegeneric_OpRsh32x16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh32x16  <t> x (Const16 [c]))
+       // match: (Rsh32x16 <t> x (Const16 [c]))
        // cond:
        // result: (Rsh32x64  x (Const64 <t> [int64(uint16(c))]))
        for {
@@ -12957,7 +17831,7 @@ func rewriteValuegeneric_OpRsh32x16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh32x16  (Const32 [0]) _)
+       // match: (Rsh32x16 (Const32 [0]) _)
        // cond:
        // result: (Const32 [0])
        for {
@@ -12977,7 +17851,7 @@ func rewriteValuegeneric_OpRsh32x16(v *Value) bool {
 func rewriteValuegeneric_OpRsh32x32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh32x32  <t> x (Const32 [c]))
+       // match: (Rsh32x32 <t> x (Const32 [c]))
        // cond:
        // result: (Rsh32x64  x (Const64 <t> [int64(uint32(c))]))
        for {
@@ -12995,7 +17869,7 @@ func rewriteValuegeneric_OpRsh32x32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh32x32  (Const32 [0]) _)
+       // match: (Rsh32x32 (Const32 [0]) _)
        // cond:
        // result: (Const32 [0])
        for {
@@ -13017,7 +17891,7 @@ func rewriteValuegeneric_OpRsh32x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh32x64  (Const32 [c]) (Const64 [d]))
+       // match: (Rsh32x64 (Const32 [c]) (Const64 [d]))
        // cond:
        // result: (Const32 [int64(int32(c) >> uint64(d))])
        for {
@@ -13035,7 +17909,7 @@ func rewriteValuegeneric_OpRsh32x64(v *Value) bool {
                v.AuxInt = int64(int32(c) >> uint64(d))
                return true
        }
-       // match: (Rsh32x64  x (Const64 [0]))
+       // match: (Rsh32x64 x (Const64 [0]))
        // cond:
        // result: x
        for {
@@ -13052,7 +17926,7 @@ func rewriteValuegeneric_OpRsh32x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Rsh32x64  (Const32 [0]) _)
+       // match: (Rsh32x64 (Const32 [0]) _)
        // cond:
        // result: (Const32 [0])
        for {
@@ -13160,7 +18034,7 @@ func rewriteValuegeneric_OpRsh32x64(v *Value) bool {
 func rewriteValuegeneric_OpRsh32x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh32x8   <t> x (Const8  [c]))
+       // match: (Rsh32x8 <t> x (Const8 [c]))
        // cond:
        // result: (Rsh32x64  x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -13178,7 +18052,7 @@ func rewriteValuegeneric_OpRsh32x8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh32x8  (Const32 [0]) _)
+       // match: (Rsh32x8 (Const32 [0]) _)
        // cond:
        // result: (Const32 [0])
        for {
@@ -13502,7 +18376,7 @@ func rewriteValuegeneric_OpRsh64Ux64(v *Value) bool {
 func rewriteValuegeneric_OpRsh64Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh64Ux8  <t> x (Const8  [c]))
+       // match: (Rsh64Ux8 <t> x (Const8 [c]))
        // cond:
        // result: (Rsh64Ux64 x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -13540,7 +18414,7 @@ func rewriteValuegeneric_OpRsh64Ux8(v *Value) bool {
 func rewriteValuegeneric_OpRsh64x16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh64x16  <t> x (Const16 [c]))
+       // match: (Rsh64x16 <t> x (Const16 [c]))
        // cond:
        // result: (Rsh64x64  x (Const64 <t> [int64(uint16(c))]))
        for {
@@ -13558,7 +18432,7 @@ func rewriteValuegeneric_OpRsh64x16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh64x16  (Const64 [0]) _)
+       // match: (Rsh64x16 (Const64 [0]) _)
        // cond:
        // result: (Const64 [0])
        for {
@@ -13578,7 +18452,7 @@ func rewriteValuegeneric_OpRsh64x16(v *Value) bool {
 func rewriteValuegeneric_OpRsh64x32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh64x32  <t> x (Const32 [c]))
+       // match: (Rsh64x32 <t> x (Const32 [c]))
        // cond:
        // result: (Rsh64x64  x (Const64 <t> [int64(uint32(c))]))
        for {
@@ -13596,7 +18470,7 @@ func rewriteValuegeneric_OpRsh64x32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh64x32  (Const64 [0]) _)
+       // match: (Rsh64x32 (Const64 [0]) _)
        // cond:
        // result: (Const64 [0])
        for {
@@ -13618,7 +18492,7 @@ func rewriteValuegeneric_OpRsh64x64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh64x64  (Const64 [c]) (Const64 [d]))
+       // match: (Rsh64x64 (Const64 [c]) (Const64 [d]))
        // cond:
        // result: (Const64 [c >> uint64(d)])
        for {
@@ -13636,7 +18510,7 @@ func rewriteValuegeneric_OpRsh64x64(v *Value) bool {
                v.AuxInt = c >> uint64(d)
                return true
        }
-       // match: (Rsh64x64  x (Const64 [0]))
+       // match: (Rsh64x64 x (Const64 [0]))
        // cond:
        // result: x
        for {
@@ -13653,7 +18527,7 @@ func rewriteValuegeneric_OpRsh64x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Rsh64x64  (Const64 [0]) _)
+       // match: (Rsh64x64 (Const64 [0]) _)
        // cond:
        // result: (Const64 [0])
        for {
@@ -13790,7 +18664,7 @@ func rewriteValuegeneric_OpRsh64x64(v *Value) bool {
 func rewriteValuegeneric_OpRsh64x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh64x8   <t> x (Const8  [c]))
+       // match: (Rsh64x8 <t> x (Const8 [c]))
        // cond:
        // result: (Rsh64x64  x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -13808,7 +18682,7 @@ func rewriteValuegeneric_OpRsh64x8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh64x8  (Const64 [0]) _)
+       // match: (Rsh64x8 (Const64 [0]) _)
        // cond:
        // result: (Const64 [0])
        for {
@@ -13846,7 +18720,7 @@ func rewriteValuegeneric_OpRsh8Ux16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8Ux16  (Const8 [0]) _)
+       // match: (Rsh8Ux16 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -13884,7 +18758,7 @@ func rewriteValuegeneric_OpRsh8Ux32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8Ux32  (Const8 [0]) _)
+       // match: (Rsh8Ux32 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -13906,7 +18780,7 @@ func rewriteValuegeneric_OpRsh8Ux64(v *Value) bool {
        _ = b
        types := &b.Func.Config.Types
        _ = types
-       // match: (Rsh8Ux64  (Const8  [c]) (Const64 [d]))
+       // match: (Rsh8Ux64 (Const8 [c]) (Const64 [d]))
        // cond:
        // result: (Const8  [int64(int8(uint8(c) >> uint64(d)))])
        for {
@@ -13924,7 +18798,7 @@ func rewriteValuegeneric_OpRsh8Ux64(v *Value) bool {
                v.AuxInt = int64(int8(uint8(c) >> uint64(d)))
                return true
        }
-       // match: (Rsh8Ux64  x (Const64 [0]))
+       // match: (Rsh8Ux64 x (Const64 [0]))
        // cond:
        // result: x
        for {
@@ -13941,7 +18815,7 @@ func rewriteValuegeneric_OpRsh8Ux64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Rsh8Ux64  (Const8 [0]) _)
+       // match: (Rsh8Ux64 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -13956,7 +18830,7 @@ func rewriteValuegeneric_OpRsh8Ux64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Rsh8Ux64  _ (Const64 [c]))
+       // match: (Rsh8Ux64 _ (Const64 [c]))
        // cond: uint64(c) >= 8
        // result: (Const8  [0])
        for {
@@ -13972,7 +18846,7 @@ func rewriteValuegeneric_OpRsh8Ux64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Rsh8Ux64  <t> (Rsh8Ux64  x (Const64 [c])) (Const64 [d]))
+       // match: (Rsh8Ux64 <t> (Rsh8Ux64 x (Const64 [c])) (Const64 [d]))
        // cond: !uaddOvf(c,d)
        // result: (Rsh8Ux64  x (Const64 <t> [c+d]))
        for {
@@ -14045,7 +18919,7 @@ func rewriteValuegeneric_OpRsh8Ux64(v *Value) bool {
 func rewriteValuegeneric_OpRsh8Ux8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh8Ux8  <t> x (Const8  [c]))
+       // match: (Rsh8Ux8 <t> x (Const8 [c]))
        // cond:
        // result: (Rsh8Ux64 x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -14063,7 +18937,7 @@ func rewriteValuegeneric_OpRsh8Ux8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8Ux8  (Const8 [0]) _)
+       // match: (Rsh8Ux8 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -14083,7 +18957,7 @@ func rewriteValuegeneric_OpRsh8Ux8(v *Value) bool {
 func rewriteValuegeneric_OpRsh8x16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh8x16  <t> x (Const16 [c]))
+       // match: (Rsh8x16 <t> x (Const16 [c]))
        // cond:
        // result: (Rsh8x64  x (Const64 <t> [int64(uint16(c))]))
        for {
@@ -14101,7 +18975,7 @@ func rewriteValuegeneric_OpRsh8x16(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x16   (Const8 [0]) _)
+       // match: (Rsh8x16 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -14121,7 +18995,7 @@ func rewriteValuegeneric_OpRsh8x16(v *Value) bool {
 func rewriteValuegeneric_OpRsh8x32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh8x32  <t> x (Const32 [c]))
+       // match: (Rsh8x32 <t> x (Const32 [c]))
        // cond:
        // result: (Rsh8x64  x (Const64 <t> [int64(uint32(c))]))
        for {
@@ -14139,7 +19013,7 @@ func rewriteValuegeneric_OpRsh8x32(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x32   (Const8 [0]) _)
+       // match: (Rsh8x32 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -14159,7 +19033,7 @@ func rewriteValuegeneric_OpRsh8x32(v *Value) bool {
 func rewriteValuegeneric_OpRsh8x64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh8x64   (Const8  [c]) (Const64 [d]))
+       // match: (Rsh8x64 (Const8 [c]) (Const64 [d]))
        // cond:
        // result: (Const8  [int64(int8(c) >> uint64(d))])
        for {
@@ -14177,7 +19051,7 @@ func rewriteValuegeneric_OpRsh8x64(v *Value) bool {
                v.AuxInt = int64(int8(c) >> uint64(d))
                return true
        }
-       // match: (Rsh8x64   x (Const64 [0]))
+       // match: (Rsh8x64 x (Const64 [0]))
        // cond:
        // result: x
        for {
@@ -14194,7 +19068,7 @@ func rewriteValuegeneric_OpRsh8x64(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Rsh8x64   (Const8 [0]) _)
+       // match: (Rsh8x64 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -14209,7 +19083,7 @@ func rewriteValuegeneric_OpRsh8x64(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Rsh8x64  <t> (Rsh8x64  x (Const64 [c])) (Const64 [d]))
+       // match: (Rsh8x64 <t> (Rsh8x64 x (Const64 [c])) (Const64 [d]))
        // cond: !uaddOvf(c,d)
        // result: (Rsh8x64  x (Const64 <t> [c+d]))
        for {
@@ -14244,7 +19118,7 @@ func rewriteValuegeneric_OpRsh8x64(v *Value) bool {
 func rewriteValuegeneric_OpRsh8x8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Rsh8x8   <t> x (Const8  [c]))
+       // match: (Rsh8x8 <t> x (Const8 [c]))
        // cond:
        // result: (Rsh8x64  x (Const64 <t> [int64(uint8(c))]))
        for {
@@ -14262,7 +19136,7 @@ func rewriteValuegeneric_OpRsh8x8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x8   (Const8 [0]) _)
+       // match: (Rsh8x8 (Const8 [0]) _)
        // cond:
        // result: (Const8  [0])
        for {
@@ -14403,7 +19277,7 @@ func rewriteValuegeneric_OpSignExt32to64(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpSignExt8to16(v *Value) bool {
-       // match: (SignExt8to16  (Const8  [c]))
+       // match: (SignExt8to16 (Const8 [c]))
        // cond:
        // result: (Const16 [int64(  int8(c))])
        for {
@@ -14416,7 +19290,7 @@ func rewriteValuegeneric_OpSignExt8to16(v *Value) bool {
                v.AuxInt = int64(int8(c))
                return true
        }
-       // match: (SignExt8to16  (Trunc16to8  x:(Rsh16x64 _ (Const64 [s]))))
+       // match: (SignExt8to16 (Trunc16to8 x:(Rsh16x64 _ (Const64 [s]))))
        // cond: s >= 8
        // result: x
        for {
@@ -14444,7 +19318,7 @@ func rewriteValuegeneric_OpSignExt8to16(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpSignExt8to32(v *Value) bool {
-       // match: (SignExt8to32  (Const8  [c]))
+       // match: (SignExt8to32 (Const8 [c]))
        // cond:
        // result: (Const32 [int64(  int8(c))])
        for {
@@ -14457,7 +19331,7 @@ func rewriteValuegeneric_OpSignExt8to32(v *Value) bool {
                v.AuxInt = int64(int8(c))
                return true
        }
-       // match: (SignExt8to32  (Trunc32to8  x:(Rsh32x64 _ (Const64 [s]))))
+       // match: (SignExt8to32 (Trunc32to8 x:(Rsh32x64 _ (Const64 [s]))))
        // cond: s >= 24
        // result: x
        for {
@@ -14485,7 +19359,7 @@ func rewriteValuegeneric_OpSignExt8to32(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpSignExt8to64(v *Value) bool {
-       // match: (SignExt8to64  (Const8  [c]))
+       // match: (SignExt8to64 (Const8 [c]))
        // cond:
        // result: (Const64 [int64(  int8(c))])
        for {
@@ -14498,7 +19372,7 @@ func rewriteValuegeneric_OpSignExt8to64(v *Value) bool {
                v.AuxInt = int64(int8(c))
                return true
        }
-       // match: (SignExt8to64  (Trunc64to8  x:(Rsh64x64 _ (Const64 [s]))))
+       // match: (SignExt8to64 (Trunc64to8 x:(Rsh64x64 _ (Const64 [s]))))
        // cond: s >= 56
        // result: x
        for {
@@ -15343,7 +20217,7 @@ func rewriteValuegeneric_OpStructSelect(v *Value) bool {
 func rewriteValuegeneric_OpSub16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Sub16  (Const16 [c]) (Const16 [d]))
+       // match: (Sub16 (Const16 [c]) (Const16 [d]))
        // cond:
        // result: (Const16 [int64(int16(c-d))])
        for {
@@ -15412,6 +20286,24 @@ func rewriteValuegeneric_OpSub16(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (Sub16 (Add16 y x) x)
+       // cond:
+       // result: y
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd16 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
        // match: (Sub16 (Add16 x y) y)
        // cond:
        // result: x
@@ -15430,6 +20322,24 @@ func rewriteValuegeneric_OpSub16(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Sub16 (Add16 y x) y)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd16 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if y != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Sub16 x (Sub16 i:(Const16 <t>) z))
        // cond: (z.Op != OpConst16 && x.Op != OpConst16)
        // result: (Sub16 (Add16 <t> x z) i)
@@ -15547,7 +20457,7 @@ func rewriteValuegeneric_OpSub16(v *Value) bool {
 func rewriteValuegeneric_OpSub32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Sub32  (Const32 [c]) (Const32 [d]))
+       // match: (Sub32 (Const32 [c]) (Const32 [d]))
        // cond:
        // result: (Const32 [int64(int32(c-d))])
        for {
@@ -15616,6 +20526,24 @@ func rewriteValuegeneric_OpSub32(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (Sub32 (Add32 y x) x)
+       // cond:
+       // result: y
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd32 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
        // match: (Sub32 (Add32 x y) y)
        // cond:
        // result: x
@@ -15634,6 +20562,24 @@ func rewriteValuegeneric_OpSub32(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Sub32 (Add32 y x) y)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd32 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if y != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Sub32 x (Sub32 i:(Const32 <t>) z))
        // cond: (z.Op != OpConst32 && x.Op != OpConst32)
        // result: (Sub32 (Add32 <t> x z) i)
@@ -15789,7 +20735,7 @@ func rewriteValuegeneric_OpSub32F(v *Value) bool {
 func rewriteValuegeneric_OpSub64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Sub64  (Const64 [c]) (Const64 [d]))
+       // match: (Sub64 (Const64 [c]) (Const64 [d]))
        // cond:
        // result: (Const64 [c-d])
        for {
@@ -15858,6 +20804,24 @@ func rewriteValuegeneric_OpSub64(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (Sub64 (Add64 y x) x)
+       // cond:
+       // result: y
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd64 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
        // match: (Sub64 (Add64 x y) y)
        // cond:
        // result: x
@@ -15876,6 +20840,24 @@ func rewriteValuegeneric_OpSub64(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Sub64 (Add64 y x) y)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd64 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if y != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Sub64 x (Sub64 i:(Const64 <t>) z))
        // cond: (z.Op != OpConst64 && x.Op != OpConst64)
        // result: (Sub64 (Add64 <t> x z) i)
@@ -16031,7 +21013,7 @@ func rewriteValuegeneric_OpSub64F(v *Value) bool {
 func rewriteValuegeneric_OpSub8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Sub8   (Const8 [c]) (Const8 [d]))
+       // match: (Sub8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (Const8 [int64(int8(c-d))])
        for {
@@ -16049,7 +21031,7 @@ func rewriteValuegeneric_OpSub8(v *Value) bool {
                v.AuxInt = int64(int8(c - d))
                return true
        }
-       // match: (Sub8  x (Const8  <t> [c]))
+       // match: (Sub8 x (Const8 <t> [c]))
        // cond: x.Op != OpConst8
        // result: (Add8  (Const8  <t> [int64(int8(-c))]) x)
        for {
@@ -16070,7 +21052,7 @@ func rewriteValuegeneric_OpSub8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Sub8  x x)
+       // match: (Sub8 x x)
        // cond:
        // result: (Const8  [0])
        for {
@@ -16082,7 +21064,7 @@ func rewriteValuegeneric_OpSub8(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Sub8  (Add8  x y) x)
+       // match: (Sub8 (Add8 x y) x)
        // cond:
        // result: y
        for {
@@ -16100,7 +21082,25 @@ func rewriteValuegeneric_OpSub8(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Sub8  (Add8  x y) y)
+       // match: (Sub8 (Add8 y x) x)
+       // cond:
+       // result: y
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd8 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (Sub8 (Add8 x y) y)
        // cond:
        // result: x
        for {
@@ -16118,7 +21118,25 @@ func rewriteValuegeneric_OpSub8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Sub8  x (Sub8  i:(Const8  <t>) z))
+       // match: (Sub8 (Add8 y x) y)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAdd8 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if y != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Sub8 x (Sub8 i:(Const8 <t>) z))
        // cond: (z.Op != OpConst8  && x.Op != OpConst8)
        // result: (Sub8  (Add8  <t> x z) i)
        for {
@@ -16144,7 +21162,7 @@ func rewriteValuegeneric_OpSub8(v *Value) bool {
                v.AddArg(i)
                return true
        }
-       // match: (Sub8  x (Sub8  z i:(Const8  <t>)))
+       // match: (Sub8 x (Sub8 z i:(Const8 <t>)))
        // cond: (z.Op != OpConst8  && x.Op != OpConst8)
        // result: (Add8  i (Sub8  <t> x z))
        for {
@@ -16170,7 +21188,7 @@ func rewriteValuegeneric_OpSub8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Sub8  (Const8  <t> [c]) (Sub8  x (Const8  <t> [d])))
+       // match: (Sub8 (Const8 <t> [c]) (Sub8 x (Const8 <t> [d])))
        // cond:
        // result: (Sub8  (Const8  <t> [int64(int8(c+d))]) x)
        for {
@@ -16200,7 +21218,7 @@ func rewriteValuegeneric_OpSub8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Sub8  (Const8  <t> [c]) (Sub8  (Const8  <t> [d]) x))
+       // match: (Sub8 (Const8 <t> [c]) (Sub8 (Const8 <t> [d]) x))
        // cond:
        // result: (Add8  (Const8  <t> [int64(int8(c-d))]) x)
        for {
@@ -16233,7 +21251,7 @@ func rewriteValuegeneric_OpSub8(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpTrunc16to8(v *Value) bool {
-       // match: (Trunc16to8  (Const16 [c]))
+       // match: (Trunc16to8 (Const16 [c]))
        // cond:
        // result: (Const8   [int64(int8(c))])
        for {
@@ -16246,7 +21264,7 @@ func rewriteValuegeneric_OpTrunc16to8(v *Value) bool {
                v.AuxInt = int64(int8(c))
                return true
        }
-       // match: (Trunc16to8  (ZeroExt8to16  x))
+       // match: (Trunc16to8 (ZeroExt8to16 x))
        // cond:
        // result: x
        for {
@@ -16260,7 +21278,7 @@ func rewriteValuegeneric_OpTrunc16to8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Trunc16to8  (SignExt8to16  x))
+       // match: (Trunc16to8 (SignExt8to16 x))
        // cond:
        // result: x
        for {
@@ -16274,7 +21292,7 @@ func rewriteValuegeneric_OpTrunc16to8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Trunc16to8  (And16 (Const16 [y]) x))
+       // match: (Trunc16to8 (And16 (Const16 [y]) x))
        // cond: y&0xFF == 0xFF
        // result: (Trunc16to8 x)
        for {
@@ -16295,6 +21313,27 @@ func rewriteValuegeneric_OpTrunc16to8(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Trunc16to8 (And16 x (Const16 [y])))
+       // cond: y&0xFF == 0xFF
+       // result: (Trunc16to8 x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd16 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst16 {
+                       break
+               }
+               y := v_0_1.AuxInt
+               if !(y&0xFF == 0xFF) {
+                       break
+               }
+               v.reset(OpTrunc16to8)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpTrunc32to16(v *Value) bool {
@@ -16311,7 +21350,7 @@ func rewriteValuegeneric_OpTrunc32to16(v *Value) bool {
                v.AuxInt = int64(int16(c))
                return true
        }
-       // match: (Trunc32to16 (ZeroExt8to32  x))
+       // match: (Trunc32to16 (ZeroExt8to32 x))
        // cond:
        // result: (ZeroExt8to16  x)
        for {
@@ -16338,7 +21377,7 @@ func rewriteValuegeneric_OpTrunc32to16(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Trunc32to16 (SignExt8to32  x))
+       // match: (Trunc32to16 (SignExt8to32 x))
        // cond:
        // result: (SignExt8to16  x)
        for {
@@ -16365,7 +21404,28 @@ func rewriteValuegeneric_OpTrunc32to16(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Trunc32to16 (And32 (Const32 [y]) x))
+       // match: (Trunc32to16 (And32 (Const32 [y]) x))
+       // cond: y&0xFFFF == 0xFFFF
+       // result: (Trunc32to16 x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd32 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
+                       break
+               }
+               y := v_0_0.AuxInt
+               x := v_0.Args[1]
+               if !(y&0xFFFF == 0xFFFF) {
+                       break
+               }
+               v.reset(OpTrunc32to16)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Trunc32to16 (And32 x (Const32 [y])))
        // cond: y&0xFFFF == 0xFFFF
        // result: (Trunc32to16 x)
        for {
@@ -16373,12 +21433,12 @@ func rewriteValuegeneric_OpTrunc32to16(v *Value) bool {
                if v_0.Op != OpAnd32 {
                        break
                }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpConst32 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
                        break
                }
-               y := v_0_0.AuxInt
-               x := v_0.Args[1]
+               y := v_0_1.AuxInt
                if !(y&0xFFFF == 0xFFFF) {
                        break
                }
@@ -16389,7 +21449,7 @@ func rewriteValuegeneric_OpTrunc32to16(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpTrunc32to8(v *Value) bool {
-       // match: (Trunc32to8  (Const32 [c]))
+       // match: (Trunc32to8 (Const32 [c]))
        // cond:
        // result: (Const8   [int64(int8(c))])
        for {
@@ -16402,7 +21462,7 @@ func rewriteValuegeneric_OpTrunc32to8(v *Value) bool {
                v.AuxInt = int64(int8(c))
                return true
        }
-       // match: (Trunc32to8  (ZeroExt8to32  x))
+       // match: (Trunc32to8 (ZeroExt8to32 x))
        // cond:
        // result: x
        for {
@@ -16416,7 +21476,7 @@ func rewriteValuegeneric_OpTrunc32to8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Trunc32to8  (SignExt8to32  x))
+       // match: (Trunc32to8 (SignExt8to32 x))
        // cond:
        // result: x
        for {
@@ -16430,7 +21490,7 @@ func rewriteValuegeneric_OpTrunc32to8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Trunc32to8  (And32 (Const32 [y]) x))
+       // match: (Trunc32to8 (And32 (Const32 [y]) x))
        // cond: y&0xFF == 0xFF
        // result: (Trunc32to8 x)
        for {
@@ -16451,6 +21511,27 @@ func rewriteValuegeneric_OpTrunc32to8(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Trunc32to8 (And32 x (Const32 [y])))
+       // cond: y&0xFF == 0xFF
+       // result: (Trunc32to8 x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd32 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
+                       break
+               }
+               y := v_0_1.AuxInt
+               if !(y&0xFF == 0xFF) {
+                       break
+               }
+               v.reset(OpTrunc32to8)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpTrunc64to16(v *Value) bool {
@@ -16467,7 +21548,7 @@ func rewriteValuegeneric_OpTrunc64to16(v *Value) bool {
                v.AuxInt = int64(int16(c))
                return true
        }
-       // match: (Trunc64to16 (ZeroExt8to64  x))
+       // match: (Trunc64to16 (ZeroExt8to64 x))
        // cond:
        // result: (ZeroExt8to16  x)
        for {
@@ -16494,7 +21575,7 @@ func rewriteValuegeneric_OpTrunc64to16(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Trunc64to16 (SignExt8to64  x))
+       // match: (Trunc64to16 (SignExt8to64 x))
        // cond:
        // result: (SignExt8to16  x)
        for {
@@ -16542,6 +21623,27 @@ func rewriteValuegeneric_OpTrunc64to16(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Trunc64to16 (And64 x (Const64 [y])))
+       // cond: y&0xFFFF == 0xFFFF
+       // result: (Trunc64to16 x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd64 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               y := v_0_1.AuxInt
+               if !(y&0xFFFF == 0xFFFF) {
+                       break
+               }
+               v.reset(OpTrunc64to16)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpTrunc64to32(v *Value) bool {
@@ -16558,7 +21660,7 @@ func rewriteValuegeneric_OpTrunc64to32(v *Value) bool {
                v.AuxInt = int64(int32(c))
                return true
        }
-       // match: (Trunc64to32 (ZeroExt8to64  x))
+       // match: (Trunc64to32 (ZeroExt8to64 x))
        // cond:
        // result: (ZeroExt8to32  x)
        for {
@@ -16598,7 +21700,7 @@ func rewriteValuegeneric_OpTrunc64to32(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Trunc64to32 (SignExt8to64  x))
+       // match: (Trunc64to32 (SignExt8to64 x))
        // cond:
        // result: (SignExt8to32  x)
        for {
@@ -16659,10 +21761,31 @@ func rewriteValuegeneric_OpTrunc64to32(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Trunc64to32 (And64 x (Const64 [y])))
+       // cond: y&0xFFFFFFFF == 0xFFFFFFFF
+       // result: (Trunc64to32 x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd64 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               y := v_0_1.AuxInt
+               if !(y&0xFFFFFFFF == 0xFFFFFFFF) {
+                       break
+               }
+               v.reset(OpTrunc64to32)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpTrunc64to8(v *Value) bool {
-       // match: (Trunc64to8  (Const64 [c]))
+       // match: (Trunc64to8 (Const64 [c]))
        // cond:
        // result: (Const8   [int64(int8(c))])
        for {
@@ -16675,7 +21798,7 @@ func rewriteValuegeneric_OpTrunc64to8(v *Value) bool {
                v.AuxInt = int64(int8(c))
                return true
        }
-       // match: (Trunc64to8  (ZeroExt8to64  x))
+       // match: (Trunc64to8 (ZeroExt8to64 x))
        // cond:
        // result: x
        for {
@@ -16689,7 +21812,7 @@ func rewriteValuegeneric_OpTrunc64to8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Trunc64to8  (SignExt8to64  x))
+       // match: (Trunc64to8 (SignExt8to64 x))
        // cond:
        // result: x
        for {
@@ -16703,7 +21826,7 @@ func rewriteValuegeneric_OpTrunc64to8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Trunc64to8  (And64 (Const64 [y]) x))
+       // match: (Trunc64to8 (And64 (Const64 [y]) x))
        // cond: y&0xFF == 0xFF
        // result: (Trunc64to8 x)
        for {
@@ -16724,12 +21847,33 @@ func rewriteValuegeneric_OpTrunc64to8(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Trunc64to8 (And64 x (Const64 [y])))
+       // cond: y&0xFF == 0xFF
+       // result: (Trunc64to8 x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAnd64 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
+                       break
+               }
+               y := v_0_1.AuxInt
+               if !(y&0xFF == 0xFF) {
+                       break
+               }
+               v.reset(OpTrunc64to8)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpXor16(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Xor16  (Const16 [c])  (Const16 [d]))
+       // match: (Xor16 (Const16 [c]) (Const16 [d]))
        // cond:
        // result: (Const16 [int64(int16(c^d))])
        for {
@@ -16747,25 +21891,22 @@ func rewriteValuegeneric_OpXor16(v *Value) bool {
                v.AuxInt = int64(int16(c ^ d))
                return true
        }
-       // match: (Xor16 x (Const16 <t> [c]))
-       // cond: x.Op != OpConst16
-       // result: (Xor16 (Const16 <t> [c]) x)
+       // match: (Xor16 (Const16 [d]) (Const16 [c]))
+       // cond:
+       // result: (Const16 [int64(int16(c^d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst16 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst16) {
-                       break
-               }
-               v.reset(OpXor16)
-               v0 := b.NewValue0(v.Pos, OpConst16, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst16)
+               v.AuxInt = int64(int16(c ^ d))
                return true
        }
        // match: (Xor16 x x)
@@ -16797,6 +21938,23 @@ func rewriteValuegeneric_OpXor16(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Xor16 x (Const16 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Xor16 x (Xor16 x y))
        // cond:
        // result: y
@@ -16851,90 +22009,241 @@ func rewriteValuegeneric_OpXor16(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Xor16 (Xor16 x y) y)
+       // match: (Xor16 (Xor16 y x) x)
        // cond:
-       // result: x
+       // result: y
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpXor16 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (Xor16 (Xor16 i:(Const16 <t>) z) x)
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Xor16 i (Xor16 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpXor16 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpXor16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor16 (Xor16 z i:(Const16 <t>)) x)
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Xor16 i (Xor16 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpXor16 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpXor16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor16 x (Xor16 i:(Const16 <t>) z))
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Xor16 i (Xor16 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor16 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpXor16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor16 x (Xor16 z i:(Const16 <t>)))
+       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
+       // result: (Xor16 i (Xor16 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor16 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst16 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+                       break
+               }
+               v.reset(OpXor16)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor16, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor16 (Const16 <t> [c]) (Xor16 (Const16 <t> [d]) x))
+       // cond:
+       // result: (Xor16 (Const16 <t> [int64(int16(c^d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor16 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst16 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpXor16)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c ^ d))
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Xor16 x l:(Xor16 _ _))
-       // cond: (x.Op != OpXor16 && x.Op != OpConst16)
-       // result: (Xor16 l x)
+       // match: (Xor16 (Const16 <t> [c]) (Xor16 x (Const16 <t> [d])))
+       // cond:
+       // result: (Xor16 (Const16 <t> [int64(int16(c^d))]) x)
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpXor16 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor16 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst16 {
                        break
                }
-               if !(x.Op != OpXor16 && x.Op != OpConst16) {
+               if v_1_1.Type != t {
                        break
                }
+               d := v_1_1.AuxInt
                v.reset(OpXor16)
-               v.AddArg(l)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c ^ d))
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Xor16 (Xor16 i:(Const16 <t>) z) x)
-       // cond: (z.Op != OpConst16 && x.Op != OpConst16)
-       // result: (Xor16 i (Xor16 <t> z x))
+       // match: (Xor16 (Xor16 (Const16 <t> [d]) x) (Const16 <t> [c]))
+       // cond:
+       // result: (Xor16 (Const16 <t> [int64(int16(c^d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpXor16 {
                        break
                }
-               i := v_0.Args[0]
-               if i.Op != OpConst16 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst16 {
                        break
                }
-               t := i.Type
-               z := v_0.Args[1]
-               x := v.Args[1]
-               if !(z.Op != OpConst16 && x.Op != OpConst16) {
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
                        break
                }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
                v.reset(OpXor16)
-               v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpXor16, t)
-               v0.AddArg(z)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = int64(int16(c ^ d))
                v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
-       // match: (Xor16 (Const16 <t> [c]) (Xor16 (Const16 <t> [d]) x))
+       // match: (Xor16 (Xor16 x (Const16 <t> [d])) (Const16 <t> [c]))
        // cond:
        // result: (Xor16 (Const16 <t> [int64(int16(c^d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
+               if v_0.Op != OpXor16 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpXor16 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst16 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst16 {
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               c := v_1.AuxInt
                v.reset(OpXor16)
                v0 := b.NewValue0(v.Pos, OpConst16, t)
                v0.AuxInt = int64(int16(c ^ d))
@@ -16947,7 +22256,7 @@ func rewriteValuegeneric_OpXor16(v *Value) bool {
 func rewriteValuegeneric_OpXor32(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Xor32  (Const32 [c])  (Const32 [d]))
+       // match: (Xor32 (Const32 [c]) (Const32 [d]))
        // cond:
        // result: (Const32 [int64(int32(c^d))])
        for {
@@ -16965,25 +22274,22 @@ func rewriteValuegeneric_OpXor32(v *Value) bool {
                v.AuxInt = int64(int32(c ^ d))
                return true
        }
-       // match: (Xor32 x (Const32 <t> [c]))
-       // cond: x.Op != OpConst32
-       // result: (Xor32 (Const32 <t> [c]) x)
+       // match: (Xor32 (Const32 [d]) (Const32 [c]))
+       // cond:
+       // result: (Const32 [int64(int32(c^d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst32 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst32) {
-                       break
-               }
-               v.reset(OpXor32)
-               v0 := b.NewValue0(v.Pos, OpConst32, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst32)
+               v.AuxInt = int64(int32(c ^ d))
                return true
        }
        // match: (Xor32 x x)
@@ -17015,6 +22321,23 @@ func rewriteValuegeneric_OpXor32(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Xor32 x (Const32 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Xor32 x (Xor32 x y))
        // cond:
        // result: y
@@ -17069,90 +22392,241 @@ func rewriteValuegeneric_OpXor32(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Xor32 (Xor32 x y) y)
+       // match: (Xor32 (Xor32 y x) x)
        // cond:
-       // result: x
+       // result: y
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpXor32 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (Xor32 (Xor32 i:(Const32 <t>) z) x)
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Xor32 i (Xor32 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpXor32 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpXor32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor32 (Xor32 z i:(Const32 <t>)) x)
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Xor32 i (Xor32 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpXor32 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpXor32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor32 x (Xor32 i:(Const32 <t>) z))
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Xor32 i (Xor32 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor32 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpXor32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor32 x (Xor32 z i:(Const32 <t>)))
+       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
+       // result: (Xor32 i (Xor32 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor32 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst32 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+                       break
+               }
+               v.reset(OpXor32)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor32, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor32 (Const32 <t> [c]) (Xor32 (Const32 <t> [d]) x))
+       // cond:
+       // result: (Xor32 (Const32 <t> [int64(int32(c^d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor32 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst32 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpXor32)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c ^ d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Xor32 (Const32 <t> [c]) (Xor32 x (Const32 <t> [d])))
+       // cond:
+       // result: (Xor32 (Const32 <t> [int64(int32(c^d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor32 {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (Xor32 x l:(Xor32 _ _))
-       // cond: (x.Op != OpXor32 && x.Op != OpConst32)
-       // result: (Xor32 l x)
-       for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpXor32 {
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst32 {
                        break
                }
-               if !(x.Op != OpXor32 && x.Op != OpConst32) {
+               if v_1_1.Type != t {
                        break
                }
+               d := v_1_1.AuxInt
                v.reset(OpXor32)
-               v.AddArg(l)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c ^ d))
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Xor32 (Xor32 i:(Const32 <t>) z) x)
-       // cond: (z.Op != OpConst32 && x.Op != OpConst32)
-       // result: (Xor32 i (Xor32 <t> z x))
+       // match: (Xor32 (Xor32 (Const32 <t> [d]) x) (Const32 <t> [c]))
+       // cond:
+       // result: (Xor32 (Const32 <t> [int64(int32(c^d))]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpXor32 {
                        break
                }
-               i := v_0.Args[0]
-               if i.Op != OpConst32 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst32 {
                        break
                }
-               t := i.Type
-               z := v_0.Args[1]
-               x := v.Args[1]
-               if !(z.Op != OpConst32 && x.Op != OpConst32) {
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
                        break
                }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
                v.reset(OpXor32)
-               v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpXor32, t)
-               v0.AddArg(z)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = int64(int32(c ^ d))
                v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
-       // match: (Xor32 (Const32 <t> [c]) (Xor32 (Const32 <t> [d]) x))
+       // match: (Xor32 (Xor32 x (Const32 <t> [d])) (Const32 <t> [c]))
        // cond:
        // result: (Xor32 (Const32 <t> [int64(int32(c^d))]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst32 {
+               if v_0.Op != OpXor32 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpXor32 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst32 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst32 {
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               c := v_1.AuxInt
                v.reset(OpXor32)
                v0 := b.NewValue0(v.Pos, OpConst32, t)
                v0.AuxInt = int64(int32(c ^ d))
@@ -17165,7 +22639,7 @@ func rewriteValuegeneric_OpXor32(v *Value) bool {
 func rewriteValuegeneric_OpXor64(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Xor64  (Const64 [c])  (Const64 [d]))
+       // match: (Xor64 (Const64 [c]) (Const64 [d]))
        // cond:
        // result: (Const64 [c^d])
        for {
@@ -17183,25 +22657,22 @@ func rewriteValuegeneric_OpXor64(v *Value) bool {
                v.AuxInt = c ^ d
                return true
        }
-       // match: (Xor64 x (Const64 <t> [c]))
-       // cond: x.Op != OpConst64
-       // result: (Xor64 (Const64 <t> [c]) x)
+       // match: (Xor64 (Const64 [d]) (Const64 [c]))
+       // cond:
+       // result: (Const64 [c^d])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst64 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst64) {
-                       break
-               }
-               v.reset(OpXor64)
-               v0 := b.NewValue0(v.Pos, OpConst64, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst64)
+               v.AuxInt = c ^ d
                return true
        }
        // match: (Xor64 x x)
@@ -17233,6 +22704,23 @@ func rewriteValuegeneric_OpXor64(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Xor64 x (Const64 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Xor64 x (Xor64 x y))
        // cond:
        // result: y
@@ -17287,90 +22775,241 @@ func rewriteValuegeneric_OpXor64(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Xor64 (Xor64 x y) y)
+       // match: (Xor64 (Xor64 y x) x)
        // cond:
-       // result: x
+       // result: y
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpXor64 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (Xor64 (Xor64 i:(Const64 <t>) z) x)
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Xor64 i (Xor64 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpXor64 {
+                       break
+               }
+               i := v_0.Args[0]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpXor64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor64 (Xor64 z i:(Const64 <t>)) x)
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Xor64 i (Xor64 <t> z x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpXor64 {
+                       break
+               }
+               z := v_0.Args[0]
+               i := v_0.Args[1]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               x := v.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpXor64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor64 x (Xor64 i:(Const64 <t>) z))
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Xor64 i (Xor64 <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor64 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpXor64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (Xor64 x l:(Xor64 _ _))
-       // cond: (x.Op != OpXor64 && x.Op != OpConst64)
-       // result: (Xor64 l x)
+       // match: (Xor64 x (Xor64 z i:(Const64 <t>)))
+       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
+       // result: (Xor64 i (Xor64 <t> z x))
        for {
                x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpXor64 {
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor64 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst64 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+                       break
+               }
+               v.reset(OpXor64)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor64, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor64 (Const64 <t> [c]) (Xor64 (Const64 <t> [d]) x))
+       // cond:
+       // result: (Xor64 (Const64 <t> [c^d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor64 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpConst64 {
+                       break
+               }
+               if v_1_0.Type != t {
+                       break
+               }
+               d := v_1_0.AuxInt
+               x := v_1.Args[1]
+               v.reset(OpXor64)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c ^ d
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Xor64 (Const64 <t> [c]) (Xor64 x (Const64 <t> [d])))
+       // cond:
+       // result: (Xor64 (Const64 <t> [c^d]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor64 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst64 {
                        break
                }
-               if !(x.Op != OpXor64 && x.Op != OpConst64) {
+               if v_1_1.Type != t {
                        break
                }
+               d := v_1_1.AuxInt
                v.reset(OpXor64)
-               v.AddArg(l)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c ^ d
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (Xor64 (Xor64 i:(Const64 <t>) z) x)
-       // cond: (z.Op != OpConst64 && x.Op != OpConst64)
-       // result: (Xor64 i (Xor64 <t> z x))
+       // match: (Xor64 (Xor64 (Const64 <t> [d]) x) (Const64 <t> [c]))
+       // cond:
+       // result: (Xor64 (Const64 <t> [c^d]) x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpXor64 {
                        break
                }
-               i := v_0.Args[0]
-               if i.Op != OpConst64 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst64 {
                        break
                }
-               t := i.Type
-               z := v_0.Args[1]
-               x := v.Args[1]
-               if !(z.Op != OpConst64 && x.Op != OpConst64) {
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.Type != t {
                        break
                }
+               c := v_1.AuxInt
                v.reset(OpXor64)
-               v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpXor64, t)
-               v0.AddArg(z)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = c ^ d
                v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
-       // match: (Xor64 (Const64 <t> [c]) (Xor64 (Const64 <t> [d]) x))
+       // match: (Xor64 (Xor64 x (Const64 <t> [d])) (Const64 <t> [c]))
        // cond:
        // result: (Xor64 (Const64 <t> [c^d]) x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpConst64 {
+               if v_0.Op != OpXor64 {
                        break
                }
-               t := v_0.Type
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpXor64 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst64 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpConst64 {
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
                        break
                }
-               if v_1_0.Type != t {
+               if v_1.Type != t {
                        break
                }
-               d := v_1_0.AuxInt
-               x := v_1.Args[1]
+               c := v_1.AuxInt
                v.reset(OpXor64)
                v0 := b.NewValue0(v.Pos, OpConst64, t)
                v0.AuxInt = c ^ d
@@ -17383,7 +23022,7 @@ func rewriteValuegeneric_OpXor64(v *Value) bool {
 func rewriteValuegeneric_OpXor8(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (Xor8   (Const8 [c])   (Const8 [d]))
+       // match: (Xor8 (Const8 [c]) (Const8 [d]))
        // cond:
        // result: (Const8  [int64(int8(c^d))])
        for {
@@ -17401,28 +23040,25 @@ func rewriteValuegeneric_OpXor8(v *Value) bool {
                v.AuxInt = int64(int8(c ^ d))
                return true
        }
-       // match: (Xor8  x (Const8  <t> [c]))
-       // cond: x.Op != OpConst8
-       // result: (Xor8  (Const8  <t> [c]) x)
+       // match: (Xor8 (Const8 [d]) (Const8 [c]))
+       // cond:
+       // result: (Const8  [int64(int8(c^d))])
        for {
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               d := v_0.AuxInt
                v_1 := v.Args[1]
                if v_1.Op != OpConst8 {
                        break
                }
-               t := v_1.Type
                c := v_1.AuxInt
-               if !(x.Op != OpConst8) {
-                       break
-               }
-               v.reset(OpXor8)
-               v0 := b.NewValue0(v.Pos, OpConst8, t)
-               v0.AuxInt = c
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpConst8)
+               v.AuxInt = int64(int8(c ^ d))
                return true
        }
-       // match: (Xor8  x x)
+       // match: (Xor8 x x)
        // cond:
        // result: (Const8  [0])
        for {
@@ -17434,7 +23070,7 @@ func rewriteValuegeneric_OpXor8(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (Xor8  (Const8  [0]) x)
+       // match: (Xor8 (Const8 [0]) x)
        // cond:
        // result: x
        for {
@@ -17451,7 +23087,24 @@ func rewriteValuegeneric_OpXor8(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (Xor8  x (Xor8  x y))
+       // match: (Xor8 x (Const8 [0]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Xor8 x (Xor8 x y))
        // cond:
        // result: y
        for {
@@ -17469,7 +23122,7 @@ func rewriteValuegeneric_OpXor8(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Xor8  x (Xor8  y x))
+       // match: (Xor8 x (Xor8 y x))
        // cond:
        // result: y
        for {
@@ -17487,7 +23140,7 @@ func rewriteValuegeneric_OpXor8(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Xor8  (Xor8  x y) x)
+       // match: (Xor8 (Xor8 x y) x)
        // cond:
        // result: y
        for {
@@ -17505,42 +23158,51 @@ func rewriteValuegeneric_OpXor8(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (Xor8  (Xor8  x y) y)
+       // match: (Xor8 (Xor8 y x) x)
        // cond:
-       // result: x
+       // result: y
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpXor8 {
                        break
                }
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if y != v.Args[1] {
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (Xor8  x l:(Xor8  _ _))
-       // cond: (x.Op != OpXor8  && x.Op != OpConst8)
-       // result: (Xor8  l x)
+       // match: (Xor8 (Xor8 i:(Const8 <t>) z) x)
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Xor8  i (Xor8  <t> z x))
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpXor8 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpXor8 {
                        break
                }
-               if !(x.Op != OpXor8 && x.Op != OpConst8) {
+               i := v_0.Args[0]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               z := v_0.Args[1]
+               x := v.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
                        break
                }
                v.reset(OpXor8)
-               v.AddArg(l)
-               v.AddArg(x)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (Xor8  (Xor8  i:(Const8  <t>) z) x)
+       // match: (Xor8 (Xor8 z i:(Const8 <t>)) x)
        // cond: (z.Op != OpConst8  && x.Op != OpConst8)
        // result: (Xor8  i (Xor8  <t> z x))
        for {
@@ -17548,12 +23210,12 @@ func rewriteValuegeneric_OpXor8(v *Value) bool {
                if v_0.Op != OpXor8 {
                        break
                }
-               i := v_0.Args[0]
+               z := v_0.Args[0]
+               i := v_0.Args[1]
                if i.Op != OpConst8 {
                        break
                }
                t := i.Type
-               z := v_0.Args[1]
                x := v.Args[1]
                if !(z.Op != OpConst8 && x.Op != OpConst8) {
                        break
@@ -17566,7 +23228,59 @@ func rewriteValuegeneric_OpXor8(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (Xor8  (Const8  <t> [c]) (Xor8  (Const8  <t> [d]) x))
+       // match: (Xor8 x (Xor8 i:(Const8 <t>) z))
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Xor8  i (Xor8  <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor8 {
+                       break
+               }
+               i := v_1.Args[0]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               z := v_1.Args[1]
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpXor8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor8 x (Xor8 z i:(Const8 <t>)))
+       // cond: (z.Op != OpConst8  && x.Op != OpConst8)
+       // result: (Xor8  i (Xor8  <t> z x))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor8 {
+                       break
+               }
+               z := v_1.Args[0]
+               i := v_1.Args[1]
+               if i.Op != OpConst8 {
+                       break
+               }
+               t := i.Type
+               if !(z.Op != OpConst8 && x.Op != OpConst8) {
+                       break
+               }
+               v.reset(OpXor8)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpXor8, t)
+               v0.AddArg(z)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Xor8 (Const8 <t> [c]) (Xor8 (Const8 <t> [d]) x))
        // cond:
        // result: (Xor8  (Const8  <t> [int64(int8(c^d))]) x)
        for {
@@ -17596,6 +23310,96 @@ func rewriteValuegeneric_OpXor8(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Xor8 (Const8 <t> [c]) (Xor8 x (Const8 <t> [d])))
+       // cond:
+       // result: (Xor8  (Const8  <t> [int64(int8(c^d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpXor8 {
+                       break
+               }
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpConst8 {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               d := v_1_1.AuxInt
+               v.reset(OpXor8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c ^ d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Xor8 (Xor8 (Const8 <t> [d]) x) (Const8 <t> [c]))
+       // cond:
+       // result: (Xor8  (Const8  <t> [int64(int8(c^d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpXor8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpConst8 {
+                       break
+               }
+               t := v_0_0.Type
+               d := v_0_0.AuxInt
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpXor8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c ^ d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (Xor8 (Xor8 x (Const8 <t> [d])) (Const8 <t> [c]))
+       // cond:
+       // result: (Xor8  (Const8  <t> [int64(int8(c^d))]) x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpXor8 {
+                       break
+               }
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpConst8 {
+                       break
+               }
+               t := v_0_1.Type
+               d := v_0_1.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               if v_1.Type != t {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpXor8)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = int64(int8(c ^ d))
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpZero(v *Value) bool {
@@ -17758,7 +23562,7 @@ func rewriteValuegeneric_OpZeroExt32to64(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpZeroExt8to16(v *Value) bool {
-       // match: (ZeroExt8to16  (Const8  [c]))
+       // match: (ZeroExt8to16 (Const8 [c]))
        // cond:
        // result: (Const16 [int64( uint8(c))])
        for {
@@ -17771,7 +23575,7 @@ func rewriteValuegeneric_OpZeroExt8to16(v *Value) bool {
                v.AuxInt = int64(uint8(c))
                return true
        }
-       // match: (ZeroExt8to16  (Trunc16to8  x:(Rsh16Ux64 _ (Const64 [s]))))
+       // match: (ZeroExt8to16 (Trunc16to8 x:(Rsh16Ux64 _ (Const64 [s]))))
        // cond: s >= 8
        // result: x
        for {
@@ -17799,7 +23603,7 @@ func rewriteValuegeneric_OpZeroExt8to16(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpZeroExt8to32(v *Value) bool {
-       // match: (ZeroExt8to32  (Const8  [c]))
+       // match: (ZeroExt8to32 (Const8 [c]))
        // cond:
        // result: (Const32 [int64( uint8(c))])
        for {
@@ -17812,7 +23616,7 @@ func rewriteValuegeneric_OpZeroExt8to32(v *Value) bool {
                v.AuxInt = int64(uint8(c))
                return true
        }
-       // match: (ZeroExt8to32  (Trunc32to8  x:(Rsh32Ux64 _ (Const64 [s]))))
+       // match: (ZeroExt8to32 (Trunc32to8 x:(Rsh32Ux64 _ (Const64 [s]))))
        // cond: s >= 24
        // result: x
        for {
@@ -17840,7 +23644,7 @@ func rewriteValuegeneric_OpZeroExt8to32(v *Value) bool {
        return false
 }
 func rewriteValuegeneric_OpZeroExt8to64(v *Value) bool {
-       // match: (ZeroExt8to64  (Const8  [c]))
+       // match: (ZeroExt8to64 (Const8 [c]))
        // cond:
        // result: (Const64 [int64( uint8(c))])
        for {
@@ -17853,7 +23657,7 @@ func rewriteValuegeneric_OpZeroExt8to64(v *Value) bool {
                v.AuxInt = int64(uint8(c))
                return true
        }
-       // match: (ZeroExt8to64  (Trunc64to8  x:(Rsh64Ux64 _ (Const64 [s]))))
+       // match: (ZeroExt8to64 (Trunc64to8 x:(Rsh64Ux64 _ (Const64 [s]))))
        // cond: s >= 56
        // result: x
        for {