Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: merge more shifts into stores
author: Josh Bleecher Snyder <josharian@gmail.com>
Tue, 7 Jan 2020 21:41:02 +0000 (13:41 -0800)
committer: Josh Bleecher Snyder <josharian@gmail.com>
Thu, 20 Feb 2020 18:37:38 +0000 (18:37 +0000)
Updates #36223

(Might fix #36223. I'm not sure whether there are more outstanding.)

This helps a bit, but not as much as I'd expected/hoped.

file                                      before  after   Δ       %
runtime.s                                 477286  477256  -30     -0.006%
bytes.s                                   31089   31085   -4      -0.013%
time.s                                    83561   83547   -14     -0.017%
strings.s                                 43284   43280   -4      -0.009%
compress/flate.s                          51374   51295   -79     -0.154%
math/big.s                                184283  184256  -27     -0.015%
crypto/elliptic.s                         51649   51577   -72     -0.139%
crypto/sha512.s                           8661    8644    -17     -0.196%
crypto/sha1.s                             6975    6959    -16     -0.229%
crypto/sha256.s                           6412    6393    -19     -0.296%
vendor/golang.org/x/text/unicode/bidi.s   27158   27146   -12     -0.044%
vendor/golang.org/x/text/unicode/norm.s   66802   66788   -14     -0.021%
net/http.s                                560936  560929  -7      -0.001%
text/template.s                           96475   96467   -8      -0.008%
go/parser.s                               80284   80280   -4      -0.005%
text/tabwriter.s                          9618    9611    -7      -0.073%
go/printer.s                              78502   78499   -3      -0.004%
go/types.s                                321815  321807  -8      -0.002%
internal/xcoff.s                          23175   23171   -4      -0.017%
image/jpeg.s                              36609   36587   -22     -0.060%
cmd/vendor/golang.org/x/arch/x86/x86asm.s 81274   81001   -273    -0.336%
cmd/internal/obj.s                        115184  115126  -58     -0.050%
cmd/internal/obj/arm64.s                  151502  151487  -15     -0.010%
cmd/internal/obj/s390x.s                  128054  128046  -8      -0.006%
cmd/internal/obj/wasm.s                   44295   44291   -4      -0.009%
cmd/compile/internal/ssa.s                4201992 4209504 +7512   +0.179%
cmd/compile/internal/gc.s                 1555029 1555011 -18     -0.001%
total                                     9792875 9799640 +6765   +0.069%

Change-Id: If4a857c0953a766578e68aa299b112a20d9b2b86
Reviewed-on: https://go-review.googlesource.com/c/go/+/213704
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go

index 18513a1c1d4fb33d2bcb0b960a8b93eeac4be6be..08aa65b0a804108bb464f9762846cbd582b6fb77 100644 (file)
@@ -597,15 +597,14 @@ func init() {
                {name: "MOVQloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVQ", scale: 1, aux: "SymOff", typ: "UInt64", symEffect: "Read"},    // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem
                {name: "MOVQloadidx8", argLength: 3, reg: gploadidx, asm: "MOVQ", scale: 8, aux: "SymOff", typ: "UInt64", symEffect: "Read"},                       // load 8 bytes from arg0+8*arg1+auxint+aux. arg2=mem
                // TODO: sign-extending indexed loads
-               // TODO: mark the MOVXstoreidx1 ops as commutative.  Generates too many rewrite rules at the moment.
-               {name: "MOVBstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVB", scale: 1, aux: "SymOff", symEffect: "Write"}, // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVWstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVW", scale: 1, aux: "SymOff", symEffect: "Write"}, // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVWstoreidx2", argLength: 4, reg: gpstoreidx, asm: "MOVW", scale: 2, aux: "SymOff", symEffect: "Write"}, // store 2 bytes in arg2 to arg0+2*arg1+auxint+aux. arg3=mem
-               {name: "MOVLstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVL", scale: 1, aux: "SymOff", symEffect: "Write"}, // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVLstoreidx4", argLength: 4, reg: gpstoreidx, asm: "MOVL", scale: 4, aux: "SymOff", symEffect: "Write"}, // store 4 bytes in arg2 to arg0+4*arg1+auxint+aux. arg3=mem
-               {name: "MOVLstoreidx8", argLength: 4, reg: gpstoreidx, asm: "MOVL", scale: 8, aux: "SymOff", symEffect: "Write"}, // store 4 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
-               {name: "MOVQstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVQ", scale: 1, aux: "SymOff", symEffect: "Write"}, // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
-               {name: "MOVQstoreidx8", argLength: 4, reg: gpstoreidx, asm: "MOVQ", scale: 8, aux: "SymOff", symEffect: "Write"}, // store 8 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
+               {name: "MOVBstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVB", scale: 1, aux: "SymOff", symEffect: "Write"}, // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVWstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVW", scale: 1, aux: "SymOff", symEffect: "Write"}, // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVWstoreidx2", argLength: 4, reg: gpstoreidx, asm: "MOVW", scale: 2, aux: "SymOff", symEffect: "Write"},                    // store 2 bytes in arg2 to arg0+2*arg1+auxint+aux. arg3=mem
+               {name: "MOVLstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVL", scale: 1, aux: "SymOff", symEffect: "Write"}, // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVLstoreidx4", argLength: 4, reg: gpstoreidx, asm: "MOVL", scale: 4, aux: "SymOff", symEffect: "Write"},                    // store 4 bytes in arg2 to arg0+4*arg1+auxint+aux. arg3=mem
+               {name: "MOVLstoreidx8", argLength: 4, reg: gpstoreidx, asm: "MOVL", scale: 8, aux: "SymOff", symEffect: "Write"},                    // store 4 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
+               {name: "MOVQstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVQ", scale: 1, aux: "SymOff", symEffect: "Write"}, // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVQstoreidx8", argLength: 4, reg: gpstoreidx, asm: "MOVQ", scale: 8, aux: "SymOff", symEffect: "Write"},                    // store 8 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
                // TODO: add size-mismatched indexed loads, like MOVBstoreidx4.
 
                // For storeconst ops, the AuxInt field encodes both
@@ -616,13 +615,13 @@ func init() {
                {name: "MOVLstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVL", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low 4 bytes of ...
                {name: "MOVQstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVQ", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of ...
 
-               {name: "MOVBstoreconstidx1", argLength: 3, reg: gpstoreconstidx, asm: "MOVB", scale: 1, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, // store low byte of ValAndOff(AuxInt).Val() to arg0+1*arg1+ValAndOff(AuxInt).Off()+aux.  arg2=mem
-               {name: "MOVWstoreconstidx1", argLength: 3, reg: gpstoreconstidx, asm: "MOVW", scale: 1, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, // store low 2 bytes of ... arg1 ...
-               {name: "MOVWstoreconstidx2", argLength: 3, reg: gpstoreconstidx, asm: "MOVW", scale: 2, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, // store low 2 bytes of ... 2*arg1 ...
-               {name: "MOVLstoreconstidx1", argLength: 3, reg: gpstoreconstidx, asm: "MOVL", scale: 1, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, // store low 4 bytes of ... arg1 ...
-               {name: "MOVLstoreconstidx4", argLength: 3, reg: gpstoreconstidx, asm: "MOVL", scale: 4, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, // store low 4 bytes of ... 4*arg1 ...
-               {name: "MOVQstoreconstidx1", argLength: 3, reg: gpstoreconstidx, asm: "MOVQ", scale: 1, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, // store 8 bytes of ... arg1 ...
-               {name: "MOVQstoreconstidx8", argLength: 3, reg: gpstoreconstidx, asm: "MOVQ", scale: 8, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, // store 8 bytes of ... 8*arg1 ...
+               {name: "MOVBstoreconstidx1", argLength: 3, reg: gpstoreconstidx, commutative: true, asm: "MOVB", scale: 1, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, // store low byte of ValAndOff(AuxInt).Val() to arg0+1*arg1+ValAndOff(AuxInt).Off()+aux.  arg2=mem
+               {name: "MOVWstoreconstidx1", argLength: 3, reg: gpstoreconstidx, commutative: true, asm: "MOVW", scale: 1, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, // store low 2 bytes of ... arg1 ...
+               {name: "MOVWstoreconstidx2", argLength: 3, reg: gpstoreconstidx, asm: "MOVW", scale: 2, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"},                    // store low 2 bytes of ... 2*arg1 ...
+               {name: "MOVLstoreconstidx1", argLength: 3, reg: gpstoreconstidx, commutative: true, asm: "MOVL", scale: 1, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, // store low 4 bytes of ... arg1 ...
+               {name: "MOVLstoreconstidx4", argLength: 3, reg: gpstoreconstidx, asm: "MOVL", scale: 4, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"},                    // store low 4 bytes of ... 4*arg1 ...
+               {name: "MOVQstoreconstidx1", argLength: 3, reg: gpstoreconstidx, commutative: true, asm: "MOVQ", scale: 1, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, // store 8 bytes of ... arg1 ...
+               {name: "MOVQstoreconstidx8", argLength: 3, reg: gpstoreconstidx, asm: "MOVQ", scale: 8, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"},                    // store 8 bytes of ... 8*arg1 ...
 
                // arg0 = pointer to start of memory to zero
                // arg1 = value to store (will always be zero)
index 85126619431b461aacf62514b88d52c6d8a40723..6e18f1933b1ff605dc424df95f4fb8417854810f 100644 (file)
@@ -10969,12 +10969,13 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVBstoreidx1",
-               auxType:   auxSymOff,
-               argLen:    4,
-               symEffect: SymWrite,
-               asm:       x86.AMOVB,
-               scale:     1,
+               name:        "MOVBstoreidx1",
+               auxType:     auxSymOff,
+               argLen:      4,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVB,
+               scale:       1,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -10984,12 +10985,13 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVWstoreidx1",
-               auxType:   auxSymOff,
-               argLen:    4,
-               symEffect: SymWrite,
-               asm:       x86.AMOVW,
-               scale:     1,
+               name:        "MOVWstoreidx1",
+               auxType:     auxSymOff,
+               argLen:      4,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVW,
+               scale:       1,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -11014,12 +11016,13 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVLstoreidx1",
-               auxType:   auxSymOff,
-               argLen:    4,
-               symEffect: SymWrite,
-               asm:       x86.AMOVL,
-               scale:     1,
+               name:        "MOVLstoreidx1",
+               auxType:     auxSymOff,
+               argLen:      4,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVL,
+               scale:       1,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -11059,12 +11062,13 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVQstoreidx1",
-               auxType:   auxSymOff,
-               argLen:    4,
-               symEffect: SymWrite,
-               asm:       x86.AMOVQ,
-               scale:     1,
+               name:        "MOVQstoreidx1",
+               auxType:     auxSymOff,
+               argLen:      4,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVQ,
+               scale:       1,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -11141,12 +11145,13 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVBstoreconstidx1",
-               auxType:   auxSymValAndOff,
-               argLen:    3,
-               symEffect: SymWrite,
-               asm:       x86.AMOVB,
-               scale:     1,
+               name:        "MOVBstoreconstidx1",
+               auxType:     auxSymValAndOff,
+               argLen:      3,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVB,
+               scale:       1,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -11155,12 +11160,13 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVWstoreconstidx1",
-               auxType:   auxSymValAndOff,
-               argLen:    3,
-               symEffect: SymWrite,
-               asm:       x86.AMOVW,
-               scale:     1,
+               name:        "MOVWstoreconstidx1",
+               auxType:     auxSymValAndOff,
+               argLen:      3,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVW,
+               scale:       1,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -11183,12 +11189,13 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVLstoreconstidx1",
-               auxType:   auxSymValAndOff,
-               argLen:    3,
-               symEffect: SymWrite,
-               asm:       x86.AMOVL,
-               scale:     1,
+               name:        "MOVLstoreconstidx1",
+               auxType:     auxSymValAndOff,
+               argLen:      3,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVL,
+               scale:       1,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
@@ -11211,12 +11218,13 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:      "MOVQstoreconstidx1",
-               auxType:   auxSymValAndOff,
-               argLen:    3,
-               symEffect: SymWrite,
-               asm:       x86.AMOVQ,
-               scale:     1,
+               name:        "MOVQstoreconstidx1",
+               auxType:     auxSymValAndOff,
+               argLen:      3,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVQ,
+               scale:       1,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
index 11bf7141959db0d33a98727381cd400c35ce98bb..71ecdf081a279e5c2bad78f691b82c5ff956f7cd 100644 (file)
@@ -12317,23 +12317,26 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconstidx1_0(v *Value) bool {
                x := v.AuxInt
                sym := v.Aux
                mem := v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               if !(ValAndOff(x).canAdd(c)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       v_0 := v.Args[_i0]
+                       if v_0.Op != OpAMD64ADDQconst {
+                               continue
+                       }
+                       c := v_0.AuxInt
+                       ptr := v_0.Args[0]
+                       idx := v.Args[1^_i0]
+                       if !(ValAndOff(x).canAdd(c)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MOVBstoreconstidx1)
+                       v.AuxInt = ValAndOff(x).add(c)
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(mem)
+                       return true
                }
-               v.reset(OpAMD64MOVBstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVBstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
        // cond: ValAndOff(x).canAdd(c)
@@ -12342,23 +12345,26 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconstidx1_0(v *Value) bool {
                x := v.AuxInt
                sym := v.Aux
                mem := v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
-               if !(ValAndOff(x).canAdd(c)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       ptr := v.Args[_i0]
+                       v_1 := v.Args[1^_i0]
+                       if v_1.Op != OpAMD64ADDQconst {
+                               continue
+                       }
+                       c := v_1.AuxInt
+                       idx := v_1.Args[0]
+                       if !(ValAndOff(x).canAdd(c)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MOVBstoreconstidx1)
+                       v.AuxInt = ValAndOff(x).add(c)
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(mem)
+                       return true
                }
-               v.reset(OpAMD64MOVBstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem))
        // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
@@ -12367,27 +12373,32 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconstidx1_0(v *Value) bool {
                c := v.AuxInt
                s := v.Aux
                _ = v.Args[2]
-               p := v.Args[0]
-               i := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVBstoreconstidx1 {
-                       break
-               }
-               a := x.AuxInt
-               if x.Aux != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] || i != x.Args[1] || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       i := v.Args[1^_i0]
+                       x := v.Args[2]
+                       if x.Op != OpAMD64MOVBstoreconstidx1 {
+                               continue
+                       }
+                       a := x.AuxInt
+                       if x.Aux != s {
+                               continue
+                       }
+                       mem := x.Args[2]
+                       for _i1 := 0; _i1 <= 1; _i1++ {
+                               if p != x.Args[_i1] || i != x.Args[1^_i1] || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
+                                       continue
+                               }
+                               v.reset(OpAMD64MOVWstoreconstidx1)
+                               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off())
+                               v.Aux = s
+                               v.AddArg(p)
+                               v.AddArg(i)
+                               v.AddArg(mem)
+                               return true
+                       }
                }
-               v.reset(OpAMD64MOVWstoreconstidx1)
-               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off())
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(i)
-               v.AddArg(mem)
-               return true
+               break
        }
        return false
 }
@@ -12400,25 +12411,28 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool {
                c := v.AuxInt
                sym := v.Aux
                mem := v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               if !(is32Bit(c + d)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       v_0 := v.Args[_i0]
+                       if v_0.Op != OpAMD64ADDQconst {
+                               continue
+                       }
+                       d := v_0.AuxInt
+                       ptr := v_0.Args[0]
+                       idx := v.Args[1^_i0]
+                       val := v.Args[2]
+                       if !(is32Bit(c + d)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MOVBstoreidx1)
+                       v.AuxInt = c + d
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(val)
+                       v.AddArg(mem)
+                       return true
                }
-               v.reset(OpAMD64MOVBstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVBstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
        // cond: is32Bit(c+d)
@@ -12427,25 +12441,28 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool {
                c := v.AuxInt
                sym := v.Aux
                mem := v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               if !(is32Bit(c + d)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       ptr := v.Args[_i0]
+                       v_1 := v.Args[1^_i0]
+                       if v_1.Op != OpAMD64ADDQconst {
+                               continue
+                       }
+                       d := v_1.AuxInt
+                       idx := v_1.Args[0]
+                       val := v.Args[2]
+                       if !(is32Bit(c + d)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MOVBstoreidx1)
+                       v.AuxInt = c + d
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(val)
+                       v.AddArg(mem)
+                       return true
                }
-               v.reset(OpAMD64MOVBstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVBstoreidx1 [i] {s} p idx w x0:(MOVBstoreidx1 [i-1] {s} p idx (SHRWconst [8] w) mem))
        // cond: x0.Uses == 1 && clobber(x0)
@@ -12454,32 +12471,37 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               w := v.Args[2]
-               x0 := v.Args[3]
-               if x0.Op != OpAMD64MOVBstoreidx1 || x0.AuxInt != i-1 || x0.Aux != s {
-                       break
-               }
-               mem := x0.Args[3]
-               if p != x0.Args[0] || idx != x0.Args[1] {
-                       break
-               }
-               x0_2 := x0.Args[2]
-               if x0_2.Op != OpAMD64SHRWconst || x0_2.AuxInt != 8 || w != x0_2.Args[0] || !(x0.Uses == 1 && clobber(x0)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       idx := v.Args[1^_i0]
+                       w := v.Args[2]
+                       x0 := v.Args[3]
+                       if x0.Op != OpAMD64MOVBstoreidx1 || x0.AuxInt != i-1 || x0.Aux != s {
+                               continue
+                       }
+                       mem := x0.Args[3]
+                       for _i1 := 0; _i1 <= 1; _i1++ {
+                               if p != x0.Args[_i1] || idx != x0.Args[1^_i1] {
+                                       continue
+                               }
+                               x0_2 := x0.Args[2]
+                               if x0_2.Op != OpAMD64SHRWconst || x0_2.AuxInt != 8 || w != x0_2.Args[0] || !(x0.Uses == 1 && clobber(x0)) {
+                                       continue
+                               }
+                               v.reset(OpAMD64MOVWstoreidx1)
+                               v.AuxInt = i - 1
+                               v.Aux = s
+                               v.AddArg(p)
+                               v.AddArg(idx)
+                               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, w.Type)
+                               v0.AuxInt = 8
+                               v0.AddArg(w)
+                               v.AddArg(v0)
+                               v.AddArg(mem)
+                               return true
+                       }
                }
-               v.reset(OpAMD64MOVWstoreidx1)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, w.Type)
-               v0.AuxInt = 8
-               v0.AddArg(w)
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVBstoreidx1 [i] {s} p idx w x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
        // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
@@ -12488,55 +12510,64 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               w := v.Args[2]
-               x2 := v.Args[3]
-               if x2.Op != OpAMD64MOVBstoreidx1 || x2.AuxInt != i-1 || x2.Aux != s {
-                       break
-               }
-               _ = x2.Args[3]
-               if p != x2.Args[0] || idx != x2.Args[1] {
-                       break
-               }
-               x2_2 := x2.Args[2]
-               if x2_2.Op != OpAMD64SHRLconst || x2_2.AuxInt != 8 || w != x2_2.Args[0] {
-                       break
-               }
-               x1 := x2.Args[3]
-               if x1.Op != OpAMD64MOVBstoreidx1 || x1.AuxInt != i-2 || x1.Aux != s {
-                       break
-               }
-               _ = x1.Args[3]
-               if p != x1.Args[0] || idx != x1.Args[1] {
-                       break
-               }
-               x1_2 := x1.Args[2]
-               if x1_2.Op != OpAMD64SHRLconst || x1_2.AuxInt != 16 || w != x1_2.Args[0] {
-                       break
-               }
-               x0 := x1.Args[3]
-               if x0.Op != OpAMD64MOVBstoreidx1 || x0.AuxInt != i-3 || x0.Aux != s {
-                       break
-               }
-               mem := x0.Args[3]
-               if p != x0.Args[0] || idx != x0.Args[1] {
-                       break
-               }
-               x0_2 := x0.Args[2]
-               if x0_2.Op != OpAMD64SHRLconst || x0_2.AuxInt != 24 || w != x0_2.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       idx := v.Args[1^_i0]
+                       w := v.Args[2]
+                       x2 := v.Args[3]
+                       if x2.Op != OpAMD64MOVBstoreidx1 || x2.AuxInt != i-1 || x2.Aux != s {
+                               continue
+                       }
+                       _ = x2.Args[3]
+                       for _i1 := 0; _i1 <= 1; _i1++ {
+                               if p != x2.Args[_i1] || idx != x2.Args[1^_i1] {
+                                       continue
+                               }
+                               x2_2 := x2.Args[2]
+                               if x2_2.Op != OpAMD64SHRLconst || x2_2.AuxInt != 8 || w != x2_2.Args[0] {
+                                       continue
+                               }
+                               x1 := x2.Args[3]
+                               if x1.Op != OpAMD64MOVBstoreidx1 || x1.AuxInt != i-2 || x1.Aux != s {
+                                       continue
+                               }
+                               _ = x1.Args[3]
+                               for _i2 := 0; _i2 <= 1; _i2++ {
+                                       if p != x1.Args[_i2] || idx != x1.Args[1^_i2] {
+                                               continue
+                                       }
+                                       x1_2 := x1.Args[2]
+                                       if x1_2.Op != OpAMD64SHRLconst || x1_2.AuxInt != 16 || w != x1_2.Args[0] {
+                                               continue
+                                       }
+                                       x0 := x1.Args[3]
+                                       if x0.Op != OpAMD64MOVBstoreidx1 || x0.AuxInt != i-3 || x0.Aux != s {
+                                               continue
+                                       }
+                                       mem := x0.Args[3]
+                                       for _i3 := 0; _i3 <= 1; _i3++ {
+                                               if p != x0.Args[_i3] || idx != x0.Args[1^_i3] {
+                                                       continue
+                                               }
+                                               x0_2 := x0.Args[2]
+                                               if x0_2.Op != OpAMD64SHRLconst || x0_2.AuxInt != 24 || w != x0_2.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+                                                       continue
+                                               }
+                                               v.reset(OpAMD64MOVLstoreidx1)
+                                               v.AuxInt = i - 3
+                                               v.Aux = s
+                                               v.AddArg(p)
+                                               v.AddArg(idx)
+                                               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
+                                               v0.AddArg(w)
+                                               v.AddArg(v0)
+                                               v.AddArg(mem)
+                                               return true
+                                       }
+                               }
+                       }
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 3
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
        // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
@@ -12545,103 +12576,120 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               w := v.Args[2]
-               x6 := v.Args[3]
-               if x6.Op != OpAMD64MOVBstoreidx1 || x6.AuxInt != i-1 || x6.Aux != s {
-                       break
-               }
-               _ = x6.Args[3]
-               if p != x6.Args[0] || idx != x6.Args[1] {
-                       break
-               }
-               x6_2 := x6.Args[2]
-               if x6_2.Op != OpAMD64SHRQconst || x6_2.AuxInt != 8 || w != x6_2.Args[0] {
-                       break
-               }
-               x5 := x6.Args[3]
-               if x5.Op != OpAMD64MOVBstoreidx1 || x5.AuxInt != i-2 || x5.Aux != s {
-                       break
-               }
-               _ = x5.Args[3]
-               if p != x5.Args[0] || idx != x5.Args[1] {
-                       break
-               }
-               x5_2 := x5.Args[2]
-               if x5_2.Op != OpAMD64SHRQconst || x5_2.AuxInt != 16 || w != x5_2.Args[0] {
-                       break
-               }
-               x4 := x5.Args[3]
-               if x4.Op != OpAMD64MOVBstoreidx1 || x4.AuxInt != i-3 || x4.Aux != s {
-                       break
-               }
-               _ = x4.Args[3]
-               if p != x4.Args[0] || idx != x4.Args[1] {
-                       break
-               }
-               x4_2 := x4.Args[2]
-               if x4_2.Op != OpAMD64SHRQconst || x4_2.AuxInt != 24 || w != x4_2.Args[0] {
-                       break
-               }
-               x3 := x4.Args[3]
-               if x3.Op != OpAMD64MOVBstoreidx1 || x3.AuxInt != i-4 || x3.Aux != s {
-                       break
-               }
-               _ = x3.Args[3]
-               if p != x3.Args[0] || idx != x3.Args[1] {
-                       break
-               }
-               x3_2 := x3.Args[2]
-               if x3_2.Op != OpAMD64SHRQconst || x3_2.AuxInt != 32 || w != x3_2.Args[0] {
-                       break
-               }
-               x2 := x3.Args[3]
-               if x2.Op != OpAMD64MOVBstoreidx1 || x2.AuxInt != i-5 || x2.Aux != s {
-                       break
-               }
-               _ = x2.Args[3]
-               if p != x2.Args[0] || idx != x2.Args[1] {
-                       break
-               }
-               x2_2 := x2.Args[2]
-               if x2_2.Op != OpAMD64SHRQconst || x2_2.AuxInt != 40 || w != x2_2.Args[0] {
-                       break
-               }
-               x1 := x2.Args[3]
-               if x1.Op != OpAMD64MOVBstoreidx1 || x1.AuxInt != i-6 || x1.Aux != s {
-                       break
-               }
-               _ = x1.Args[3]
-               if p != x1.Args[0] || idx != x1.Args[1] {
-                       break
-               }
-               x1_2 := x1.Args[2]
-               if x1_2.Op != OpAMD64SHRQconst || x1_2.AuxInt != 48 || w != x1_2.Args[0] {
-                       break
-               }
-               x0 := x1.Args[3]
-               if x0.Op != OpAMD64MOVBstoreidx1 || x0.AuxInt != i-7 || x0.Aux != s {
-                       break
-               }
-               mem := x0.Args[3]
-               if p != x0.Args[0] || idx != x0.Args[1] {
-                       break
-               }
-               x0_2 := x0.Args[2]
-               if x0_2.Op != OpAMD64SHRQconst || x0_2.AuxInt != 56 || w != x0_2.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       idx := v.Args[1^_i0]
+                       w := v.Args[2]
+                       x6 := v.Args[3]
+                       if x6.Op != OpAMD64MOVBstoreidx1 || x6.AuxInt != i-1 || x6.Aux != s {
+                               continue
+                       }
+                       _ = x6.Args[3]
+                       for _i1 := 0; _i1 <= 1; _i1++ {
+                               if p != x6.Args[_i1] || idx != x6.Args[1^_i1] {
+                                       continue
+                               }
+                               x6_2 := x6.Args[2]
+                               if x6_2.Op != OpAMD64SHRQconst || x6_2.AuxInt != 8 || w != x6_2.Args[0] {
+                                       continue
+                               }
+                               x5 := x6.Args[3]
+                               if x5.Op != OpAMD64MOVBstoreidx1 || x5.AuxInt != i-2 || x5.Aux != s {
+                                       continue
+                               }
+                               _ = x5.Args[3]
+                               for _i2 := 0; _i2 <= 1; _i2++ {
+                                       if p != x5.Args[_i2] || idx != x5.Args[1^_i2] {
+                                               continue
+                                       }
+                                       x5_2 := x5.Args[2]
+                                       if x5_2.Op != OpAMD64SHRQconst || x5_2.AuxInt != 16 || w != x5_2.Args[0] {
+                                               continue
+                                       }
+                                       x4 := x5.Args[3]
+                                       if x4.Op != OpAMD64MOVBstoreidx1 || x4.AuxInt != i-3 || x4.Aux != s {
+                                               continue
+                                       }
+                                       _ = x4.Args[3]
+                                       for _i3 := 0; _i3 <= 1; _i3++ {
+                                               if p != x4.Args[_i3] || idx != x4.Args[1^_i3] {
+                                                       continue
+                                               }
+                                               x4_2 := x4.Args[2]
+                                               if x4_2.Op != OpAMD64SHRQconst || x4_2.AuxInt != 24 || w != x4_2.Args[0] {
+                                                       continue
+                                               }
+                                               x3 := x4.Args[3]
+                                               if x3.Op != OpAMD64MOVBstoreidx1 || x3.AuxInt != i-4 || x3.Aux != s {
+                                                       continue
+                                               }
+                                               _ = x3.Args[3]
+                                               for _i4 := 0; _i4 <= 1; _i4++ {
+                                                       if p != x3.Args[_i4] || idx != x3.Args[1^_i4] {
+                                                               continue
+                                                       }
+                                                       x3_2 := x3.Args[2]
+                                                       if x3_2.Op != OpAMD64SHRQconst || x3_2.AuxInt != 32 || w != x3_2.Args[0] {
+                                                               continue
+                                                       }
+                                                       x2 := x3.Args[3]
+                                                       if x2.Op != OpAMD64MOVBstoreidx1 || x2.AuxInt != i-5 || x2.Aux != s {
+                                                               continue
+                                                       }
+                                                       _ = x2.Args[3]
+                                                       for _i5 := 0; _i5 <= 1; _i5++ {
+                                                               if p != x2.Args[_i5] || idx != x2.Args[1^_i5] {
+                                                                       continue
+                                                               }
+                                                               x2_2 := x2.Args[2]
+                                                               if x2_2.Op != OpAMD64SHRQconst || x2_2.AuxInt != 40 || w != x2_2.Args[0] {
+                                                                       continue
+                                                               }
+                                                               x1 := x2.Args[3]
+                                                               if x1.Op != OpAMD64MOVBstoreidx1 || x1.AuxInt != i-6 || x1.Aux != s {
+                                                                       continue
+                                                               }
+                                                               _ = x1.Args[3]
+                                                               for _i6 := 0; _i6 <= 1; _i6++ {
+                                                                       if p != x1.Args[_i6] || idx != x1.Args[1^_i6] {
+                                                                               continue
+                                                                       }
+                                                                       x1_2 := x1.Args[2]
+                                                                       if x1_2.Op != OpAMD64SHRQconst || x1_2.AuxInt != 48 || w != x1_2.Args[0] {
+                                                                               continue
+                                                                       }
+                                                                       x0 := x1.Args[3]
+                                                                       if x0.Op != OpAMD64MOVBstoreidx1 || x0.AuxInt != i-7 || x0.Aux != s {
+                                                                               continue
+                                                                       }
+                                                                       mem := x0.Args[3]
+                                                                       for _i7 := 0; _i7 <= 1; _i7++ {
+                                                                               if p != x0.Args[_i7] || idx != x0.Args[1^_i7] {
+                                                                                       continue
+                                                                               }
+                                                                               x0_2 := x0.Args[2]
+                                                                               if x0_2.Op != OpAMD64SHRQconst || x0_2.AuxInt != 56 || w != x0_2.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                                                                                       continue
+                                                                               }
+                                                                               v.reset(OpAMD64MOVQstoreidx1)
+                                                                               v.AuxInt = i - 7
+                                                                               v.Aux = s
+                                                                               v.AddArg(p)
+                                                                               v.AddArg(idx)
+                                                                               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+                                                                               v0.AddArg(w)
+                                                                               v.AddArg(v0)
+                                                                               v.AddArg(mem)
+                                                                               return true
+                                                                       }
+                                                               }
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = i - 7
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVBstoreidx1 [i] {s} p idx (SHRWconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
        // cond: x.Uses == 1 && clobber(x)
@@ -12650,29 +12698,34 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRWconst || v_2.AuxInt != 8 {
-                       break
-               }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
-                       break
-               }
-               mem := x.Args[3]
-               if p != x.Args[0] || idx != x.Args[1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       idx := v.Args[1^_i0]
+                       v_2 := v.Args[2]
+                       if v_2.Op != OpAMD64SHRWconst || v_2.AuxInt != 8 {
+                               continue
+                       }
+                       w := v_2.Args[0]
+                       x := v.Args[3]
+                       if x.Op != OpAMD64MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
+                               continue
+                       }
+                       mem := x.Args[3]
+                       for _i1 := 0; _i1 <= 1; _i1++ {
+                               if p != x.Args[_i1] || idx != x.Args[1^_i1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
+                                       continue
+                               }
+                               v.reset(OpAMD64MOVWstoreidx1)
+                               v.AuxInt = i - 1
+                               v.Aux = s
+                               v.AddArg(p)
+                               v.AddArg(idx)
+                               v.AddArg(w)
+                               v.AddArg(mem)
+                               return true
+                       }
                }
-               v.reset(OpAMD64MOVWstoreidx1)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
        // cond: x.Uses == 1 && clobber(x)
@@ -12681,29 +12734,34 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRLconst || v_2.AuxInt != 8 {
-                       break
-               }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
-                       break
-               }
-               mem := x.Args[3]
-               if p != x.Args[0] || idx != x.Args[1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       idx := v.Args[1^_i0]
+                       v_2 := v.Args[2]
+                       if v_2.Op != OpAMD64SHRLconst || v_2.AuxInt != 8 {
+                               continue
+                       }
+                       w := v_2.Args[0]
+                       x := v.Args[3]
+                       if x.Op != OpAMD64MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
+                               continue
+                       }
+                       mem := x.Args[3]
+                       for _i1 := 0; _i1 <= 1; _i1++ {
+                               if p != x.Args[_i1] || idx != x.Args[1^_i1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
+                                       continue
+                               }
+                               v.reset(OpAMD64MOVWstoreidx1)
+                               v.AuxInt = i - 1
+                               v.Aux = s
+                               v.AddArg(p)
+                               v.AddArg(idx)
+                               v.AddArg(w)
+                               v.AddArg(mem)
+                               return true
+                       }
                }
-               v.reset(OpAMD64MOVWstoreidx1)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
        // cond: x.Uses == 1 && clobber(x)
@@ -12712,29 +12770,34 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst || v_2.AuxInt != 8 {
-                       break
-               }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
-                       break
-               }
-               mem := x.Args[3]
-               if p != x.Args[0] || idx != x.Args[1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       idx := v.Args[1^_i0]
+                       v_2 := v.Args[2]
+                       if v_2.Op != OpAMD64SHRQconst || v_2.AuxInt != 8 {
+                               continue
+                       }
+                       w := v_2.Args[0]
+                       x := v.Args[3]
+                       if x.Op != OpAMD64MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
+                               continue
+                       }
+                       mem := x.Args[3]
+                       for _i1 := 0; _i1 <= 1; _i1++ {
+                               if p != x.Args[_i1] || idx != x.Args[1^_i1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
+                                       continue
+                               }
+                               v.reset(OpAMD64MOVWstoreidx1)
+                               v.AuxInt = i - 1
+                               v.Aux = s
+                               v.AddArg(p)
+                               v.AddArg(idx)
+                               v.AddArg(w)
+                               v.AddArg(mem)
+                               return true
+                       }
                }
-               v.reset(OpAMD64MOVWstoreidx1)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
        // cond: x.Uses == 1 && clobber(x)
@@ -12743,34 +12806,39 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRLconst {
-                       break
-               }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
-                       break
-               }
-               mem := x.Args[3]
-               if p != x.Args[0] || idx != x.Args[1] {
-                       break
-               }
-               w0 := x.Args[2]
-               if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       idx := v.Args[1^_i0]
+                       v_2 := v.Args[2]
+                       if v_2.Op != OpAMD64SHRLconst {
+                               continue
+                       }
+                       j := v_2.AuxInt
+                       w := v_2.Args[0]
+                       x := v.Args[3]
+                       if x.Op != OpAMD64MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
+                               continue
+                       }
+                       mem := x.Args[3]
+                       for _i1 := 0; _i1 <= 1; _i1++ {
+                               if p != x.Args[_i1] || idx != x.Args[1^_i1] {
+                                       continue
+                               }
+                               w0 := x.Args[2]
+                               if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+                                       continue
+                               }
+                               v.reset(OpAMD64MOVWstoreidx1)
+                               v.AuxInt = i - 1
+                               v.Aux = s
+                               v.AddArg(p)
+                               v.AddArg(idx)
+                               v.AddArg(w0)
+                               v.AddArg(mem)
+                               return true
+                       }
                }
-               v.reset(OpAMD64MOVWstoreidx1)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRQconst [j-8] w) mem))
        // cond: x.Uses == 1 && clobber(x)
@@ -12779,34 +12847,39 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
-                       break
-               }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
-                       break
-               }
-               mem := x.Args[3]
-               if p != x.Args[0] || idx != x.Args[1] {
-                       break
-               }
-               w0 := x.Args[2]
-               if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       idx := v.Args[1^_i0]
+                       v_2 := v.Args[2]
+                       if v_2.Op != OpAMD64SHRQconst {
+                               continue
+                       }
+                       j := v_2.AuxInt
+                       w := v_2.Args[0]
+                       x := v.Args[3]
+                       if x.Op != OpAMD64MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
+                               continue
+                       }
+                       mem := x.Args[3]
+                       for _i1 := 0; _i1 <= 1; _i1++ {
+                               if p != x.Args[_i1] || idx != x.Args[1^_i1] {
+                                       continue
+                               }
+                               w0 := x.Args[2]
+                               if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+                                       continue
+                               }
+                               v.reset(OpAMD64MOVWstoreidx1)
+                               v.AuxInt = i - 1
+                               v.Aux = s
+                               v.AddArg(p)
+                               v.AddArg(idx)
+                               v.AddArg(w0)
+                               v.AddArg(mem)
+                               return true
+                       }
                }
-               v.reset(OpAMD64MOVWstoreidx1)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
+               break
        }
        return false
 }
@@ -12818,23 +12891,26 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1_10(v *Value) bool {
                i := v.AuxInt
                s := v.Aux
                mem := v.Args[3]
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               w := v.Args[2]
-               if !(is32Bit(i + c)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       v_1 := v.Args[1^_i0]
+                       if v_1.Op != OpAMD64MOVQconst {
+                               continue
+                       }
+                       c := v_1.AuxInt
+                       w := v.Args[2]
+                       if !(is32Bit(i + c)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MOVBstore)
+                       v.AuxInt = i + c
+                       v.Aux = s
+                       v.AddArg(p)
+                       v.AddArg(w)
+                       v.AddArg(mem)
+                       return true
                }
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = i + c
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
-               return true
+               break
        }
        return false
 }
@@ -15069,19 +15145,22 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconstidx1_0(v *Value) bool {
                c := v.AuxInt
                sym := v.Aux
                mem := v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 2 {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       ptr := v.Args[_i0]
+                       v_1 := v.Args[1^_i0]
+                       if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 2 {
+                               continue
+                       }
+                       idx := v_1.Args[0]
+                       v.reset(OpAMD64MOVLstoreconstidx4)
+                       v.AuxInt = c
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(mem)
+                       return true
                }
-               idx := v_1.Args[0]
-               v.reset(OpAMD64MOVLstoreconstidx4)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVLstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
        // cond: ValAndOff(x).canAdd(c)
@@ -15090,23 +15169,26 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconstidx1_0(v *Value) bool {
                x := v.AuxInt
                sym := v.Aux
                mem := v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               if !(ValAndOff(x).canAdd(c)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       v_0 := v.Args[_i0]
+                       if v_0.Op != OpAMD64ADDQconst {
+                               continue
+                       }
+                       c := v_0.AuxInt
+                       ptr := v_0.Args[0]
+                       idx := v.Args[1^_i0]
+                       if !(ValAndOff(x).canAdd(c)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MOVLstoreconstidx1)
+                       v.AuxInt = ValAndOff(x).add(c)
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(mem)
+                       return true
                }
-               v.reset(OpAMD64MOVLstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVLstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
        // cond: ValAndOff(x).canAdd(c)
@@ -15115,23 +15197,26 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconstidx1_0(v *Value) bool {
                x := v.AuxInt
                sym := v.Aux
                mem := v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
-               if !(ValAndOff(x).canAdd(c)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       ptr := v.Args[_i0]
+                       v_1 := v.Args[1^_i0]
+                       if v_1.Op != OpAMD64ADDQconst {
+                               continue
+                       }
+                       c := v_1.AuxInt
+                       idx := v_1.Args[0]
+                       if !(ValAndOff(x).canAdd(c)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MOVLstoreconstidx1)
+                       v.AuxInt = ValAndOff(x).add(c)
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(mem)
+                       return true
                }
-               v.reset(OpAMD64MOVLstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVLstoreconstidx1 [c] {s} p i x:(MOVLstoreconstidx1 [a] {s} p i mem))
        // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x)
@@ -15140,30 +15225,35 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconstidx1_0(v *Value) bool {
                c := v.AuxInt
                s := v.Aux
                _ = v.Args[2]
-               p := v.Args[0]
-               i := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVLstoreconstidx1 {
-                       break
-               }
-               a := x.AuxInt
-               if x.Aux != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] || i != x.Args[1] || !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       i := v.Args[1^_i0]
+                       x := v.Args[2]
+                       if x.Op != OpAMD64MOVLstoreconstidx1 {
+                               continue
+                       }
+                       a := x.AuxInt
+                       if x.Aux != s {
+                               continue
+                       }
+                       mem := x.Args[2]
+                       for _i1 := 0; _i1 <= 1; _i1++ {
+                               if p != x.Args[_i1] || i != x.Args[1^_i1] || !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+                                       continue
+                               }
+                               v.reset(OpAMD64MOVQstoreidx1)
+                               v.AuxInt = ValAndOff(a).Off()
+                               v.Aux = s
+                               v.AddArg(p)
+                               v.AddArg(i)
+                               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
+                               v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
+                               v.AddArg(v0)
+                               v.AddArg(mem)
+                               return true
+                       }
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = ValAndOff(a).Off()
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
-               v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
+               break
        }
        return false
 }
@@ -15264,21 +15354,24 @@ func rewriteValueAMD64_OpAMD64MOVLstoreidx1_0(v *Value) bool {
                c := v.AuxInt
                sym := v.Aux
                mem := v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 2 {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       ptr := v.Args[_i0]
+                       v_1 := v.Args[1^_i0]
+                       if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 2 {
+                               continue
+                       }
+                       idx := v_1.Args[0]
+                       val := v.Args[2]
+                       v.reset(OpAMD64MOVLstoreidx4)
+                       v.AuxInt = c
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(val)
+                       v.AddArg(mem)
+                       return true
                }
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               v.reset(OpAMD64MOVLstoreidx4)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVLstoreidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem)
        // result: (MOVLstoreidx8 [c] {sym} ptr idx val mem)
@@ -15286,21 +15379,24 @@ func rewriteValueAMD64_OpAMD64MOVLstoreidx1_0(v *Value) bool {
                c := v.AuxInt
                sym := v.Aux
                mem := v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       ptr := v.Args[_i0]
+                       v_1 := v.Args[1^_i0]
+                       if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 {
+                               continue
+                       }
+                       idx := v_1.Args[0]
+                       val := v.Args[2]
+                       v.reset(OpAMD64MOVLstoreidx8)
+                       v.AuxInt = c
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(val)
+                       v.AddArg(mem)
+                       return true
                }
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               v.reset(OpAMD64MOVLstoreidx8)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVLstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
        // cond: is32Bit(c+d)
@@ -15309,25 +15405,28 @@ func rewriteValueAMD64_OpAMD64MOVLstoreidx1_0(v *Value) bool {
                c := v.AuxInt
                sym := v.Aux
                mem := v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               if !(is32Bit(c + d)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       v_0 := v.Args[_i0]
+                       if v_0.Op != OpAMD64ADDQconst {
+                               continue
+                       }
+                       d := v_0.AuxInt
+                       ptr := v_0.Args[0]
+                       idx := v.Args[1^_i0]
+                       val := v.Args[2]
+                       if !(is32Bit(c + d)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MOVLstoreidx1)
+                       v.AuxInt = c + d
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(val)
+                       v.AddArg(mem)
+                       return true
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVLstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
        // cond: is32Bit(c+d)
@@ -15336,25 +15435,28 @@ func rewriteValueAMD64_OpAMD64MOVLstoreidx1_0(v *Value) bool {
                c := v.AuxInt
                sym := v.Aux
                mem := v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               if !(is32Bit(c + d)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       ptr := v.Args[_i0]
+                       v_1 := v.Args[1^_i0]
+                       if v_1.Op != OpAMD64ADDQconst {
+                               continue
+                       }
+                       d := v_1.AuxInt
+                       idx := v_1.Args[0]
+                       val := v.Args[2]
+                       if !(is32Bit(c + d)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MOVLstoreidx1)
+                       v.AuxInt = c + d
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(val)
+                       v.AddArg(mem)
+                       return true
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVLstoreidx1 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx1 [i-4] {s} p idx w mem))
        // cond: x.Uses == 1 && clobber(x)
@@ -15363,29 +15465,34 @@ func rewriteValueAMD64_OpAMD64MOVLstoreidx1_0(v *Value) bool {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst || v_2.AuxInt != 32 {
-                       break
-               }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVLstoreidx1 || x.AuxInt != i-4 || x.Aux != s {
-                       break
-               }
-               mem := x.Args[3]
-               if p != x.Args[0] || idx != x.Args[1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       idx := v.Args[1^_i0]
+                       v_2 := v.Args[2]
+                       if v_2.Op != OpAMD64SHRQconst || v_2.AuxInt != 32 {
+                               continue
+                       }
+                       w := v_2.Args[0]
+                       x := v.Args[3]
+                       if x.Op != OpAMD64MOVLstoreidx1 || x.AuxInt != i-4 || x.Aux != s {
+                               continue
+                       }
+                       mem := x.Args[3]
+                       for _i1 := 0; _i1 <= 1; _i1++ {
+                               if p != x.Args[_i1] || idx != x.Args[1^_i1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
+                                       continue
+                               }
+                               v.reset(OpAMD64MOVQstoreidx1)
+                               v.AuxInt = i - 4
+                               v.Aux = s
+                               v.AddArg(p)
+                               v.AddArg(idx)
+                               v.AddArg(w)
+                               v.AddArg(mem)
+                               return true
+                       }
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVLstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx1 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
        // cond: x.Uses == 1 && clobber(x)
@@ -15394,34 +15501,39 @@ func rewriteValueAMD64_OpAMD64MOVLstoreidx1_0(v *Value) bool {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
-                       break
-               }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVLstoreidx1 || x.AuxInt != i-4 || x.Aux != s {
-                       break
-               }
-               mem := x.Args[3]
-               if p != x.Args[0] || idx != x.Args[1] {
-                       break
-               }
-               w0 := x.Args[2]
-               if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-32 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       idx := v.Args[1^_i0]
+                       v_2 := v.Args[2]
+                       if v_2.Op != OpAMD64SHRQconst {
+                               continue
+                       }
+                       j := v_2.AuxInt
+                       w := v_2.Args[0]
+                       x := v.Args[3]
+                       if x.Op != OpAMD64MOVLstoreidx1 || x.AuxInt != i-4 || x.Aux != s {
+                               continue
+                       }
+                       mem := x.Args[3]
+                       for _i1 := 0; _i1 <= 1; _i1++ {
+                               if p != x.Args[_i1] || idx != x.Args[1^_i1] {
+                                       continue
+                               }
+                               w0 := x.Args[2]
+                               if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-32 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+                                       continue
+                               }
+                               v.reset(OpAMD64MOVQstoreidx1)
+                               v.AuxInt = i - 4
+                               v.Aux = s
+                               v.AddArg(p)
+                               v.AddArg(idx)
+                               v.AddArg(w0)
+                               v.AddArg(mem)
+                               return true
+                       }
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVLstoreidx1 [i] {s} p (MOVQconst [c]) w mem)
        // cond: is32Bit(i+c)
@@ -15430,23 +15542,26 @@ func rewriteValueAMD64_OpAMD64MOVLstoreidx1_0(v *Value) bool {
                i := v.AuxInt
                s := v.Aux
                mem := v.Args[3]
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               w := v.Args[2]
-               if !(is32Bit(i + c)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       v_1 := v.Args[1^_i0]
+                       if v_1.Op != OpAMD64MOVQconst {
+                               continue
+                       }
+                       c := v_1.AuxInt
+                       w := v.Args[2]
+                       if !(is32Bit(i + c)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MOVLstore)
+                       v.AuxInt = i + c
+                       v.Aux = s
+                       v.AddArg(p)
+                       v.AddArg(w)
+                       v.AddArg(mem)
+                       return true
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = i + c
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
-               return true
+               break
        }
        return false
 }
@@ -17330,19 +17445,22 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconstidx1_0(v *Value) bool {
                c := v.AuxInt
                sym := v.Aux
                mem := v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       ptr := v.Args[_i0]
+                       v_1 := v.Args[1^_i0]
+                       if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 {
+                               continue
+                       }
+                       idx := v_1.Args[0]
+                       v.reset(OpAMD64MOVQstoreconstidx8)
+                       v.AuxInt = c
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(mem)
+                       return true
                }
-               idx := v_1.Args[0]
-               v.reset(OpAMD64MOVQstoreconstidx8)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVQstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
        // cond: ValAndOff(x).canAdd(c)
@@ -17351,23 +17469,26 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconstidx1_0(v *Value) bool {
                x := v.AuxInt
                sym := v.Aux
                mem := v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               if !(ValAndOff(x).canAdd(c)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       v_0 := v.Args[_i0]
+                       if v_0.Op != OpAMD64ADDQconst {
+                               continue
+                       }
+                       c := v_0.AuxInt
+                       ptr := v_0.Args[0]
+                       idx := v.Args[1^_i0]
+                       if !(ValAndOff(x).canAdd(c)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MOVQstoreconstidx1)
+                       v.AuxInt = ValAndOff(x).add(c)
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(mem)
+                       return true
                }
-               v.reset(OpAMD64MOVQstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVQstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
        // cond: ValAndOff(x).canAdd(c)
@@ -17376,23 +17497,26 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconstidx1_0(v *Value) bool {
                x := v.AuxInt
                sym := v.Aux
                mem := v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
-               if !(ValAndOff(x).canAdd(c)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       ptr := v.Args[_i0]
+                       v_1 := v.Args[1^_i0]
+                       if v_1.Op != OpAMD64ADDQconst {
+                               continue
+                       }
+                       c := v_1.AuxInt
+                       idx := v_1.Args[0]
+                       if !(ValAndOff(x).canAdd(c)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MOVQstoreconstidx1)
+                       v.AuxInt = ValAndOff(x).add(c)
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(mem)
+                       return true
                }
-               v.reset(OpAMD64MOVQstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
+               break
        }
        return false
 }
@@ -17456,21 +17580,24 @@ func rewriteValueAMD64_OpAMD64MOVQstoreidx1_0(v *Value) bool {
                c := v.AuxInt
                sym := v.Aux
                mem := v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       ptr := v.Args[_i0]
+                       v_1 := v.Args[1^_i0]
+                       if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 {
+                               continue
+                       }
+                       idx := v_1.Args[0]
+                       val := v.Args[2]
+                       v.reset(OpAMD64MOVQstoreidx8)
+                       v.AuxInt = c
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(val)
+                       v.AddArg(mem)
+                       return true
                }
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               v.reset(OpAMD64MOVQstoreidx8)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVQstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
        // cond: is32Bit(c+d)
@@ -17479,25 +17606,28 @@ func rewriteValueAMD64_OpAMD64MOVQstoreidx1_0(v *Value) bool {
                c := v.AuxInt
                sym := v.Aux
                mem := v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               if !(is32Bit(c + d)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       v_0 := v.Args[_i0]
+                       if v_0.Op != OpAMD64ADDQconst {
+                               continue
+                       }
+                       d := v_0.AuxInt
+                       ptr := v_0.Args[0]
+                       idx := v.Args[1^_i0]
+                       val := v.Args[2]
+                       if !(is32Bit(c + d)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MOVQstoreidx1)
+                       v.AuxInt = c + d
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(val)
+                       v.AddArg(mem)
+                       return true
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVQstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
        // cond: is32Bit(c+d)
@@ -17506,25 +17636,28 @@ func rewriteValueAMD64_OpAMD64MOVQstoreidx1_0(v *Value) bool {
                c := v.AuxInt
                sym := v.Aux
                mem := v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               if !(is32Bit(c + d)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       ptr := v.Args[_i0]
+                       v_1 := v.Args[1^_i0]
+                       if v_1.Op != OpAMD64ADDQconst {
+                               continue
+                       }
+                       d := v_1.AuxInt
+                       idx := v_1.Args[0]
+                       val := v.Args[2]
+                       if !(is32Bit(c + d)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MOVQstoreidx1)
+                       v.AuxInt = c + d
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(val)
+                       v.AddArg(mem)
+                       return true
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVQstoreidx1 [i] {s} p (MOVQconst [c]) w mem)
        // cond: is32Bit(i+c)
@@ -17533,23 +17666,26 @@ func rewriteValueAMD64_OpAMD64MOVQstoreidx1_0(v *Value) bool {
                i := v.AuxInt
                s := v.Aux
                mem := v.Args[3]
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               w := v.Args[2]
-               if !(is32Bit(i + c)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       v_1 := v.Args[1^_i0]
+                       if v_1.Op != OpAMD64MOVQconst {
+                               continue
+                       }
+                       c := v_1.AuxInt
+                       w := v.Args[2]
+                       if !(is32Bit(i + c)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MOVQstore)
+                       v.AuxInt = i + c
+                       v.Aux = s
+                       v.AddArg(p)
+                       v.AddArg(w)
+                       v.AddArg(mem)
+                       return true
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = i + c
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
-               return true
+               break
        }
        return false
 }
@@ -20412,19 +20548,22 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconstidx1_0(v *Value) bool {
                c := v.AuxInt
                sym := v.Aux
                mem := v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 1 {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       ptr := v.Args[_i0]
+                       v_1 := v.Args[1^_i0]
+                       if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 1 {
+                               continue
+                       }
+                       idx := v_1.Args[0]
+                       v.reset(OpAMD64MOVWstoreconstidx2)
+                       v.AuxInt = c
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(mem)
+                       return true
                }
-               idx := v_1.Args[0]
-               v.reset(OpAMD64MOVWstoreconstidx2)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVWstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
        // cond: ValAndOff(x).canAdd(c)
@@ -20433,23 +20572,26 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconstidx1_0(v *Value) bool {
                x := v.AuxInt
                sym := v.Aux
                mem := v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               if !(ValAndOff(x).canAdd(c)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       v_0 := v.Args[_i0]
+                       if v_0.Op != OpAMD64ADDQconst {
+                               continue
+                       }
+                       c := v_0.AuxInt
+                       ptr := v_0.Args[0]
+                       idx := v.Args[1^_i0]
+                       if !(ValAndOff(x).canAdd(c)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MOVWstoreconstidx1)
+                       v.AuxInt = ValAndOff(x).add(c)
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(mem)
+                       return true
                }
-               v.reset(OpAMD64MOVWstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVWstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
        // cond: ValAndOff(x).canAdd(c)
@@ -20458,23 +20600,26 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconstidx1_0(v *Value) bool {
                x := v.AuxInt
                sym := v.Aux
                mem := v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
-               if !(ValAndOff(x).canAdd(c)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       ptr := v.Args[_i0]
+                       v_1 := v.Args[1^_i0]
+                       if v_1.Op != OpAMD64ADDQconst {
+                               continue
+                       }
+                       c := v_1.AuxInt
+                       idx := v_1.Args[0]
+                       if !(ValAndOff(x).canAdd(c)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MOVWstoreconstidx1)
+                       v.AuxInt = ValAndOff(x).add(c)
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(mem)
+                       return true
                }
-               v.reset(OpAMD64MOVWstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
        // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
@@ -20483,27 +20628,32 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconstidx1_0(v *Value) bool {
                c := v.AuxInt
                s := v.Aux
                _ = v.Args[2]
-               p := v.Args[0]
-               i := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVWstoreconstidx1 {
-                       break
-               }
-               a := x.AuxInt
-               if x.Aux != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] || i != x.Args[1] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       i := v.Args[1^_i0]
+                       x := v.Args[2]
+                       if x.Op != OpAMD64MOVWstoreconstidx1 {
+                               continue
+                       }
+                       a := x.AuxInt
+                       if x.Aux != s {
+                               continue
+                       }
+                       mem := x.Args[2]
+                       for _i1 := 0; _i1 <= 1; _i1++ {
+                               if p != x.Args[_i1] || i != x.Args[1^_i1] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+                                       continue
+                               }
+                               v.reset(OpAMD64MOVLstoreconstidx1)
+                               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
+                               v.Aux = s
+                               v.AddArg(p)
+                               v.AddArg(i)
+                               v.AddArg(mem)
+                               return true
+                       }
                }
-               v.reset(OpAMD64MOVLstoreconstidx1)
-               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(i)
-               v.AddArg(mem)
-               return true
+               break
        }
        return false
 }
@@ -20600,48 +20750,54 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx1_0(v *Value) bool {
                c := v.AuxInt
                sym := v.Aux
                mem := v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 1 {
-                       break
-               }
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               v.reset(OpAMD64MOVWstoreidx2)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               mem := v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       ptr := v.Args[_i0]
+                       v_1 := v.Args[1^_i0]
+                       if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 1 {
+                               continue
+                       }
+                       idx := v_1.Args[0]
+                       val := v.Args[2]
+                       v.reset(OpAMD64MOVWstoreidx2)
+                       v.AuxInt = c
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(val)
+                       v.AddArg(mem)
+                       return true
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               if !(is32Bit(c + d)) {
-                       break
+               break
+       }
+       // match: (MOVWstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               mem := v.Args[3]
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       v_0 := v.Args[_i0]
+                       if v_0.Op != OpAMD64ADDQconst {
+                               continue
+                       }
+                       d := v_0.AuxInt
+                       ptr := v_0.Args[0]
+                       idx := v.Args[1^_i0]
+                       val := v.Args[2]
+                       if !(is32Bit(c + d)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MOVWstoreidx1)
+                       v.AuxInt = c + d
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(val)
+                       v.AddArg(mem)
+                       return true
                }
-               v.reset(OpAMD64MOVWstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVWstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
        // cond: is32Bit(c+d)
@@ -20650,25 +20806,28 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx1_0(v *Value) bool {
                c := v.AuxInt
                sym := v.Aux
                mem := v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               if !(is32Bit(c + d)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       ptr := v.Args[_i0]
+                       v_1 := v.Args[1^_i0]
+                       if v_1.Op != OpAMD64ADDQconst {
+                               continue
+                       }
+                       d := v_1.AuxInt
+                       idx := v_1.Args[0]
+                       val := v.Args[2]
+                       if !(is32Bit(c + d)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MOVWstoreidx1)
+                       v.AuxInt = c + d
+                       v.Aux = sym
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
+                       v.AddArg(val)
+                       v.AddArg(mem)
+                       return true
                }
-               v.reset(OpAMD64MOVWstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
        // cond: x.Uses == 1 && clobber(x)
@@ -20677,29 +20836,34 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx1_0(v *Value) bool {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRLconst || v_2.AuxInt != 16 {
-                       break
-               }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s {
-                       break
-               }
-               mem := x.Args[3]
-               if p != x.Args[0] || idx != x.Args[1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       idx := v.Args[1^_i0]
+                       v_2 := v.Args[2]
+                       if v_2.Op != OpAMD64SHRLconst || v_2.AuxInt != 16 {
+                               continue
+                       }
+                       w := v_2.Args[0]
+                       x := v.Args[3]
+                       if x.Op != OpAMD64MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s {
+                               continue
+                       }
+                       mem := x.Args[3]
+                       for _i1 := 0; _i1 <= 1; _i1++ {
+                               if p != x.Args[_i1] || idx != x.Args[1^_i1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
+                                       continue
+                               }
+                               v.reset(OpAMD64MOVLstoreidx1)
+                               v.AuxInt = i - 2
+                               v.Aux = s
+                               v.AddArg(p)
+                               v.AddArg(idx)
+                               v.AddArg(w)
+                               v.AddArg(mem)
+                               return true
+                       }
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
        // cond: x.Uses == 1 && clobber(x)
@@ -20708,29 +20872,34 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx1_0(v *Value) bool {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst || v_2.AuxInt != 16 {
-                       break
-               }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s {
-                       break
-               }
-               mem := x.Args[3]
-               if p != x.Args[0] || idx != x.Args[1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       idx := v.Args[1^_i0]
+                       v_2 := v.Args[2]
+                       if v_2.Op != OpAMD64SHRQconst || v_2.AuxInt != 16 {
+                               continue
+                       }
+                       w := v_2.Args[0]
+                       x := v.Args[3]
+                       if x.Op != OpAMD64MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s {
+                               continue
+                       }
+                       mem := x.Args[3]
+                       for _i1 := 0; _i1 <= 1; _i1++ {
+                               if p != x.Args[_i1] || idx != x.Args[1^_i1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
+                                       continue
+                               }
+                               v.reset(OpAMD64MOVLstoreidx1)
+                               v.AuxInt = i - 2
+                               v.Aux = s
+                               v.AddArg(p)
+                               v.AddArg(idx)
+                               v.AddArg(w)
+                               v.AddArg(mem)
+                               return true
+                       }
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
        // cond: x.Uses == 1 && clobber(x)
@@ -20739,34 +20908,39 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx1_0(v *Value) bool {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRLconst {
-                       break
-               }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s {
-                       break
-               }
-               mem := x.Args[3]
-               if p != x.Args[0] || idx != x.Args[1] {
-                       break
-               }
-               w0 := x.Args[2]
-               if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       idx := v.Args[1^_i0]
+                       v_2 := v.Args[2]
+                       if v_2.Op != OpAMD64SHRLconst {
+                               continue
+                       }
+                       j := v_2.AuxInt
+                       w := v_2.Args[0]
+                       x := v.Args[3]
+                       if x.Op != OpAMD64MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s {
+                               continue
+                       }
+                       mem := x.Args[3]
+                       for _i1 := 0; _i1 <= 1; _i1++ {
+                               if p != x.Args[_i1] || idx != x.Args[1^_i1] {
+                                       continue
+                               }
+                               w0 := x.Args[2]
+                               if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+                                       continue
+                               }
+                               v.reset(OpAMD64MOVLstoreidx1)
+                               v.AuxInt = i - 2
+                               v.Aux = s
+                               v.AddArg(p)
+                               v.AddArg(idx)
+                               v.AddArg(w0)
+                               v.AddArg(mem)
+                               return true
+                       }
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
        // cond: x.Uses == 1 && clobber(x)
@@ -20775,34 +20949,39 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx1_0(v *Value) bool {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
-                       break
-               }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s {
-                       break
-               }
-               mem := x.Args[3]
-               if p != x.Args[0] || idx != x.Args[1] {
-                       break
-               }
-               w0 := x.Args[2]
-               if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       idx := v.Args[1^_i0]
+                       v_2 := v.Args[2]
+                       if v_2.Op != OpAMD64SHRQconst {
+                               continue
+                       }
+                       j := v_2.AuxInt
+                       w := v_2.Args[0]
+                       x := v.Args[3]
+                       if x.Op != OpAMD64MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s {
+                               continue
+                       }
+                       mem := x.Args[3]
+                       for _i1 := 0; _i1 <= 1; _i1++ {
+                               if p != x.Args[_i1] || idx != x.Args[1^_i1] {
+                                       continue
+                               }
+                               w0 := x.Args[2]
+                               if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+                                       continue
+                               }
+                               v.reset(OpAMD64MOVLstoreidx1)
+                               v.AuxInt = i - 2
+                               v.Aux = s
+                               v.AddArg(p)
+                               v.AddArg(idx)
+                               v.AddArg(w0)
+                               v.AddArg(mem)
+                               return true
+                       }
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
+               break
        }
        // match: (MOVWstoreidx1 [i] {s} p (MOVQconst [c]) w mem)
        // cond: is32Bit(i+c)
@@ -20811,23 +20990,26 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx1_0(v *Value) bool {
                i := v.AuxInt
                s := v.Aux
                mem := v.Args[3]
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               w := v.Args[2]
-               if !(is32Bit(i + c)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0++ {
+                       p := v.Args[_i0]
+                       v_1 := v.Args[1^_i0]
+                       if v_1.Op != OpAMD64MOVQconst {
+                               continue
+                       }
+                       c := v_1.AuxInt
+                       w := v.Args[2]
+                       if !(is32Bit(i + c)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MOVWstore)
+                       v.AuxInt = i + c
+                       v.Aux = s
+                       v.AddArg(p)
+                       v.AddArg(w)
+                       v.AddArg(mem)
+                       return true
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = i + c
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
-               return true
+               break
        }
        return false
 }