Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: insert complicated x86 addressing modes as a separate pass
author Keith Randall <khr@golang.org>
Thu, 30 Jan 2020 18:17:01 +0000 (10:17 -0800)
committer Keith Randall <khr@golang.org>
Tue, 10 Mar 2020 00:13:21 +0000 (00:13 +0000)
Use a separate compiler pass to introduce complicated x86 addressing
modes.  Loads in the normal architecture rules (for x86 and all other
platforms) can have constant offsets (AuxInt values) and symbols (Aux
values), but no more.

The complex addressing modes (x+y, x+2*y, etc.) are introduced in a
separate pass that combines loads with LEAQx ops.

Organizing rewrites this way reduces the number of rewrite rules
required: without a separate pass, many different rule orderings have
to be spelled out to ensure these complex addressing modes are always
found when they are possible.
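
As a rough illustration (assuming typical amd64 lowering; exact ops
depend on bounds-check elimination and surrounding code), a slice load
like the Go function below first lowers to an address calculation
feeding a plain MOVBload, and the new pass then folds the pair into a
single indexed op such as MOVBloadidx1:

    // load is a hypothetical example, not part of this change: the
    // address of b[i] is ptr+idx, so after the pass the load can use a
    // one-instruction indexed addressing mode like MOVBLZX (AX)(BX*1).
    func load(b []byte, i int) byte {
            return b[i]
    }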

Update #36468

Change-Id: I5b4bf7b03a1e731d6dfeb9ef19b376175f3b4b44
Reviewed-on: https://go-review.googlesource.com/c/go/+/217097
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
src/cmd/compile/internal/ssa/addressingmodes.go [new file with mode: 0644]
src/cmd/compile/internal/ssa/compile.go
src/cmd/compile/internal/ssa/gen/AMD64.rules
src/cmd/compile/internal/ssa/rewrite.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
test/codegen/memops.go

diff --git a/src/cmd/compile/internal/ssa/addressingmodes.go b/src/cmd/compile/internal/ssa/addressingmodes.go
new file mode 100644 (file)
index 0000000..8874b56
--- /dev/null
@@ -0,0 +1,154 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// addressingModes combines address calculations into memory operations
+// that can perform complicated addressing modes.
+func addressingModes(f *Func) {
+       switch f.Config.arch {
+       default:
+               // Most architectures can't do this.
+               return
+       case "amd64":
+               // TODO: 386, s390x?
+       }
+
+       var tmp []*Value
+       for _, b := range f.Blocks {
+               for _, v := range b.Values {
+                       if !combineFirst[v.Op] {
+                               continue
+                       }
+                       p := v.Args[0]
+                       c, ok := combine[[2]Op{v.Op, p.Op}]
+                       if !ok {
+                               continue
+                       }
+                       // See if we can combine the Aux/AuxInt values.
+                       switch [2]auxType{opcodeTable[v.Op].auxType, opcodeTable[p.Op].auxType} {
+                       case [2]auxType{auxSymOff, auxInt32}:
+                               // TODO: introduce auxSymOff32
+                               if !is32Bit(v.AuxInt + p.AuxInt) {
+                                       continue
+                               }
+                               v.AuxInt += p.AuxInt
+                       case [2]auxType{auxSymOff, auxSymOff}:
+                               if v.Aux != nil && p.Aux != nil {
+                                       continue
+                               }
+                               if !is32Bit(v.AuxInt + p.AuxInt) {
+                                       continue
+                               }
+                               if p.Aux != nil {
+                                       v.Aux = p.Aux
+                               }
+                               v.AuxInt += p.AuxInt
+                       case [2]auxType{auxSymValAndOff, auxInt32}:
+                               vo := ValAndOff(v.AuxInt)
+                               if !vo.canAdd(p.AuxInt) {
+                                       continue
+                               }
+                               v.AuxInt = vo.add(p.AuxInt)
+                       case [2]auxType{auxSymValAndOff, auxSymOff}:
+                               vo := ValAndOff(v.AuxInt)
+                               if v.Aux != nil && p.Aux != nil {
+                                       continue
+                               }
+                               if !vo.canAdd(p.AuxInt) {
+                                       continue
+                               }
+                               if p.Aux != nil {
+                                       v.Aux = p.Aux
+                               }
+                               v.AuxInt = vo.add(p.AuxInt)
+                       case [2]auxType{auxSymOff, auxNone}:
+                               // nothing to do
+                       case [2]auxType{auxSymValAndOff, auxNone}:
+                               // nothing to do
+                       default:
+                               f.Fatalf("unknown aux combining for %s and %s\n", v.Op, p.Op)
+                       }
+                       // Combine the operations.
+                       tmp = append(tmp[:0], v.Args[1:]...)
+                       v.resetArgs()
+                       v.Op = c
+                       v.AddArgs(p.Args...)
+                       v.AddArgs(tmp...)
+               }
+       }
+}
+
+// combineFirst contains ops which appear in combine as the
+// first part of the key.
+var combineFirst = map[Op]bool{}
+
+func init() {
+       for k := range combine {
+               combineFirst[k[0]] = true
+       }
+}
+
+// For each entry k, v in this map, if we have a value x with:
+//   x.Op == k[0]
+//   x.Args[0].Op == k[1]
+// then we can set x.Op to v and set x.Args like this:
+//   x.Args[0].Args + x.Args[1:]
+// Additionally, the Aux/AuxInt from x.Args[0] is merged into x.
+var combine = map[[2]Op]Op{
+       [2]Op{OpAMD64MOVBload, OpAMD64ADDQ}:  OpAMD64MOVBloadidx1,
+       [2]Op{OpAMD64MOVWload, OpAMD64ADDQ}:  OpAMD64MOVWloadidx1,
+       [2]Op{OpAMD64MOVLload, OpAMD64ADDQ}:  OpAMD64MOVLloadidx1,
+       [2]Op{OpAMD64MOVQload, OpAMD64ADDQ}:  OpAMD64MOVQloadidx1,
+       [2]Op{OpAMD64MOVSSload, OpAMD64ADDQ}: OpAMD64MOVSSloadidx1,
+       [2]Op{OpAMD64MOVSDload, OpAMD64ADDQ}: OpAMD64MOVSDloadidx1,
+
+       [2]Op{OpAMD64MOVBstore, OpAMD64ADDQ}:  OpAMD64MOVBstoreidx1,
+       [2]Op{OpAMD64MOVWstore, OpAMD64ADDQ}:  OpAMD64MOVWstoreidx1,
+       [2]Op{OpAMD64MOVLstore, OpAMD64ADDQ}:  OpAMD64MOVLstoreidx1,
+       [2]Op{OpAMD64MOVQstore, OpAMD64ADDQ}:  OpAMD64MOVQstoreidx1,
+       [2]Op{OpAMD64MOVSSstore, OpAMD64ADDQ}: OpAMD64MOVSSstoreidx1,
+       [2]Op{OpAMD64MOVSDstore, OpAMD64ADDQ}: OpAMD64MOVSDstoreidx1,
+
+       [2]Op{OpAMD64MOVBstoreconst, OpAMD64ADDQ}: OpAMD64MOVBstoreconstidx1,
+       [2]Op{OpAMD64MOVWstoreconst, OpAMD64ADDQ}: OpAMD64MOVWstoreconstidx1,
+       [2]Op{OpAMD64MOVLstoreconst, OpAMD64ADDQ}: OpAMD64MOVLstoreconstidx1,
+       [2]Op{OpAMD64MOVQstoreconst, OpAMD64ADDQ}: OpAMD64MOVQstoreconstidx1,
+
+       [2]Op{OpAMD64MOVBload, OpAMD64LEAQ1}:  OpAMD64MOVBloadidx1,
+       [2]Op{OpAMD64MOVWload, OpAMD64LEAQ1}:  OpAMD64MOVWloadidx1,
+       [2]Op{OpAMD64MOVWload, OpAMD64LEAQ2}:  OpAMD64MOVWloadidx2,
+       [2]Op{OpAMD64MOVLload, OpAMD64LEAQ1}:  OpAMD64MOVLloadidx1,
+       [2]Op{OpAMD64MOVLload, OpAMD64LEAQ4}:  OpAMD64MOVLloadidx4,
+       [2]Op{OpAMD64MOVLload, OpAMD64LEAQ8}:  OpAMD64MOVLloadidx8,
+       [2]Op{OpAMD64MOVQload, OpAMD64LEAQ1}:  OpAMD64MOVQloadidx1,
+       [2]Op{OpAMD64MOVQload, OpAMD64LEAQ8}:  OpAMD64MOVQloadidx8,
+       [2]Op{OpAMD64MOVSSload, OpAMD64LEAQ1}: OpAMD64MOVSSloadidx1,
+       [2]Op{OpAMD64MOVSSload, OpAMD64LEAQ4}: OpAMD64MOVSSloadidx4,
+       [2]Op{OpAMD64MOVSDload, OpAMD64LEAQ1}: OpAMD64MOVSDloadidx1,
+       [2]Op{OpAMD64MOVSDload, OpAMD64LEAQ8}: OpAMD64MOVSDloadidx8,
+
+       [2]Op{OpAMD64MOVBstore, OpAMD64LEAQ1}:  OpAMD64MOVBstoreidx1,
+       [2]Op{OpAMD64MOVWstore, OpAMD64LEAQ1}:  OpAMD64MOVWstoreidx1,
+       [2]Op{OpAMD64MOVWstore, OpAMD64LEAQ2}:  OpAMD64MOVWstoreidx2,
+       [2]Op{OpAMD64MOVLstore, OpAMD64LEAQ1}:  OpAMD64MOVLstoreidx1,
+       [2]Op{OpAMD64MOVLstore, OpAMD64LEAQ4}:  OpAMD64MOVLstoreidx4,
+       [2]Op{OpAMD64MOVLstore, OpAMD64LEAQ8}:  OpAMD64MOVLstoreidx8,
+       [2]Op{OpAMD64MOVQstore, OpAMD64LEAQ1}:  OpAMD64MOVQstoreidx1,
+       [2]Op{OpAMD64MOVQstore, OpAMD64LEAQ8}:  OpAMD64MOVQstoreidx8,
+       [2]Op{OpAMD64MOVSSstore, OpAMD64LEAQ1}: OpAMD64MOVSSstoreidx1,
+       [2]Op{OpAMD64MOVSSstore, OpAMD64LEAQ4}: OpAMD64MOVSSstoreidx4,
+       [2]Op{OpAMD64MOVSDstore, OpAMD64LEAQ1}: OpAMD64MOVSDstoreidx1,
+       [2]Op{OpAMD64MOVSDstore, OpAMD64LEAQ8}: OpAMD64MOVSDstoreidx8,
+
+       [2]Op{OpAMD64MOVBstoreconst, OpAMD64LEAQ1}: OpAMD64MOVBstoreconstidx1,
+       [2]Op{OpAMD64MOVWstoreconst, OpAMD64LEAQ1}: OpAMD64MOVWstoreconstidx1,
+       [2]Op{OpAMD64MOVWstoreconst, OpAMD64LEAQ2}: OpAMD64MOVWstoreconstidx2,
+       [2]Op{OpAMD64MOVLstoreconst, OpAMD64LEAQ1}: OpAMD64MOVLstoreconstidx1,
+       [2]Op{OpAMD64MOVLstoreconst, OpAMD64LEAQ4}: OpAMD64MOVLstoreconstidx4,
+       [2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ1}: OpAMD64MOVQstoreconstidx1,
+       [2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ8}: OpAMD64MOVQstoreconstidx8,
+
+       // TODO: 386
+}
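
A standalone sketch of the table-driven step above, with ops reduced to
strings so it runs on its own (types and names here are illustrative,
not the compiler's): look up {v.Op, v.Args[0].Op} in the table, then
splice the address op's args in front of the memory op's remaining args.

    package main

    import "fmt"

    type value struct {
            op   string
            args []*value
    }

    // combine echoes one entry of the real table: {memory op, address op}
    // maps to the fused indexed op.
    var combine = map[[2]string]string{
            {"MOVLload", "LEAQ4"}: "MOVLloadidx4",
    }

    func main() {
            addr := &value{op: "LEAQ4", args: []*value{{op: "ptr"}, {op: "idx"}}}
            load := &value{op: "MOVLload", args: []*value{addr, {op: "mem"}}}
            if c, ok := combine[[2]string{load.op, load.args[0].op}]; ok {
                    // x.Args becomes x.Args[0].Args + x.Args[1:], per the comment above.
                    load.args = append(append([]*value{}, addr.args...), load.args[1:]...)
                    load.op = c
            }
            fmt.Println(load.op, len(load.args)) // MOVLloadidx4 3
    }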
diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go
index 2de4e133bfbfbcd2ddabb58fcd05ccdf297e1c9f..9ec4252def2adf502300536b6a0326df9604bd5c 100644 (file)
@@ -442,6 +442,7 @@ var passes = [...]pass{
        {name: "insert resched checks", fn: insertLoopReschedChecks,
                disabled: objabi.Preemptibleloops_enabled == 0}, // insert resched checks in loops.
        {name: "lower", fn: lower, required: true},
+       {name: "addressing modes", fn: addressingModes, required: false},
        {name: "lowered deadcode for cse", fn: deadcode}, // deadcode immediately before CSE avoids CSE making dead values live again
        {name: "lowered cse", fn: cse},
        {name: "elim unread autos", fn: elimUnreadAutos},
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index c165fed48567d2e8fa7c6c727d9617ff5ac16283..1d24d780c63537587ae93ba39e13a23149c01248 100644 (file)
 (MOVWQZX x) && zeroUpper48Bits(x,3) -> x
 (MOVBQZX x) && zeroUpper56Bits(x,3) -> x
 
-(MOVBQZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
-(MOVWQZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
-(MOVWQZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
-(MOVLQZX x:(MOVLloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLloadidx1 <v.Type> [off] {sym} ptr idx mem)
-(MOVLQZX x:(MOVLloadidx4 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLloadidx4 <v.Type> [off] {sym} ptr idx mem)
-
 // replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
 (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBQZX x)
 (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWQZX x)
        && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
        ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
 
-// generating indexed loads and stores
-(MOV(B|W|L|Q|SS|SD)load [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-       (MOV(B|W|L|Q|SS|SD)loadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVWload [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-       (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOV(L|SS)load [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-       (MOV(L|SS)loadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOV(L|Q|SD)load [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-       (MOV(L|Q|SD)loadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-
-(MOV(B|W|L|Q|SS|SD)store [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-       (MOV(B|W|L|Q|SS|SD)storeidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(MOVWstore [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-       (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(MOV(L|SS)store [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-       (MOV(L|SS)storeidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(MOV(L|Q|SD)store [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-       (MOV(L|Q|SD)storeidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-
-(MOV(B|W|L|Q|SS|SD)load [off] {sym} (ADDQ ptr idx) mem) && ptr.Op != OpSB ->
-       (MOV(B|W|L|Q|SS|SD)loadidx1 [off] {sym} ptr idx mem)
-(MOV(B|W|L|Q|SS|SD)store [off] {sym} (ADDQ ptr idx) val mem) && ptr.Op != OpSB ->
-       (MOV(B|W|L|Q|SS|SD)storeidx1 [off] {sym} ptr idx val mem)
-
-(MOV(B|W|L|Q)storeconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
-       (MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVWstoreconst [x] {sym1} (LEAQ2 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
-       (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVLstoreconst [x] {sym1} (LEAQ4 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
-       (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVQstoreconst [x] {sym1} (LEAQ8 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
-       (MOVQstoreconstidx8 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-
-(MOV(B|W|L|Q)storeconst [x] {sym} (ADDQ ptr idx) mem) -> (MOV(B|W|L|Q)storeconstidx1 [x] {sym} ptr idx mem)
-
-// combine SHLQ into indexed loads and stores
-(MOVWloadidx1 [c] {sym} ptr (SHLQconst [1] idx) mem) -> (MOVWloadidx2 [c] {sym} ptr idx mem)
-(MOV(L|SS)loadidx1 [c] {sym} ptr (SHLQconst [2] idx) mem) -> (MOV(L|SS)loadidx4 [c] {sym} ptr idx mem)
-(MOV(L|Q|SD)loadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem) -> (MOV(L|Q|SD)loadidx8 [c] {sym} ptr idx mem)
-
-(MOVWstoreidx1 [c] {sym} ptr (SHLQconst [1] idx) val mem) -> (MOVWstoreidx2 [c] {sym} ptr idx val mem)
-(MOV(L|SS)storeidx1 [c] {sym} ptr (SHLQconst [2] idx) val mem) -> (MOV(L|SS)storeidx4 [c] {sym} ptr idx val mem)
-(MOV(L|Q|SD)storeidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem) -> (MOV(L|Q|SD)storeidx8 [c] {sym} ptr idx val mem)
-(MOVWstoreconstidx1 [c] {sym} ptr (SHLQconst [1] idx) mem) -> (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
-(MOVLstoreconstidx1 [c] {sym} ptr (SHLQconst [2] idx) mem) -> (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
-(MOVQstoreconstidx1 [c] {sym} ptr (SHLQconst [3] idx) mem) -> (MOVQstoreconstidx8 [c] {sym} ptr idx mem)
-
-// combine ADDQ into pointer of indexed loads and stores
-(MOV(B|W|L|Q|SS|SD)loadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)loadidx1 [c+d] {sym} ptr idx mem)
-(MOVWloadidx2 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOVWloadidx2 [c+d] {sym} ptr idx mem)
-(MOV(L|SS)loadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(L|SS)loadidx4 [c+d] {sym} ptr idx mem)
-(MOV(L|Q|SD)loadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(L|Q|SD)loadidx8 [c+d] {sym} ptr idx mem)
-
-(MOV(B|W|L|Q|SS|SD)storeidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)storeidx1 [c+d] {sym} ptr idx val mem)
-(MOVWstoreidx2 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOVWstoreidx2 [c+d] {sym} ptr idx val mem)
-(MOV(L|SS)storeidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(L|SS)storeidx4 [c+d] {sym} ptr idx val mem)
-(MOV(L|Q|SD)storeidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(L|Q|SD)storeidx8 [c+d] {sym} ptr idx val mem)
-
-
-// combine ADDQ into index of indexed loads and stores
-(MOV(B|W|L|Q|SS|SD)loadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)  && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)loadidx1 [c+d] {sym} ptr idx mem)
-(MOVWloadidx2 [c] {sym} ptr (ADDQconst [d] idx) mem)  && is32Bit(c+2*d) -> (MOVWloadidx2 [c+2*d] {sym} ptr idx mem)
-(MOV(L|SS)loadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem)  && is32Bit(c+4*d) -> (MOV(L|SS)loadidx4 [c+4*d] {sym} ptr idx mem)
-(MOV(L|Q|SD)loadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem)  && is32Bit(c+8*d) -> (MOV(L|Q|SD)loadidx8 [c+8*d] {sym} ptr idx mem)
-
-(MOV(B|W|L|Q|SS|SD)storeidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)  && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)storeidx1 [c+d] {sym} ptr idx val mem)
-(MOVWstoreidx2 [c] {sym} ptr (ADDQconst [d] idx) val mem)  && is32Bit(c+2*d) -> (MOVWstoreidx2 [c+2*d] {sym} ptr idx val mem)
-(MOV(L|SS)storeidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem)  && is32Bit(c+4*d) -> (MOV(L|SS)storeidx4 [c+4*d] {sym} ptr idx val mem)
-(MOV(L|Q|SD)storeidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem)  && is32Bit(c+8*d) -> (MOV(L|Q|SD)storeidx8 [c+8*d] {sym} ptr idx val mem)
-
-(MOV(B|W|L|Q)storeconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-(MOVWstoreconstidx2 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-(MOVLstoreconstidx4 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-(MOVQstoreconstidx8 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVQstoreconstidx8 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-
-(MOV(B|W|L|Q)storeconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(c) -> (MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-(MOVWstoreconstidx2 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(2*c) -> (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
-(MOVLstoreconstidx4 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(4*c) -> (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
-(MOVQstoreconstidx8 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(8*c) -> (MOVQstoreconstidx8 [ValAndOff(x).add(8*c)] {sym} ptr idx mem)
-
 // fold LEAQs together
 (LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
       (LEAQ [off1+off2] {mergeSym(sym1,sym2)} x)
 (LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
       (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
 
+// Fold LEAQ[1248] into LEAQ[1248]. Only some such merges are possible.
+(LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+      (LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} x y)
+(LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+      (LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} y x)
+(LEAQ2 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(off1+2*off2) && sym2 == nil ->
+      (LEAQ4 [off1+2*off2] {sym1} x y)
+(LEAQ4 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(off1+4*off2) && sym2 == nil ->
+      (LEAQ8 [off1+4*off2] {sym1} x y)
+// TODO: more?
+
 // Absorb InvertFlags into branches.
 (LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
 (GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
 
 // Little-endian loads
 
-(ORL                  x0:(MOVBload [i0] {s} p mem)
-    sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
+(ORL                  x0:(MOVBload [i0] {s} p0 mem)
+    sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p1 mem)))
   && i1 == i0+1
   && x0.Uses == 1
   && x1.Uses == 1
   && sh.Uses == 1
+  && same(p0, p1, 1)
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+  -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem)
 
-(ORQ                  x0:(MOVBload [i0] {s} p mem)
-    sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem)))
+(ORQ                  x0:(MOVBload [i0] {s} p0 mem)
+    sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p1 mem)))
   && i1 == i0+1
   && x0.Uses == 1
   && x1.Uses == 1
   && sh.Uses == 1
+  && same(p0, p1, 1)
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+  -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem)
 
-(ORL                   x0:(MOVWload [i0] {s} p mem)
-    sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem)))
+(ORL                   x0:(MOVWload [i0] {s} p0 mem)
+    sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p1 mem)))
   && i1 == i0+2
   && x0.Uses == 1
   && x1.Uses == 1
   && sh.Uses == 1
+  && same(p0, p1, 1)
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
+  -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem)
 
-(ORQ                   x0:(MOVWload [i0] {s} p mem)
-    sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem)))
+(ORQ                   x0:(MOVWload [i0] {s} p0 mem)
+    sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p1 mem)))
   && i1 == i0+2
   && x0.Uses == 1
   && x1.Uses == 1
   && sh.Uses == 1
+  && same(p0, p1, 1)
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
+  -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem)
 
-(ORQ                   x0:(MOVLload [i0] {s} p mem)
-    sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)))
+(ORQ                   x0:(MOVLload [i0] {s} p0 mem)
+    sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p1 mem)))
   && i1 == i0+4
   && x0.Uses == 1
   && x1.Uses == 1
   && sh.Uses == 1
+  && same(p0, p1, 1)
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
+  -> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p0 mem)
 
 (ORL
-    s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))
+    s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p0 mem))
     or:(ORL
-        s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))
+        s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p1 mem))
        y))
   && i1 == i0+1
   && j1 == j0+8
   && s0.Uses == 1
   && s1.Uses == 1
   && or.Uses == 1
+  && same(p0, p1, 1)
   && mergePoint(b,x0,x1,y) != nil
   && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+  -> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p0 mem)) y)
 
 (ORQ
-    s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))
+    s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p0 mem))
     or:(ORQ
-        s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))
+        s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p1 mem))
        y))
   && i1 == i0+1
   && j1 == j0+8
   && s0.Uses == 1
   && s1.Uses == 1
   && or.Uses == 1
+  && same(p0, p1, 1)
   && mergePoint(b,x0,x1,y) != nil
   && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+  -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p0 mem)) y)
 
 (ORQ
-    s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem))
+    s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p0 mem))
     or:(ORQ
-        s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem))
+        s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p1 mem))
        y))
   && i1 == i0+2
   && j1 == j0+16
   && s0.Uses == 1
   && s1.Uses == 1
   && or.Uses == 1
+  && same(p0, p1, 1)
   && mergePoint(b,x0,x1,y) != nil
   && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
+  -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p0 mem)) y)
 
 // Little-endian indexed loads
 
-(ORL                  x0:(MOVBloadidx1 [i0] {s} p idx mem)
-    sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-  && i1 == i0+1
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
-
-(ORQ                  x0:(MOVBloadidx1 [i0] {s} p idx mem)
-    sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-  && i1 == i0+1
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
-
-(ORL                   x0:(MOVWloadidx1 [i0] {s} p idx mem)
-    sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
-  && i1 == i0+2
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
-
-(ORQ                   x0:(MOVWloadidx1 [i0] {s} p idx mem)
-    sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
-  && i1 == i0+2
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
-
-(ORQ                   x0:(MOVLloadidx1 [i0] {s} p idx mem)
-    sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)))
-  && i1 == i0+4
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
-
-(ORL
-    s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
-    or:(ORL
-        s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
-       y))
-  && i1 == i0+1
-  && j1 == j0+8
-  && j0 % 16 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
-
-(ORQ
-    s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
-    or:(ORQ
-        s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
-       y))
-  && i1 == i0+1
-  && j1 == j0+8
-  && j0 % 16 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
-
-(ORQ
-    s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem))
-    or:(ORQ
-        s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))
-       y))
-  && i1 == i0+2
-  && j1 == j0+16
-  && j0 % 32 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+// Move constant offsets from LEAQx up into load. This lets the above combining
+// rules discover indexed load-combining instances.
+(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
+-> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) mem)
+(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
+-> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) mem)
+(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
+-> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) mem)
+(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
+-> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) mem)
+
+(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
+-> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) val mem)
+(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
+-> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) val mem)
+(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
+-> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) val mem)
+(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
+-> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) val mem)
 
 // Big-endian loads
 
 (ORL
-                       x1:(MOVBload [i1] {s} p mem)
-    sh:(SHLLconst [8]  x0:(MOVBload [i0] {s} p mem)))
-  && i1 == i0+1
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
-
-(ORQ
-                       x1:(MOVBload [i1] {s} p mem)
-    sh:(SHLQconst [8]  x0:(MOVBload [i0] {s} p mem)))
-  && i1 == i0+1
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
-
-(ORL
-                        r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))
-    sh:(SHLLconst [16]  r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
-  && i1 == i0+2
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && r0.Uses == 1
-  && r1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, r0, r1, sh)
-  -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
-
-(ORQ
-                        r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))
-    sh:(SHLQconst [16]  r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
-  && i1 == i0+2
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && r0.Uses == 1
-  && r1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, r0, r1, sh)
-  -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
-
-(ORQ
-                        r1:(BSWAPL x1:(MOVLload [i1] {s} p mem))
-    sh:(SHLQconst [32]  r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))))
-  && i1 == i0+4
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && r0.Uses == 1
-  && r1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, r0, r1, sh)
-  -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
-
-(ORL
-    s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))
-    or:(ORL
-        s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))
-       y))
-  && i1 == i0+1
-  && j1 == j0-8
-  && j1 % 16 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
-
-(ORQ
-    s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))
-    or:(ORQ
-        s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))
-       y))
-  && i1 == i0+1
-  && j1 == j0-8
-  && j1 % 16 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
-
-(ORQ
-    s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))
-    or:(ORQ
-        s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))
-       y))
-  && i1 == i0+2
-  && j1 == j0-16
-  && j1 % 32 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && r0.Uses == 1
-  && r1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, r0, r1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p mem))) y)
-
-// Big-endian indexed loads
-
-(ORL
-                       x1:(MOVBloadidx1 [i1] {s} p idx mem)
-    sh:(SHLLconst [8]  x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+                       x1:(MOVBload [i1] {s} p0 mem)
+    sh:(SHLLconst [8]  x0:(MOVBload [i0] {s} p1 mem)))
   && i1 == i0+1
   && x0.Uses == 1
   && x1.Uses == 1
   && sh.Uses == 1
+  && same(p0, p1, 1)
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p0 mem))
 
 (ORQ
-                       x1:(MOVBloadidx1 [i1] {s} p idx mem)
-    sh:(SHLQconst [8]  x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+                       x1:(MOVBload [i1] {s} p0 mem)
+    sh:(SHLQconst [8]  x0:(MOVBload [i0] {s} p1 mem)))
   && i1 == i0+1
   && x0.Uses == 1
   && x1.Uses == 1
   && sh.Uses == 1
+  && same(p0, p1, 1)
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p0 mem))
 
 (ORL
-                        r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))
-    sh:(SHLLconst [16]  r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+                        r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem))
+    sh:(SHLLconst [16]  r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem))))
   && i1 == i0+2
   && x0.Uses == 1
   && x1.Uses == 1
   && r0.Uses == 1
   && r1.Uses == 1
   && sh.Uses == 1
+  && same(p0, p1, 1)
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, r0, r1, sh)
-  -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+  -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p0 mem))
 
 (ORQ
-                        r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))
-    sh:(SHLQconst [16]  r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+                        r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem))
+    sh:(SHLQconst [16]  r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem))))
   && i1 == i0+2
   && x0.Uses == 1
   && x1.Uses == 1
   && r0.Uses == 1
   && r1.Uses == 1
   && sh.Uses == 1
+  && same(p0, p1, 1)
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, r0, r1, sh)
-  -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+  -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p0 mem))
 
 (ORQ
-                        r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem))
-    sh:(SHLQconst [32]  r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))))
+                        r1:(BSWAPL x1:(MOVLload [i1] {s} p0 mem))
+    sh:(SHLQconst [32]  r0:(BSWAPL x0:(MOVLload [i0] {s} p1 mem))))
   && i1 == i0+4
   && x0.Uses == 1
   && x1.Uses == 1
   && r0.Uses == 1
   && r1.Uses == 1
   && sh.Uses == 1
+  && same(p0, p1, 1)
   && mergePoint(b,x0,x1) != nil
   && clobber(x0, x1, r0, r1, sh)
-  -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+  -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p0 mem))
 
 (ORL
-    s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
+    s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p0 mem))
     or:(ORL
-        s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
+        s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p1 mem))
        y))
   && i1 == i0+1
   && j1 == j0-8
   && s0.Uses == 1
   && s1.Uses == 1
   && or.Uses == 1
+  && same(p0, p1, 1)
   && mergePoint(b,x0,x1,y) != nil
   && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+  -> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p0 mem))) y)
 
 (ORQ
-    s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
+    s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p0 mem))
     or:(ORQ
-        s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
+        s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p1 mem))
        y))
   && i1 == i0+1
   && j1 == j0-8
   && s0.Uses == 1
   && s1.Uses == 1
   && or.Uses == 1
+  && same(p0, p1, 1)
   && mergePoint(b,x0,x1,y) != nil
   && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+  -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p0 mem))) y)
 
 (ORQ
-    s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem)))
+    s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p0 mem)))
     or:(ORQ
-        s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+        s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p1 mem)))
        y))
   && i1 == i0+2
   && j1 == j0-16
   && s0.Uses == 1
   && s1.Uses == 1
   && or.Uses == 1
+  && same(p0, p1, 1)
   && mergePoint(b,x0,x1,y) != nil
   && clobber(x0, x1, r0, r1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+  -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p0 mem))) y)
 
 // Combine 2 byte stores + shift into rolw 8 + word store
-(MOVBstore [i] {s} p w
-  x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
+(MOVBstore [i] {s} p1 w
+  x0:(MOVBstore [i-1] {s} p0 (SHRWconst [8] w) mem))
   && x0.Uses == 1
+  && same(p0, p1, 1)
   && clobber(x0)
-  -> (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem)
-
-(MOVBstoreidx1 [i] {s} p idx w
-  x0:(MOVBstoreidx1 [i-1] {s} p idx (SHRWconst [8] w) mem))
-  && x0.Uses == 1
-  && clobber(x0)
-  -> (MOVWstoreidx1 [i-1] {s} p idx (ROLWconst <w.Type> [8] w) mem)
+  -> (MOVWstore [i-1] {s} p0 (ROLWconst <w.Type> [8] w) mem)
 
 // Combine stores + shifts into bswap and larger (unaligned) stores
-(MOVBstore [i] {s} p w
-  x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w)
-  x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w)
-  x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && x2.Uses == 1
-  && clobber(x0, x1, x2)
-  -> (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem)
-
-(MOVBstoreidx1 [i] {s} p idx w
-  x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w)
-  x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w)
-  x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
+(MOVBstore [i] {s} p3 w
+  x2:(MOVBstore [i-1] {s} p2 (SHRLconst [8] w)
+  x1:(MOVBstore [i-2] {s} p1 (SHRLconst [16] w)
+  x0:(MOVBstore [i-3] {s} p0 (SHRLconst [24] w) mem))))
   && x0.Uses == 1
   && x1.Uses == 1
   && x2.Uses == 1
+  && same(p0, p1, 1)
+  && same(p1, p2, 1)
+  && same(p2, p3, 1)
   && clobber(x0, x1, x2)
-  -> (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
-
-(MOVBstore [i] {s} p w
-  x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w)
-  x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w)
-  x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w)
-  x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w)
-  x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w)
-  x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w)
-  x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && x4.Uses == 1
-  && x5.Uses == 1
-  && x6.Uses == 1
-  && clobber(x0, x1, x2, x3, x4, x5, x6)
-  -> (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem)
-
-(MOVBstoreidx1 [i] {s} p idx w
-  x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w)
-  x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w)
-  x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w)
-  x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w)
-  x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w)
-  x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w)
-  x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+  -> (MOVLstore [i-3] {s} p0 (BSWAPL <w.Type> w) mem)
+
+(MOVBstore [i] {s} p7 w
+  x6:(MOVBstore [i-1] {s} p6 (SHRQconst [8] w)
+  x5:(MOVBstore [i-2] {s} p5 (SHRQconst [16] w)
+  x4:(MOVBstore [i-3] {s} p4 (SHRQconst [24] w)
+  x3:(MOVBstore [i-4] {s} p3 (SHRQconst [32] w)
+  x2:(MOVBstore [i-5] {s} p2 (SHRQconst [40] w)
+  x1:(MOVBstore [i-6] {s} p1 (SHRQconst [48] w)
+  x0:(MOVBstore [i-7] {s} p0 (SHRQconst [56] w) mem))))))))
   && x0.Uses == 1
   && x1.Uses == 1
   && x2.Uses == 1
   && x4.Uses == 1
   && x5.Uses == 1
   && x6.Uses == 1
+  && same(p0, p1, 1)
+  && same(p1, p2, 1)
+  && same(p2, p3, 1)
+  && same(p3, p4, 1)
+  && same(p4, p5, 1)
+  && same(p5, p6, 1)
+  && same(p6, p7, 1)
   && clobber(x0, x1, x2, x3, x4, x5, x6)
-  -> (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+  -> (MOVQstore [i-7] {s} p0 (BSWAPQ <w.Type> w) mem)
 
 // Combine constant stores into larger (unaligned) stores.
-(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
+(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
   && x.Uses == 1
+  && same(p0, p1, 1)
   && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
   && clobber(x)
-  -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
-(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
+  -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
+(MOVBstoreconst [a] {s} p1 x:(MOVBstoreconst [c] {s} p0 mem))
   && x.Uses == 1
+  && same(p0, p1, 1)
   && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
   && clobber(x)
-  -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
-(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
+  -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
+(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
   && x.Uses == 1
+  && same(p0, p1, 1)
   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
   && clobber(x)
-  -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
-(MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
+  -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
+(MOVWstoreconst [a] {s} p1 x:(MOVWstoreconst [c] {s} p0 mem))
   && x.Uses == 1
+  && same(p0, p1, 1)
   && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
   && clobber(x)
-  -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
-(MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem))
+  -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
+(MOVLstoreconst [c] {s} p1 x:(MOVLstoreconst [a] {s} p0 mem))
   && x.Uses == 1
+  && same(p0, p1, 1)
   && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
   && clobber(x)
-  -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
-(MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem))
+  -> (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
+(MOVLstoreconst [a] {s} p1 x:(MOVLstoreconst [c] {s} p0 mem))
   && x.Uses == 1
+  && same(p0, p1, 1)
   && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
   && clobber(x)
-  -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
-(MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem))
+  -> (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
+(MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [c2] {s} p0 mem))
   && config.useSSE
   && x.Uses == 1
+  && same(p0, p1, 1)
   && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off()
   && ValAndOff(c).Val() == 0
   && ValAndOff(c2).Val() == 0
   && clobber(x)
-  -> (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem)
-
-(MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem))
-  && x.Uses == 1
-  && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
-  && clobber(x)
-  -> (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem)
-(MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
-  && x.Uses == 1
-  && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
-  && clobber(x)
-  -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
-(MOVLstoreconstidx1 [c] {s} p i x:(MOVLstoreconstidx1 [a] {s} p i mem))
-  && x.Uses == 1
-  && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
-  && clobber(x)
-  -> (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p i (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
-
-(MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
-  && x.Uses == 1
-  && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
-  && clobber(x)
-  -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLQconst <i.Type> [1] i) mem)
-(MOVLstoreconstidx4 [c] {s} p i x:(MOVLstoreconstidx4 [a] {s} p i mem))
-  && x.Uses == 1
-  && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
-  && clobber(x)
-  -> (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p (SHLQconst <i.Type> [2] i) (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
+  -> (MOVOstore [ValAndOff(c2).Off()] {s} p0 (MOVOconst [0]) mem)
 
 // Combine stores into larger (unaligned) stores.
-(MOVBstore [i] {s} p (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVWstore [i-1] {s} p w mem)
-(MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHR(W|L|Q)const [8] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVWstore [i] {s} p w mem)
-(MOVBstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p w0:(SHR(L|Q)const [j-8] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVWstore [i-1] {s} p w0 mem)
-(MOVWstore [i] {s} p (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p w mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVLstore [i-2] {s} p w mem)
-(MOVWstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p w0:(SHR(L|Q)const [j-16] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVLstore [i-2] {s} p w0 mem)
-(MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVQstore [i-4] {s} p w mem)
-(MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVQstore [i-4] {s} p w0 mem)
-
-(MOVBstoreidx1 [i] {s} p idx (SHR(W|L|Q)const [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVWstoreidx1 [i-1] {s} p idx w mem)
-(MOVBstoreidx1 [i] {s} p idx (SHR(L|Q)const [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHR(L|Q)const [j-8] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
-(MOVWstoreidx1 [i] {s} p idx (SHR(L|Q)const [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
+(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p0 w mem))
   && x.Uses == 1
+  && same(p0, p1, 1)
   && clobber(x)
-  -> (MOVLstoreidx1 [i-2] {s} p idx w mem)
-(MOVWstoreidx1 [i] {s} p idx (SHR(L|Q)const [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHR(L|Q)const [j-16] w) mem))
+  -> (MOVWstore [i-1] {s} p0 w mem)
+(MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHR(W|L|Q)const [8] w) mem))
   && x.Uses == 1
+  && same(p0, p1, 1)
   && clobber(x)
-  -> (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
-(MOVLstoreidx1 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx1 [i-4] {s} p idx w mem))
+  -> (MOVWstore [i] {s} p0 w mem)
+(MOVBstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHR(L|Q)const [j-8] w) mem))
   && x.Uses == 1
+  && same(p0, p1, 1)
   && clobber(x)
-  -> (MOVQstoreidx1 [i-4] {s} p idx w mem)
-(MOVLstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx1 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
+  -> (MOVWstore [i-1] {s} p0 w0 mem)
+(MOVWstore [i] {s} p1 (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p0 w mem))
   && x.Uses == 1
+  && same(p0, p1, 1)
   && clobber(x)
-  -> (MOVQstoreidx1 [i-4] {s} p idx w0 mem)
-
-(MOVWstoreidx2 [i] {s} p idx (SHR(L|Q)const [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
+  -> (MOVLstore [i-2] {s} p0 w mem)
+(MOVWstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHR(L|Q)const [j-16] w) mem))
   && x.Uses == 1
+  && same(p0, p1, 1)
   && clobber(x)
-  -> (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
-(MOVWstoreidx2 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
+  -> (MOVLstore [i-2] {s} p0 w0 mem)
+(MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p0 w mem))
   && x.Uses == 1
+  && same(p0, p1, 1)
   && clobber(x)
-  -> (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w0 mem)
-(MOVLstoreidx4 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx4 [i-4] {s} p idx w mem))
+  -> (MOVQstore [i-4] {s} p0 w mem)
+(MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p0 w0:(SHRQconst [j-32] w) mem))
   && x.Uses == 1
+  && same(p0, p1, 1)
   && clobber(x)
-  -> (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w mem)
-(MOVLstoreidx4 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx4 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  -> (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w0 mem)
+  -> (MOVQstore [i-4] {s} p0 w0 mem)
 
 (MOVBstore [i] {s} p
   x1:(MOVBload [j] {s2} p2 mem)
 (BSFQ (ORQconst <t> [1<<8] (MOVBQZX x))) -> (BSFQ (ORQconst <t> [1<<8] x))
 (BSFQ (ORQconst <t> [1<<16] (MOVWQZX x))) -> (BSFQ (ORQconst <t> [1<<16] x))
 
-// Simplify indexed loads/stores
-(MOVBstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVBstore [i+c] {s} p w mem)
-(MOVWstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVWstore [i+c] {s} p w mem)
-(MOVLstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVLstore [i+c] {s} p w mem)
-(MOVQstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVQstore [i+c] {s} p w mem)
-(MOVWstoreidx2 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+2*c) -> (MOVWstore [i+2*c] {s} p w mem)
-(MOVLstoreidx4 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+4*c) -> (MOVLstore [i+4*c] {s} p w mem)
-(MOVLstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVLstore [i+8*c] {s} p w mem)
-(MOVQstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVQstore [i+8*c] {s} p w mem)
-(MOVSSstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVSSstore [i+c] {s} p w mem)
-(MOVSSstoreidx4 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+4*c) -> (MOVSSstore [i+4*c] {s} p w mem)
-(MOVSDstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVSDstore [i+c] {s} p w mem)
-(MOVSDstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVSDstore [i+8*c] {s} p w mem)
-(MOVBloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVBload [i+c] {s} p mem)
-(MOVWloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVWload [i+c] {s} p mem)
-(MOVLloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVLload [i+c] {s} p mem)
-(MOVQloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVQload [i+c] {s} p mem)
-(MOVWloadidx2 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+2*c) -> (MOVWload [i+2*c] {s} p mem)
-(MOVLloadidx4 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+4*c) -> (MOVLload [i+4*c] {s} p mem)
-(MOVLloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVLload [i+8*c] {s} p mem)
-(MOVQloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVQload [i+8*c] {s} p mem)
-(MOVSSloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVSSload [i+c] {s} p mem)
-(MOVSSloadidx4 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+4*c) -> (MOVSSload [i+4*c] {s} p mem)
-(MOVSDloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVSDload [i+c] {s} p mem)
-(MOVSDloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVSDload [i+8*c] {s} p mem)
-
-// Combine consts into storeidx.
-// Note that when c == 0, it takes more bytes to encode
-// the immediate $0 than to zero a register and use it.
-// We do the rewrite anyway, to minimize register pressure.
-(MOVBstoreidx1     [off] {s} ptr idx (MOVLconst [c]) mem) && validValAndOff(int64(int8(c)), off)  -> (MOVBstoreconstidx1     [makeValAndOff(int64(int8(c)), off)]  {s} ptr idx mem)
-(MOVWstoreidx(1|2) [off] {s} ptr idx (MOVLconst [c]) mem) && validValAndOff(int64(int16(c)), off) -> (MOVWstoreconstidx(1|2) [makeValAndOff(int64(int16(c)), off)] {s} ptr idx mem)
-(MOVLstoreidx(1|4) [off] {s} ptr idx (MOVQconst [c]) mem) && validValAndOff(int64(int32(c)), off) -> (MOVLstoreconstidx(1|4) [makeValAndOff(int64(int32(c)), off)] {s} ptr idx mem)
-(MOVQstoreidx(1|8) [off] {s} ptr idx (MOVQconst [c]) mem) && validValAndOff(c, off)               -> (MOVQstoreconstidx(1|8) [makeValAndOff(c, off)]               {s} ptr idx mem)
-
 // Redundant sign/zero extensions
 // Note: see issue 21963. We have to make sure we use the right type on
 // the resulting extension (the outer type, not the inner type).
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index 238e2430963ee3e25dfb65395bc21e9f5744ef2f..b3e7d347797b96ae375ce8e8e8e7cfc0529cc479 100644 (file)
@@ -1247,3 +1247,43 @@ func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
        copy(buf, src)
        return byteorder.Uint64(buf)
 }
+
+// same reports whether x and y are the same value.
+// It checks to at most the given depth, so it may report
+// a false negative.
+func same(x, y *Value, depth int) bool {
+       if x == y {
+               return true
+       }
+       if depth <= 0 {
+               return false
+       }
+       if x.Op != y.Op || x.Aux != y.Aux || x.AuxInt != y.AuxInt {
+               return false
+       }
+       if len(x.Args) != len(y.Args) {
+               return false
+       }
+       if opcodeTable[x.Op].commutative {
+               // Check exchanged ordering first.
+               for i, a := range x.Args {
+                       j := i
+                       if j < 2 {
+                               j ^= 1
+                       }
+                       b := y.Args[j]
+                       if !same(a, b, depth-1) {
+                               goto checkNormalOrder
+                       }
+               }
+               return true
+       checkNormalOrder:
+       }
+       for i, a := range x.Args {
+               b := y.Args[i]
+               if !same(a, b, depth-1) {
+                       return false
+               }
+       }
+       return true
+}
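
The same(p0, p1, 1) guards added to the rules above exist because, once
constant offsets are hoisted out of LEAQx ops, two loads can carry
pointer args that are distinct Values yet structurally equal (for
example, a commutative ADDQ with exchanged operands), which the old
syntactic pattern "p ... p" could not match. A standalone sketch of the
matching logic on a toy value type (simplified to two-argument
commutative ops; names are illustrative):

    package main

    import "fmt"

    type val struct {
            op          string
            commutative bool
            args        []*val
    }

    // same mirrors the helper above: pointer-equal values match; otherwise
    // ops and arity must match and args must match recursively, trying the
    // exchanged ordering first for commutative ops. Recursion stops at the
    // given depth, so it may report a false negative.
    func same(x, y *val, depth int) bool {
            if x == y {
                    return true
            }
            if depth <= 0 {
                    return false
            }
            if x.op != y.op || len(x.args) != len(y.args) {
                    return false
            }
            if x.commutative && len(x.args) == 2 &&
                    same(x.args[0], y.args[1], depth-1) && same(x.args[1], y.args[0], depth-1) {
                    return true
            }
            for i := range x.args {
                    if !same(x.args[i], y.args[i], depth-1) {
                            return false
                    }
            }
            return true
    }

    func main() {
            p, q := &val{op: "ptr"}, &val{op: "idx"}
            a := &val{op: "ADDQ", commutative: true, args: []*val{p, q}}
            b := &val{op: "ADDQ", commutative: true, args: []*val{q, p}}
            fmt.Println(same(a, b, 1)) // true: exchanged ordering matches
    }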
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index c37bae2c22cf6cd1bf72a88d006038fab3788a30..bd1f4c08e2321fa1522416acb308cbf9bbc875ef 100644 (file)
@@ -246,16 +246,10 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64MOVBatomicload(v)
        case OpAMD64MOVBload:
                return rewriteValueAMD64_OpAMD64MOVBload(v)
-       case OpAMD64MOVBloadidx1:
-               return rewriteValueAMD64_OpAMD64MOVBloadidx1(v)
        case OpAMD64MOVBstore:
                return rewriteValueAMD64_OpAMD64MOVBstore(v)
        case OpAMD64MOVBstoreconst:
                return rewriteValueAMD64_OpAMD64MOVBstoreconst(v)
-       case OpAMD64MOVBstoreconstidx1:
-               return rewriteValueAMD64_OpAMD64MOVBstoreconstidx1(v)
-       case OpAMD64MOVBstoreidx1:
-               return rewriteValueAMD64_OpAMD64MOVBstoreidx1(v)
        case OpAMD64MOVLQSX:
                return rewriteValueAMD64_OpAMD64MOVLQSX(v)
        case OpAMD64MOVLQSXload:
@@ -270,26 +264,10 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64MOVLi2f(v)
        case OpAMD64MOVLload:
                return rewriteValueAMD64_OpAMD64MOVLload(v)
-       case OpAMD64MOVLloadidx1:
-               return rewriteValueAMD64_OpAMD64MOVLloadidx1(v)
-       case OpAMD64MOVLloadidx4:
-               return rewriteValueAMD64_OpAMD64MOVLloadidx4(v)
-       case OpAMD64MOVLloadidx8:
-               return rewriteValueAMD64_OpAMD64MOVLloadidx8(v)
        case OpAMD64MOVLstore:
                return rewriteValueAMD64_OpAMD64MOVLstore(v)
        case OpAMD64MOVLstoreconst:
                return rewriteValueAMD64_OpAMD64MOVLstoreconst(v)
-       case OpAMD64MOVLstoreconstidx1:
-               return rewriteValueAMD64_OpAMD64MOVLstoreconstidx1(v)
-       case OpAMD64MOVLstoreconstidx4:
-               return rewriteValueAMD64_OpAMD64MOVLstoreconstidx4(v)
-       case OpAMD64MOVLstoreidx1:
-               return rewriteValueAMD64_OpAMD64MOVLstoreidx1(v)
-       case OpAMD64MOVLstoreidx4:
-               return rewriteValueAMD64_OpAMD64MOVLstoreidx4(v)
-       case OpAMD64MOVLstoreidx8:
-               return rewriteValueAMD64_OpAMD64MOVLstoreidx8(v)
        case OpAMD64MOVOload:
                return rewriteValueAMD64_OpAMD64MOVOload(v)
        case OpAMD64MOVOstore:
@@ -302,46 +280,18 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64MOVQi2f(v)
        case OpAMD64MOVQload:
                return rewriteValueAMD64_OpAMD64MOVQload(v)
-       case OpAMD64MOVQloadidx1:
-               return rewriteValueAMD64_OpAMD64MOVQloadidx1(v)
-       case OpAMD64MOVQloadidx8:
-               return rewriteValueAMD64_OpAMD64MOVQloadidx8(v)
        case OpAMD64MOVQstore:
                return rewriteValueAMD64_OpAMD64MOVQstore(v)
        case OpAMD64MOVQstoreconst:
                return rewriteValueAMD64_OpAMD64MOVQstoreconst(v)
-       case OpAMD64MOVQstoreconstidx1:
-               return rewriteValueAMD64_OpAMD64MOVQstoreconstidx1(v)
-       case OpAMD64MOVQstoreconstidx8:
-               return rewriteValueAMD64_OpAMD64MOVQstoreconstidx8(v)
-       case OpAMD64MOVQstoreidx1:
-               return rewriteValueAMD64_OpAMD64MOVQstoreidx1(v)
-       case OpAMD64MOVQstoreidx8:
-               return rewriteValueAMD64_OpAMD64MOVQstoreidx8(v)
        case OpAMD64MOVSDload:
                return rewriteValueAMD64_OpAMD64MOVSDload(v)
-       case OpAMD64MOVSDloadidx1:
-               return rewriteValueAMD64_OpAMD64MOVSDloadidx1(v)
-       case OpAMD64MOVSDloadidx8:
-               return rewriteValueAMD64_OpAMD64MOVSDloadidx8(v)
        case OpAMD64MOVSDstore:
                return rewriteValueAMD64_OpAMD64MOVSDstore(v)
-       case OpAMD64MOVSDstoreidx1:
-               return rewriteValueAMD64_OpAMD64MOVSDstoreidx1(v)
-       case OpAMD64MOVSDstoreidx8:
-               return rewriteValueAMD64_OpAMD64MOVSDstoreidx8(v)
        case OpAMD64MOVSSload:
                return rewriteValueAMD64_OpAMD64MOVSSload(v)
-       case OpAMD64MOVSSloadidx1:
-               return rewriteValueAMD64_OpAMD64MOVSSloadidx1(v)
-       case OpAMD64MOVSSloadidx4:
-               return rewriteValueAMD64_OpAMD64MOVSSloadidx4(v)
        case OpAMD64MOVSSstore:
                return rewriteValueAMD64_OpAMD64MOVSSstore(v)
-       case OpAMD64MOVSSstoreidx1:
-               return rewriteValueAMD64_OpAMD64MOVSSstoreidx1(v)
-       case OpAMD64MOVSSstoreidx4:
-               return rewriteValueAMD64_OpAMD64MOVSSstoreidx4(v)
        case OpAMD64MOVWQSX:
                return rewriteValueAMD64_OpAMD64MOVWQSX(v)
        case OpAMD64MOVWQSXload:
@@ -350,22 +300,10 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64MOVWQZX(v)
        case OpAMD64MOVWload:
                return rewriteValueAMD64_OpAMD64MOVWload(v)
-       case OpAMD64MOVWloadidx1:
-               return rewriteValueAMD64_OpAMD64MOVWloadidx1(v)
-       case OpAMD64MOVWloadidx2:
-               return rewriteValueAMD64_OpAMD64MOVWloadidx2(v)
        case OpAMD64MOVWstore:
                return rewriteValueAMD64_OpAMD64MOVWstore(v)
        case OpAMD64MOVWstoreconst:
                return rewriteValueAMD64_OpAMD64MOVWstoreconst(v)
-       case OpAMD64MOVWstoreconstidx1:
-               return rewriteValueAMD64_OpAMD64MOVWstoreconstidx1(v)
-       case OpAMD64MOVWstoreconstidx2:
-               return rewriteValueAMD64_OpAMD64MOVWstoreconstidx2(v)
-       case OpAMD64MOVWstoreidx1:
-               return rewriteValueAMD64_OpAMD64MOVWstoreidx1(v)
-       case OpAMD64MOVWstoreidx2:
-               return rewriteValueAMD64_OpAMD64MOVWstoreidx2(v)
        case OpAMD64MULL:
                return rewriteValueAMD64_OpAMD64MULL(v)
        case OpAMD64MULLconst:
@@ -9347,6 +9285,64 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool {
                }
                break
        }
+       // match: (LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} x y)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpAMD64LEAQ1 {
+                               continue
+                       }
+                       off2 := v_1.AuxInt
+                       sym2 := v_1.Aux
+                       y := v_1.Args[1]
+                       if y != v_1.Args[0] || !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                               continue
+                       }
+                       v.reset(OpAMD64LEAQ2)
+                       v.AuxInt = off1 + off2
+                       v.Aux = mergeSym(sym1, sym2)
+                       v.AddArg2(x, y)
+                       return true
+               }
+               break
+       }
+       // match: (LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} x y))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} y x)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpAMD64LEAQ1 {
+                               continue
+                       }
+                       off2 := v_1.AuxInt
+                       sym2 := v_1.Aux
+                       _ = v_1.Args[1]
+                       v_1_0 := v_1.Args[0]
+                       v_1_1 := v_1.Args[1]
+                       for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 {
+                               if x != v_1_0 {
+                                       continue
+                               }
+                               y := v_1_1
+                               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                                       continue
+                               }
+                               v.reset(OpAMD64LEAQ2)
+                               v.AuxInt = off1 + off2
+                               v.Aux = mergeSym(sym1, sym2)
+                               v.AddArg2(y, x)
+                               return true
+                       }
+               }
+               break
+       }
        // match: (LEAQ1 [0] x y)
        // cond: v.Aux == nil
        // result: (ADDQ x y)
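
The two LEAQ1 rules added above are plain address arithmetic. Writing
LEAQk [off] a b for a + k*b + off, the first rule follows from:

        LEAQ1 [off1] x (LEAQ1 [off2] y y)
          = x + (y + y + off2) + off1
          = x + 2*y + (off1 + off2)
          = LEAQ2 [off1+off2] x y

The second rule is the same identity with the repeated operand inside:
x + (x + y + off2) + off1 = y + 2*x + (off1 + off2), i.e.
LEAQ2 [off1+off2] y x.
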
@@ -9464,6 +9460,28 @@ func rewriteValueAMD64_OpAMD64LEAQ2(v *Value) bool {
                v.AddArg2(x, y)
                return true
        }
+       // match: (LEAQ2 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y))
+       // cond: is32Bit(off1+2*off2) && sym2 == nil
+       // result: (LEAQ4 [off1+2*off2] {sym1} x y)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               x := v_0
+               if v_1.Op != OpAMD64LEAQ1 {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               y := v_1.Args[1]
+               if y != v_1.Args[0] || !(is32Bit(off1+2*off2) && sym2 == nil) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = off1 + 2*off2
+               v.Aux = sym1
+               v.AddArg2(x, y)
+               return true
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64LEAQ4(v *Value) bool {
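
The LEAQ2 rule above (and the matching LEAQ4 rule just below) doubles the
scale when the index is a value added to itself. Because the inner offset
gets scaled, the inner LEAQ1 must carry no symbol (sym2 == nil):

        LEAQ2 [off1] x (LEAQ1 [off2] y y)
          = x + 2*(y + y + off2) + off1
          = x + 4*y + (off1 + 2*off2)
          = LEAQ4 [off1+2*off2] x y

A symbol-relative inner offset could not be multiplied by 2 this way.
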
@@ -9549,6 +9567,28 @@ func rewriteValueAMD64_OpAMD64LEAQ4(v *Value) bool {
                v.AddArg2(x, y)
                return true
        }
+       // match: (LEAQ4 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y))
+       // cond: is32Bit(off1+4*off2) && sym2 == nil
+       // result: (LEAQ8 [off1+4*off2] {sym1} x y)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               x := v_0
+               if v_1.Op != OpAMD64LEAQ1 {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               y := v_1.Args[1]
+               if y != v_1.Args[0] || !(is32Bit(off1+4*off2) && sym2 == nil) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = off1 + 4*off2
+               v.Aux = sym1
+               v.AddArg2(x, y)
+               return true
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64LEAQ8(v *Value) bool {
@@ -9899,30 +9939,6 @@ func rewriteValueAMD64_OpAMD64MOVBQZX(v *Value) bool {
                v.copyOf(x)
                return true
        }
-       // match: (MOVBQZX x:(MOVBloadidx1 [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
-       for {
-               x := v_0
-               if x.Op != OpAMD64MOVBloadidx1 {
-                       break
-               }
-               off := x.AuxInt
-               sym := x.Aux
-               mem := x.Args[2]
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBloadidx1, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg3(ptr, idx, mem)
-               return true
-       }
        // match: (MOVBQZX (ANDLconst [c] x))
        // result: (ANDLconst [c & 0xff] x)
        for {
@@ -10000,6 +10016,7 @@ func rewriteValueAMD64_OpAMD64MOVBatomicload(v *Value) bool {
 func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
+       b := v.Block
        // match: (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _))
        // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
        // result: (MOVBQZX x)
@@ -10064,55 +10081,117 @@ func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool {
                v.AddArg2(base, mem)
                return true
        }
-       // match: (MOVBload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVBloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (MOVBload [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVBload [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ1 {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
                mem := v_1
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(i1 != 0 && is32Bit(i0+i1)) {
                        break
                }
-               v.reset(OpAMD64MOVBloadidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpAMD64MOVBload)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg2(v0, mem)
                return true
        }
-       // match: (MOVBload [off] {sym} (ADDQ ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVBloadidx1 [off] {sym} ptr idx mem)
+       // match: (MOVBload [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVBload [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQ {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ2 {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr := v_0_0
-                       idx := v_0_1
-                       mem := v_1
-                       if !(ptr.Op != OpSB) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVBloadidx1)
-                       v.AuxInt = off
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               mem := v_1
+               if !(i1 != 0 && is32Bit(i0+i1)) {
+                       break
                }
-               break
+               v.reset(OpAMD64MOVBload)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg2(v0, mem)
+               return true
+       }
+       // match: (MOVBload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVBload [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) mem)
+       for {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ4 {
+                       break
+               }
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               mem := v_1
+               if !(i1 != 0 && is32Bit(i0+i1)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBload)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg2(v0, mem)
+               return true
+       }
+       // match: (MOVBload [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVBload [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) mem)
+       for {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ8 {
+                       break
+               }
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               mem := v_1
+               if !(i1 != 0 && is32Bit(i0+i1)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBload)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg2(v0, mem)
+               return true
        }
        // match: (MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
        // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
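
Note the change of strategy in the MOVBload rules above (the MOVBstore
rules later in this diff follow the same shape): instead of producing an
indexed load directly, the rewrite only lifts the LEAQk's constant offset
onto the load, leaving a zero-offset LEAQk as the address. The i1 != 0
condition keeps each rule from matching its own output. The zero-offset
form is what the new addressingModes pass consumes; a sketch of the full
pipeline for one load (the last step happens in addressingmodes.go, and
at most one of the two symbols may be set, per that pass's aux-combining
checks):

        (MOVBload [i0] {s0} (LEAQ1 [i1] {s1} x y) mem)
        -> (MOVBload [i0+i1] {s0} (LEAQ1 [0] {s1} x y) mem)   // rules above
        -> (MOVBloadidx1 [i0+i1] {s0 or s1} x y mem)          // addressingModes pass
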
@@ -10172,86 +10251,6 @@ func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVBloadidx1(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVBloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       d := v_0.AuxInt
-                       ptr := v_0.Args[0]
-                       idx := v_1
-                       mem := v_2
-                       if !(is32Bit(c + d)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVBloadidx1)
-                       v.AuxInt = c + d
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVBloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       d := v_1.AuxInt
-                       idx := v_1.Args[0]
-                       mem := v_2
-                       if !(is32Bit(c + d)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVBloadidx1)
-                       v.AuxInt = c + d
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVBloadidx1 [i] {s} p (MOVQconst [c]) mem)
-       // cond: is32Bit(i+c)
-       // result: (MOVBload [i+c] {s} p mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       if v_1.Op != OpAMD64MOVQconst {
-                               continue
-                       }
-                       c := v_1.AuxInt
-                       mem := v_2
-                       if !(is32Bit(i + c)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVBload)
-                       v.AuxInt = i + c
-                       v.Aux = s
-                       v.AddArg2(p, mem)
-                       return true
-               }
-               break
-       }
-       return false
-}
 func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
@@ -10599,103 +10598,163 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                v.AddArg3(base, val, mem)
                return true
        }
-       // match: (MOVBstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVBstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // match: (MOVBstore [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVBstore [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) val mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ1 {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
                val := v_1
                mem := v_2
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(i1 != 0 && is32Bit(i0+i1)) {
                        break
                }
-               v.reset(OpAMD64MOVBstoreidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg4(ptr, idx, val, mem)
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg3(v0, val, mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} (ADDQ ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVBstoreidx1 [off] {sym} ptr idx val mem)
+       // match: (MOVBstore [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVBstore [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) val mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQ {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ2 {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr := v_0_0
-                       idx := v_0_1
-                       val := v_1
-                       mem := v_2
-                       if !(ptr.Op != OpSB) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVBstoreidx1)
-                       v.AuxInt = off
-                       v.Aux = sym
-                       v.AddArg4(ptr, idx, val, mem)
-                       return true
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               val := v_1
+               mem := v_2
+               if !(i1 != 0 && is32Bit(i0+i1)) {
+                       break
                }
-               break
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg3(v0, val, mem)
+               return true
        }
-       // match: (MOVBstore [i] {s} p w x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
-       // cond: x0.Uses == 1 && clobber(x0)
-       // result: (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem)
+       // match: (MOVBstore [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVBstore [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) val mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               w := v_1
-               x0 := v_2
-               if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i-1 || x0.Aux != s {
-                       break
-               }
-               mem := x0.Args[2]
-               if p != x0.Args[0] {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ4 {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpAMD64SHRWconst || x0_1.AuxInt != 8 || w != x0_1.Args[0] || !(x0.Uses == 1 && clobber(x0)) {
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               val := v_1
+               mem := v_2
+               if !(i1 != 0 && is32Bit(i0+i1)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = i - 1
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg3(v0, val, mem)
+               return true
+       }
+       // match: (MOVBstore [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVBstore [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) val mem)
+       for {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ8 {
+                       break
+               }
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               val := v_1
+               mem := v_2
+               if !(i1 != 0 && is32Bit(i0+i1)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg3(v0, val, mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} p1 w x0:(MOVBstore [i-1] {s} p0 (SHRWconst [8] w) mem))
+       // cond: x0.Uses == 1 && same(p0, p1, 1) && clobber(x0)
+       // result: (MOVWstore [i-1] {s} p0 (ROLWconst <w.Type> [8] w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               p1 := v_0
+               w := v_1
+               x0 := v_2
+               if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i-1 || x0.Aux != s {
+                       break
+               }
+               mem := x0.Args[2]
+               p0 := x0.Args[0]
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpAMD64SHRWconst || x0_1.AuxInt != 8 || w != x0_1.Args[0] || !(x0.Uses == 1 && same(p0, p1, 1) && clobber(x0)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = i - 1
                v.Aux = s
                v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, w.Type)
                v0.AuxInt = 8
                v0.AddArg(w)
-               v.AddArg3(p, v0, mem)
+               v.AddArg3(p0, v0, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p w x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w) x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w) x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)
-       // result: (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem)
+       // match: (MOVBstore [i] {s} p3 w x2:(MOVBstore [i-1] {s} p2 (SHRLconst [8] w) x1:(MOVBstore [i-2] {s} p1 (SHRLconst [16] w) x0:(MOVBstore [i-3] {s} p0 (SHRLconst [24] w) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && same(p2, p3, 1) && clobber(x0, x1, x2)
+       // result: (MOVLstore [i-3] {s} p0 (BSWAPL <w.Type> w) mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v_0
+               p3 := v_0
                w := v_1
                x2 := v_2
                if x2.Op != OpAMD64MOVBstore || x2.AuxInt != i-1 || x2.Aux != s {
                        break
                }
                _ = x2.Args[2]
-               if p != x2.Args[0] {
-                       break
-               }
+               p2 := x2.Args[0]
                x2_1 := x2.Args[1]
                if x2_1.Op != OpAMD64SHRLconst || x2_1.AuxInt != 8 || w != x2_1.Args[0] {
                        break
@@ -10705,9 +10764,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                        break
                }
                _ = x1.Args[2]
-               if p != x1.Args[0] {
-                       break
-               }
+               p1 := x1.Args[0]
                x1_1 := x1.Args[1]
                if x1_1.Op != OpAMD64SHRLconst || x1_1.AuxInt != 16 || w != x1_1.Args[0] {
                        break
@@ -10717,11 +10774,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                        break
                }
                mem := x0.Args[2]
-               if p != x0.Args[0] {
-                       break
-               }
+               p0 := x0.Args[0]
                x0_1 := x0.Args[1]
-               if x0_1.Op != OpAMD64SHRLconst || x0_1.AuxInt != 24 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)) {
+               if x0_1.Op != OpAMD64SHRLconst || x0_1.AuxInt != 24 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && same(p2, p3, 1) && clobber(x0, x1, x2)) {
                        break
                }
                v.reset(OpAMD64MOVLstore)
@@ -10729,25 +10784,23 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                v.Aux = s
                v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, w.Type)
                v0.AddArg(w)
-               v.AddArg3(p, v0, mem)
+               v.AddArg3(p0, v0, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p w x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w) x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w) x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w) x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w) x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w) x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w) x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0, x1, x2, x3, x4, x5, x6)
-       // result: (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem)
+       // match: (MOVBstore [i] {s} p7 w x6:(MOVBstore [i-1] {s} p6 (SHRQconst [8] w) x5:(MOVBstore [i-2] {s} p5 (SHRQconst [16] w) x4:(MOVBstore [i-3] {s} p4 (SHRQconst [24] w) x3:(MOVBstore [i-4] {s} p3 (SHRQconst [32] w) x2:(MOVBstore [i-5] {s} p2 (SHRQconst [40] w) x1:(MOVBstore [i-6] {s} p1 (SHRQconst [48] w) x0:(MOVBstore [i-7] {s} p0 (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && same(p2, p3, 1) && same(p3, p4, 1) && same(p4, p5, 1) && same(p5, p6, 1) && same(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6)
+       // result: (MOVQstore [i-7] {s} p0 (BSWAPQ <w.Type> w) mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v_0
+               p7 := v_0
                w := v_1
                x6 := v_2
                if x6.Op != OpAMD64MOVBstore || x6.AuxInt != i-1 || x6.Aux != s {
                        break
                }
                _ = x6.Args[2]
-               if p != x6.Args[0] {
-                       break
-               }
+               p6 := x6.Args[0]
                x6_1 := x6.Args[1]
                if x6_1.Op != OpAMD64SHRQconst || x6_1.AuxInt != 8 || w != x6_1.Args[0] {
                        break
@@ -10757,9 +10810,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                        break
                }
                _ = x5.Args[2]
-               if p != x5.Args[0] {
-                       break
-               }
+               p5 := x5.Args[0]
                x5_1 := x5.Args[1]
                if x5_1.Op != OpAMD64SHRQconst || x5_1.AuxInt != 16 || w != x5_1.Args[0] {
                        break
@@ -10769,9 +10820,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                        break
                }
                _ = x4.Args[2]
-               if p != x4.Args[0] {
-                       break
-               }
+               p4 := x4.Args[0]
                x4_1 := x4.Args[1]
                if x4_1.Op != OpAMD64SHRQconst || x4_1.AuxInt != 24 || w != x4_1.Args[0] {
                        break
@@ -10781,9 +10830,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                        break
                }
                _ = x3.Args[2]
-               if p != x3.Args[0] {
-                       break
-               }
+               p3 := x3.Args[0]
                x3_1 := x3.Args[1]
                if x3_1.Op != OpAMD64SHRQconst || x3_1.AuxInt != 32 || w != x3_1.Args[0] {
                        break
@@ -10793,9 +10840,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                        break
                }
                _ = x2.Args[2]
-               if p != x2.Args[0] {
-                       break
-               }
+               p2 := x2.Args[0]
                x2_1 := x2.Args[1]
                if x2_1.Op != OpAMD64SHRQconst || x2_1.AuxInt != 40 || w != x2_1.Args[0] {
                        break
@@ -10805,9 +10850,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                        break
                }
                _ = x1.Args[2]
-               if p != x1.Args[0] {
-                       break
-               }
+               p1 := x1.Args[0]
                x1_1 := x1.Args[1]
                if x1_1.Op != OpAMD64SHRQconst || x1_1.AuxInt != 48 || w != x1_1.Args[0] {
                        break
@@ -10817,11 +10860,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                        break
                }
                mem := x0.Args[2]
-               if p != x0.Args[0] {
-                       break
-               }
+               p0 := x0.Args[0]
                x0_1 := x0.Args[1]
-               if x0_1.Op != OpAMD64SHRQconst || x0_1.AuxInt != 56 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0, x1, x2, x3, x4, x5, x6)) {
+               if x0_1.Op != OpAMD64SHRQconst || x0_1.AuxInt != 56 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && same(p2, p3, 1) && same(p3, p4, 1) && same(p4, p5, 1) && same(p5, p6, 1) && same(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6)) {
                        break
                }
                v.reset(OpAMD64MOVQstore)
@@ -10829,16 +10870,16 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                v.Aux = s
                v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPQ, w.Type)
                v0.AddArg(w)
-               v.AddArg3(p, v0, mem)
+               v.AddArg3(p0, v0, mem)
                return true
        }
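
The ROLWconst, BSWAPL, and BSWAPQ rules above all recognize the same
source idiom: a big-endian value stored one byte at a time, highest byte
at the lowest address. A hypothetical Go-level example that produces the
4-byte pattern (names are illustrative, not from this CL):

        func putUint32BE(b []byte, w uint32) {
                b[0] = byte(w >> 24) // the innermost MOVBstore [i-3] (SHRLconst [24] w)
                b[1] = byte(w >> 16)
                b[2] = byte(w >> 8)
                b[3] = byte(w) // the outermost MOVBstore [i]
        }

After rewriting, the four byte stores become a single MOVLstore of
(BSWAPL <w.Type> w); the same(p, q, 1) conditions allow the four store
pointers to be distinct *Value nodes that compute the same address.
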
-       // match: (MOVBstore [i] {s} p (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-1] {s} p w mem)
+       // match: (MOVBstore [i] {s} p1 (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p0 w mem))
+       // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+       // result: (MOVWstore [i-1] {s} p0 w mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v_0
+               p1 := v_0
                if v_1.Op != OpAMD64SHRWconst || v_1.AuxInt != 8 {
                        break
                }
@@ -10848,22 +10889,23 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                        break
                }
                mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+               p0 := x.Args[0]
+               if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
                        break
                }
                v.reset(OpAMD64MOVWstore)
                v.AuxInt = i - 1
                v.Aux = s
-               v.AddArg3(p, w, mem)
+               v.AddArg3(p0, w, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-1] {s} p w mem)
+       // match: (MOVBstore [i] {s} p1 (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p0 w mem))
+       // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+       // result: (MOVWstore [i-1] {s} p0 w mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v_0
+               p1 := v_0
                if v_1.Op != OpAMD64SHRLconst || v_1.AuxInt != 8 {
                        break
                }
@@ -10873,22 +10915,23 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                        break
                }
                mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+               p0 := x.Args[0]
+               if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
                        break
                }
                v.reset(OpAMD64MOVWstore)
                v.AuxInt = i - 1
                v.Aux = s
-               v.AddArg3(p, w, mem)
+               v.AddArg3(p0, w, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p (SHRQconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-1] {s} p w mem)
+       // match: (MOVBstore [i] {s} p1 (SHRQconst [8] w) x:(MOVBstore [i-1] {s} p0 w mem))
+       // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+       // result: (MOVWstore [i-1] {s} p0 w mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v_0
+               p1 := v_0
                if v_1.Op != OpAMD64SHRQconst || v_1.AuxInt != 8 {
                        break
                }
@@ -10898,100 +10941,95 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                        break
                }
                mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+               p0 := x.Args[0]
+               if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
                        break
                }
                v.reset(OpAMD64MOVWstore)
                v.AuxInt = i - 1
                v.Aux = s
-               v.AddArg3(p, w, mem)
+               v.AddArg3(p0, w, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRWconst [8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i] {s} p w mem)
+       // match: (MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHRWconst [8] w) mem))
+       // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+       // result: (MOVWstore [i] {s} p0 w mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v_0
+               p1 := v_0
                w := v_1
                x := v_2
                if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s {
                        break
                }
                mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
+               p0 := x.Args[0]
                x_1 := x.Args[1]
-               if x_1.Op != OpAMD64SHRWconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               if x_1.Op != OpAMD64SHRWconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
                        break
                }
                v.reset(OpAMD64MOVWstore)
                v.AuxInt = i
                v.Aux = s
-               v.AddArg3(p, w, mem)
+               v.AddArg3(p0, w, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRLconst [8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i] {s} p w mem)
+       // match: (MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHRLconst [8] w) mem))
+       // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+       // result: (MOVWstore [i] {s} p0 w mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v_0
+               p1 := v_0
                w := v_1
                x := v_2
                if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s {
                        break
                }
                mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
+               p0 := x.Args[0]
                x_1 := x.Args[1]
-               if x_1.Op != OpAMD64SHRLconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               if x_1.Op != OpAMD64SHRLconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
                        break
                }
                v.reset(OpAMD64MOVWstore)
                v.AuxInt = i
                v.Aux = s
-               v.AddArg3(p, w, mem)
+               v.AddArg3(p0, w, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRQconst [8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i] {s} p w mem)
+       // match: (MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHRQconst [8] w) mem))
+       // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+       // result: (MOVWstore [i] {s} p0 w mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v_0
+               p1 := v_0
                w := v_1
                x := v_2
                if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s {
                        break
                }
                mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
+               p0 := x.Args[0]
                x_1 := x.Args[1]
-               if x_1.Op != OpAMD64SHRQconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               if x_1.Op != OpAMD64SHRQconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
                        break
                }
                v.reset(OpAMD64MOVWstore)
                v.AuxInt = i
                v.Aux = s
-               v.AddArg3(p, w, mem)
+               v.AddArg3(p0, w, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-1] {s} p w0 mem)
+       // match: (MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHRLconst [j-8] w) mem))
+       // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+       // result: (MOVWstore [i-1] {s} p0 w0 mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v_0
+               p1 := v_0
                if v_1.Op != OpAMD64SHRLconst {
                        break
                }
@@ -11002,26 +11040,24 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                        break
                }
                mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
+               p0 := x.Args[0]
                w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
                        break
                }
                v.reset(OpAMD64MOVWstore)
                v.AuxInt = i - 1
                v.Aux = s
-               v.AddArg3(p, w0, mem)
+               v.AddArg3(p0, w0, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p (SHRQconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRQconst [j-8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-1] {s} p w0 mem)
+       // match: (MOVBstore [i] {s} p1 (SHRQconst [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHRQconst [j-8] w) mem))
+       // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+       // result: (MOVWstore [i-1] {s} p0 w0 mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v_0
+               p1 := v_0
                if v_1.Op != OpAMD64SHRQconst {
                        break
                }
@@ -11032,17 +11068,15 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                        break
                }
                mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
+               p0 := x.Args[0]
                w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
                        break
                }
                v.reset(OpAMD64MOVWstore)
                v.AuxInt = i - 1
                v.Aux = s
-               v.AddArg3(p, w0, mem)
+               v.AddArg3(p0, w0, mem)
                return true
        }
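
A quick check of the two shifted-pair rules above: the store at i-1 holds
byte (w >> (j-8)) and the store at i holds byte (w >> j). Read back as a
little-endian 16-bit value at address i-1, that is

        ((w >> (j-8)) & 0xff) | (((w >> j) & 0xff) << 8) = (w >> (j-8)) & 0xffff

which is exactly the low 16 bits of w0 = SHRLconst/SHRQconst [j-8] w, so
the pair is equivalent to MOVWstore [i-1] {s} p0 w0 mem.
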
        // match: (MOVBstore [i] {s} p x1:(MOVBload [j] {s2} p2 mem) mem2:(MOVBstore [i-1] {s} p x2:(MOVBload [j-1] {s2} p2 mem) mem))
@@ -11179,53 +11213,13 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool {
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVBstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVBstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ1 {
-                       break
-               }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVBstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVBstoreconst [x] {sym} (ADDQ ptr idx) mem)
-       // result: (MOVBstoreconstidx1 [x] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQ {
-                       break
-               }
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               v.reset(OpAMD64MOVBstoreconstidx1)
-               v.AuxInt = x
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
-       // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
-       // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
+       // match: (MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
+       // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
+       // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
        for {
                c := v.AuxInt
                s := v.Aux
-               p := v_0
+               p1 := v_0
                x := v_1
                if x.Op != OpAMD64MOVBstoreconst {
                        break
@@ -11235,22 +11229,23 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool {
                        break
                }
                mem := x.Args[1]
-               if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
+               p0 := x.Args[0]
+               if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
                        break
                }
                v.reset(OpAMD64MOVWstoreconst)
                v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off())
                v.Aux = s
-               v.AddArg2(p, mem)
+               v.AddArg2(p0, mem)
                return true
        }
-       // match: (MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
-       // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
-       // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
+       // match: (MOVBstoreconst [a] {s} p1 x:(MOVBstoreconst [c] {s} p0 mem))
+       // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
+       // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
        for {
                a := v.AuxInt
                s := v.Aux
-               p := v_0
+               p1 := v_0
                x := v_1
                if x.Op != OpAMD64MOVBstoreconst {
                        break
@@ -11260,13 +11255,14 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool {
                        break
                }
                mem := x.Args[1]
-               if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
+               p0 := x.Args[0]
+               if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
                        break
                }
                v.reset(OpAMD64MOVWstoreconst)
                v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off())
                v.Aux = s
-               v.AddArg2(p, mem)
+               v.AddArg2(p0, mem)
                return true
        }
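
A worked instance of the constant-store merge above, with hypothetical
numbers: a byte store of 0x11 at offset 4 (a) adjacent to a byte store of
0x22 at offset 5 (c) satisfies ValAndOff(a).Off()+1 == ValAndOff(c).Off(),
and the combined constant is

        ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8 = 0x11 | 0x22<<8 = 0x2211

so the pair becomes a MOVWstoreconst of 0x2211 at offset 4, which in
little-endian memory writes 0x11 at offset 4 and 0x22 at offset 5, exactly
as before.
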
        // match: (MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
@@ -11314,704 +11310,96 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVBstoreconstidx1(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpAMD64MOVLQSX(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (MOVBstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
-       // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVBstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       b := v.Block
+       // match: (MOVLQSX x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
        for {
-               x := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       c := v_0.AuxInt
-                       ptr := v_0.Args[0]
-                       idx := v_1
-                       mem := v_2
-                       if !(ValAndOff(x).canAdd(c)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVBstoreconstidx1)
-                       v.AuxInt = ValAndOff(x).add(c)
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
+               x := v_0
+               if x.Op != OpAMD64MOVLload {
+                       break
                }
-               break
+               off := x.AuxInt
+               sym := x.Aux
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64MOVLQSXload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg2(ptr, mem)
+               return true
        }
-       // match: (MOVBstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
-       // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVBstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       // match: (MOVLQSX x:(MOVQload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
        for {
-               x := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       c := v_1.AuxInt
-                       idx := v_1.Args[0]
-                       mem := v_2
-                       if !(ValAndOff(x).canAdd(c)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVBstoreconstidx1)
-                       v.AuxInt = ValAndOff(x).add(c)
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
+               x := v_0
+               if x.Op != OpAMD64MOVQload {
+                       break
                }
-               break
+               off := x.AuxInt
+               sym := x.Aux
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64MOVLQSXload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg2(ptr, mem)
+               return true
        }
-       // match: (MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem))
-       // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
-       // result: (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem)
+       // match: (MOVLQSX (ANDLconst [c] x))
+       // cond: c & 0x80000000 == 0
+       // result: (ANDLconst [c & 0x7fffffff] x)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       i := v_1
-                       x := v_2
-                       if x.Op != OpAMD64MOVBstoreconstidx1 {
-                               continue
-                       }
-                       a := x.AuxInt
-                       if x.Aux != s {
-                               continue
-                       }
-                       mem := x.Args[2]
-                       x_0 := x.Args[0]
-                       x_1 := x.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-                               if p != x_0 || i != x_1 || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
-                                       continue
-                               }
-                               v.reset(OpAMD64MOVWstoreconstidx1)
-                               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off())
-                               v.Aux = s
-                               v.AddArg3(p, i, mem)
-                               return true
-                       }
+               if v_0.Op != OpAMD64ANDLconst {
+                       break
                }
-               break
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(c&0x80000000 == 0) {
+                       break
+               }
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = c & 0x7fffffff
+               v.AddArg(x)
+               return true
        }
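Here the mask proves the sign bit away: when c has bit 31 clear, ANDLconst produces a non-negative 32-bit value, and amd64 32-bit ops already zero bits 32-63, so the sign extension is a no-op and only the AND remains (c & 0x7fffffff equals c whenever the condition holds). A sketch with a hypothetical function name:

        func maskThenExtend(x int32) int64 {
                // 255 has bit 31 clear, so the widening costs nothing:
                // just ANDL $255, with no MOVLQSX emitted.
                return int64(x & 0xFF)
        }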
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVBstoreidx1(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MOVBstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVBstoreidx1 [c+d] {sym} ptr idx val mem)
+       // match: (MOVLQSX (MOVLQSX x))
+       // result: (MOVLQSX x)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       d := v_0.AuxInt
-                       ptr := v_0.Args[0]
-                       idx := v_1
-                       val := v_2
-                       mem := v_3
-                       if !(is32Bit(c + d)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVBstoreidx1)
-                       v.AuxInt = c + d
-                       v.Aux = sym
-                       v.AddArg4(ptr, idx, val, mem)
-                       return true
+               if v_0.Op != OpAMD64MOVLQSX {
+                       break
                }
-               break
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVLQSX)
+               v.AddArg(x)
+               return true
        }
-       // match: (MOVBstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVBstoreidx1 [c+d] {sym} ptr idx val mem)
+       // match: (MOVLQSX (MOVWQSX x))
+       // result: (MOVWQSX x)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       d := v_1.AuxInt
-                       idx := v_1.Args[0]
-                       val := v_2
-                       mem := v_3
-                       if !(is32Bit(c + d)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVBstoreidx1)
-                       v.AuxInt = c + d
-                       v.Aux = sym
-                       v.AddArg4(ptr, idx, val, mem)
-                       return true
+               if v_0.Op != OpAMD64MOVWQSX {
+                       break
                }
-               break
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVWQSX)
+               v.AddArg(x)
+               return true
        }
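These two rules (and the MOVBQSX one that follows) collapse stacked sign extensions: after MOVWQSX, bits 16-63 already replicate bit 15, so a further MOVLQSX, which replicates bit 31, changes nothing; the narrower extension subsumes the wider one. Sketch:

        func ext(x int16) int64 {
                // int32(x) is a MOVWQSX; the outer int64(...) would be a
                // MOVLQSX, which this rule deletes.
                return int64(int32(x))
        }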
-       // match: (MOVBstoreidx1 [i] {s} p idx w x0:(MOVBstoreidx1 [i-1] {s} p idx (SHRWconst [8] w) mem))
-       // cond: x0.Uses == 1 && clobber(x0)
-       // result: (MOVWstoreidx1 [i-1] {s} p idx (ROLWconst <w.Type> [8] w) mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       idx := v_1
-                       w := v_2
-                       x0 := v_3
-                       if x0.Op != OpAMD64MOVBstoreidx1 || x0.AuxInt != i-1 || x0.Aux != s {
-                               continue
-                       }
-                       mem := x0.Args[3]
-                       x0_0 := x0.Args[0]
-                       x0_1 := x0.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 {
-                               if p != x0_0 || idx != x0_1 {
-                                       continue
-                               }
-                               x0_2 := x0.Args[2]
-                               if x0_2.Op != OpAMD64SHRWconst || x0_2.AuxInt != 8 || w != x0_2.Args[0] || !(x0.Uses == 1 && clobber(x0)) {
-                                       continue
-                               }
-                               v.reset(OpAMD64MOVWstoreidx1)
-                               v.AuxInt = i - 1
-                               v.Aux = s
-                               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, w.Type)
-                               v0.AuxInt = 8
-                               v0.AddArg(w)
-                               v.AddArg4(p, idx, v0, mem)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (MOVBstoreidx1 [i] {s} p idx w x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)
-       // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       idx := v_1
-                       w := v_2
-                       x2 := v_3
-                       if x2.Op != OpAMD64MOVBstoreidx1 || x2.AuxInt != i-1 || x2.Aux != s {
-                               continue
-                       }
-                       _ = x2.Args[3]
-                       x2_0 := x2.Args[0]
-                       x2_1 := x2.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x2_0, x2_1 = _i1+1, x2_1, x2_0 {
-                               if p != x2_0 || idx != x2_1 {
-                                       continue
-                               }
-                               x2_2 := x2.Args[2]
-                               if x2_2.Op != OpAMD64SHRLconst || x2_2.AuxInt != 8 || w != x2_2.Args[0] {
-                                       continue
-                               }
-                               x1 := x2.Args[3]
-                               if x1.Op != OpAMD64MOVBstoreidx1 || x1.AuxInt != i-2 || x1.Aux != s {
-                                       continue
-                               }
-                               _ = x1.Args[3]
-                               x1_0 := x1.Args[0]
-                               x1_1 := x1.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, x1_0, x1_1 = _i2+1, x1_1, x1_0 {
-                                       if p != x1_0 || idx != x1_1 {
-                                               continue
-                                       }
-                                       x1_2 := x1.Args[2]
-                                       if x1_2.Op != OpAMD64SHRLconst || x1_2.AuxInt != 16 || w != x1_2.Args[0] {
-                                               continue
-                                       }
-                                       x0 := x1.Args[3]
-                                       if x0.Op != OpAMD64MOVBstoreidx1 || x0.AuxInt != i-3 || x0.Aux != s {
-                                               continue
-                                       }
-                                       mem := x0.Args[3]
-                                       x0_0 := x0.Args[0]
-                                       x0_1 := x0.Args[1]
-                                       for _i3 := 0; _i3 <= 1; _i3, x0_0, x0_1 = _i3+1, x0_1, x0_0 {
-                                               if p != x0_0 || idx != x0_1 {
-                                                       continue
-                                               }
-                                               x0_2 := x0.Args[2]
-                                               if x0_2.Op != OpAMD64SHRLconst || x0_2.AuxInt != 24 || w != x0_2.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)) {
-                                                       continue
-                                               }
-                                               v.reset(OpAMD64MOVLstoreidx1)
-                                               v.AuxInt = i - 3
-                                               v.Aux = s
-                                               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
-                                               v0.AddArg(w)
-                                               v.AddArg4(p, idx, v0, mem)
-                                               return true
-                                       }
-                               }
-                       }
-               }
-               break
-       }
-       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0, x1, x2, x3, x4, x5, x6)
-       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       idx := v_1
-                       w := v_2
-                       x6 := v_3
-                       if x6.Op != OpAMD64MOVBstoreidx1 || x6.AuxInt != i-1 || x6.Aux != s {
-                               continue
-                       }
-                       _ = x6.Args[3]
-                       x6_0 := x6.Args[0]
-                       x6_1 := x6.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x6_0, x6_1 = _i1+1, x6_1, x6_0 {
-                               if p != x6_0 || idx != x6_1 {
-                                       continue
-                               }
-                               x6_2 := x6.Args[2]
-                               if x6_2.Op != OpAMD64SHRQconst || x6_2.AuxInt != 8 || w != x6_2.Args[0] {
-                                       continue
-                               }
-                               x5 := x6.Args[3]
-                               if x5.Op != OpAMD64MOVBstoreidx1 || x5.AuxInt != i-2 || x5.Aux != s {
-                                       continue
-                               }
-                               _ = x5.Args[3]
-                               x5_0 := x5.Args[0]
-                               x5_1 := x5.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, x5_0, x5_1 = _i2+1, x5_1, x5_0 {
-                                       if p != x5_0 || idx != x5_1 {
-                                               continue
-                                       }
-                                       x5_2 := x5.Args[2]
-                                       if x5_2.Op != OpAMD64SHRQconst || x5_2.AuxInt != 16 || w != x5_2.Args[0] {
-                                               continue
-                                       }
-                                       x4 := x5.Args[3]
-                                       if x4.Op != OpAMD64MOVBstoreidx1 || x4.AuxInt != i-3 || x4.Aux != s {
-                                               continue
-                                       }
-                                       _ = x4.Args[3]
-                                       x4_0 := x4.Args[0]
-                                       x4_1 := x4.Args[1]
-                                       for _i3 := 0; _i3 <= 1; _i3, x4_0, x4_1 = _i3+1, x4_1, x4_0 {
-                                               if p != x4_0 || idx != x4_1 {
-                                                       continue
-                                               }
-                                               x4_2 := x4.Args[2]
-                                               if x4_2.Op != OpAMD64SHRQconst || x4_2.AuxInt != 24 || w != x4_2.Args[0] {
-                                                       continue
-                                               }
-                                               x3 := x4.Args[3]
-                                               if x3.Op != OpAMD64MOVBstoreidx1 || x3.AuxInt != i-4 || x3.Aux != s {
-                                                       continue
-                                               }
-                                               _ = x3.Args[3]
-                                               x3_0 := x3.Args[0]
-                                               x3_1 := x3.Args[1]
-                                               for _i4 := 0; _i4 <= 1; _i4, x3_0, x3_1 = _i4+1, x3_1, x3_0 {
-                                                       if p != x3_0 || idx != x3_1 {
-                                                               continue
-                                                       }
-                                                       x3_2 := x3.Args[2]
-                                                       if x3_2.Op != OpAMD64SHRQconst || x3_2.AuxInt != 32 || w != x3_2.Args[0] {
-                                                               continue
-                                                       }
-                                                       x2 := x3.Args[3]
-                                                       if x2.Op != OpAMD64MOVBstoreidx1 || x2.AuxInt != i-5 || x2.Aux != s {
-                                                               continue
-                                                       }
-                                                       _ = x2.Args[3]
-                                                       x2_0 := x2.Args[0]
-                                                       x2_1 := x2.Args[1]
-                                                       for _i5 := 0; _i5 <= 1; _i5, x2_0, x2_1 = _i5+1, x2_1, x2_0 {
-                                                               if p != x2_0 || idx != x2_1 {
-                                                                       continue
-                                                               }
-                                                               x2_2 := x2.Args[2]
-                                                               if x2_2.Op != OpAMD64SHRQconst || x2_2.AuxInt != 40 || w != x2_2.Args[0] {
-                                                                       continue
-                                                               }
-                                                               x1 := x2.Args[3]
-                                                               if x1.Op != OpAMD64MOVBstoreidx1 || x1.AuxInt != i-6 || x1.Aux != s {
-                                                                       continue
-                                                               }
-                                                               _ = x1.Args[3]
-                                                               x1_0 := x1.Args[0]
-                                                               x1_1 := x1.Args[1]
-                                                               for _i6 := 0; _i6 <= 1; _i6, x1_0, x1_1 = _i6+1, x1_1, x1_0 {
-                                                                       if p != x1_0 || idx != x1_1 {
-                                                                               continue
-                                                                       }
-                                                                       x1_2 := x1.Args[2]
-                                                                       if x1_2.Op != OpAMD64SHRQconst || x1_2.AuxInt != 48 || w != x1_2.Args[0] {
-                                                                               continue
-                                                                       }
-                                                                       x0 := x1.Args[3]
-                                                                       if x0.Op != OpAMD64MOVBstoreidx1 || x0.AuxInt != i-7 || x0.Aux != s {
-                                                                               continue
-                                                                       }
-                                                                       mem := x0.Args[3]
-                                                                       x0_0 := x0.Args[0]
-                                                                       x0_1 := x0.Args[1]
-                                                                       for _i7 := 0; _i7 <= 1; _i7, x0_0, x0_1 = _i7+1, x0_1, x0_0 {
-                                                                               if p != x0_0 || idx != x0_1 {
-                                                                                       continue
-                                                                               }
-                                                                               x0_2 := x0.Args[2]
-                                                                               if x0_2.Op != OpAMD64SHRQconst || x0_2.AuxInt != 56 || w != x0_2.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0, x1, x2, x3, x4, x5, x6)) {
-                                                                                       continue
-                                                                               }
-                                                                               v.reset(OpAMD64MOVQstoreidx1)
-                                                                               v.AuxInt = i - 7
-                                                                               v.Aux = s
-                                                                               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
-                                                                               v0.AddArg(w)
-                                                                               v.AddArg4(p, idx, v0, mem)
-                                                                               return true
-                                                                       }
-                                                               }
-                                                       }
-                                               }
-                                       }
-                               }
-                       }
-               }
-               break
-       }
-       // match: (MOVBstoreidx1 [i] {s} p idx (SHRWconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       idx := v_1
-                       if v_2.Op != OpAMD64SHRWconst || v_2.AuxInt != 8 {
-                               continue
-                       }
-                       w := v_2.Args[0]
-                       x := v_3
-                       if x.Op != OpAMD64MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       x_0 := x.Args[0]
-                       x_1 := x.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-                               if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-                                       continue
-                               }
-                               v.reset(OpAMD64MOVWstoreidx1)
-                               v.AuxInt = i - 1
-                               v.Aux = s
-                               v.AddArg4(p, idx, w, mem)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       idx := v_1
-                       if v_2.Op != OpAMD64SHRLconst || v_2.AuxInt != 8 {
-                               continue
-                       }
-                       w := v_2.Args[0]
-                       x := v_3
-                       if x.Op != OpAMD64MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       x_0 := x.Args[0]
-                       x_1 := x.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-                               if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-                                       continue
-                               }
-                               v.reset(OpAMD64MOVWstoreidx1)
-                               v.AuxInt = i - 1
-                               v.Aux = s
-                               v.AddArg4(p, idx, w, mem)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       idx := v_1
-                       if v_2.Op != OpAMD64SHRQconst || v_2.AuxInt != 8 {
-                               continue
-                       }
-                       w := v_2.Args[0]
-                       x := v_3
-                       if x.Op != OpAMD64MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       x_0 := x.Args[0]
-                       x_1 := x.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-                               if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-                                       continue
-                               }
-                               v.reset(OpAMD64MOVWstoreidx1)
-                               v.AuxInt = i - 1
-                               v.Aux = s
-                               v.AddArg4(p, idx, w, mem)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       idx := v_1
-                       if v_2.Op != OpAMD64SHRLconst {
-                               continue
-                       }
-                       j := v_2.AuxInt
-                       w := v_2.Args[0]
-                       x := v_3
-                       if x.Op != OpAMD64MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       x_0 := x.Args[0]
-                       x_1 := x.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-                               if p != x_0 || idx != x_1 {
-                                       continue
-                               }
-                               w0 := x.Args[2]
-                               if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
-                                       continue
-                               }
-                               v.reset(OpAMD64MOVWstoreidx1)
-                               v.AuxInt = i - 1
-                               v.Aux = s
-                               v.AddArg4(p, idx, w0, mem)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRQconst [j-8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       idx := v_1
-                       if v_2.Op != OpAMD64SHRQconst {
-                               continue
-                       }
-                       j := v_2.AuxInt
-                       w := v_2.Args[0]
-                       x := v_3
-                       if x.Op != OpAMD64MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       x_0 := x.Args[0]
-                       x_1 := x.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-                               if p != x_0 || idx != x_1 {
-                                       continue
-                               }
-                               w0 := x.Args[2]
-                               if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
-                                       continue
-                               }
-                               v.reset(OpAMD64MOVWstoreidx1)
-                               v.AuxInt = i - 1
-                               v.Aux = s
-                               v.AddArg4(p, idx, w0, mem)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (MOVBstoreidx1 [i] {s} p (MOVQconst [c]) w mem)
-       // cond: is32Bit(i+c)
-       // result: (MOVBstore [i+c] {s} p w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       if v_1.Op != OpAMD64MOVQconst {
-                               continue
-                       }
-                       c := v_1.AuxInt
-                       w := v_2
-                       mem := v_3
-                       if !(is32Bit(i + c)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVBstore)
-                       v.AuxInt = i + c
-                       v.Aux = s
-                       v.AddArg3(p, w, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVBstoreidx1 [off] {s} ptr idx (MOVLconst [c]) mem)
-       // cond: validValAndOff(int64(int8(c)), off)
-       // result: (MOVBstoreconstidx1 [makeValAndOff(int64(int8(c)), off)] {s} ptr idx mem)
-       for {
-               off := v.AuxInt
-               s := v.Aux
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpAMD64MOVLconst {
-                       break
-               }
-               c := v_2.AuxInt
-               mem := v_3
-               if !(validValAndOff(int64(int8(c)), off)) {
-                       break
-               }
-               v.reset(OpAMD64MOVBstoreconstidx1)
-               v.AuxInt = makeValAndOff(int64(int8(c)), off)
-               v.Aux = s
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       return false
-}
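All of the MOVBstoreidx1 machinery deleted above, such as the byte-merging pattern

        (MOVBstoreidx1 [i] {s} p idx w x0:(MOVBstoreidx1 [i-1] {s} p idx (SHRWconst [8] w) mem))
          && x0.Uses == 1 && clobber(x0)
          -> (MOVWstoreidx1 [i-1] {s} p idx (ROLWconst <w.Type> [8] w) mem)

can no longer fire during the main rewrite: with this change the indexed forms are created only by the late addressing-modes pass, after these rules have already run, so the patterns are dead and are removed rather than re-targeted. The MOVLloadidx1/4/8 helpers removed further down go away for the same reason.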
-func rewriteValueAMD64_OpAMD64MOVLQSX(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MOVLQSX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v_0
-               if x.Op != OpAMD64MOVLload {
-                       break
-               }
-               off := x.AuxInt
-               sym := x.Aux
-               mem := x.Args[1]
-               ptr := x.Args[0]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               b = x.Block
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVLQSXload, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVLQSX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v_0
-               if x.Op != OpAMD64MOVQload {
-                       break
-               }
-               off := x.AuxInt
-               sym := x.Aux
-               mem := x.Args[1]
-               ptr := x.Args[0]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               b = x.Block
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVLQSXload, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVLQSX (ANDLconst [c] x))
-       // cond: c & 0x80000000 == 0
-       // result: (ANDLconst [c & 0x7fffffff] x)
-       for {
-               if v_0.Op != OpAMD64ANDLconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(c&0x80000000 == 0) {
-                       break
-               }
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = c & 0x7fffffff
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLQSX (MOVLQSX x))
-       // result: (MOVLQSX x)
-       for {
-               if v_0.Op != OpAMD64MOVLQSX {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVLQSX)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLQSX (MOVWQSX x))
-       // result: (MOVWQSX x)
-       for {
-               if v_0.Op != OpAMD64MOVWQSX {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVWQSX)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLQSX (MOVBQSX x))
-       // result: (MOVBQSX x)
+       // match: (MOVLQSX (MOVBQSX x))
+       // result: (MOVBQSX x)
        for {
                if v_0.Op != OpAMD64MOVBQSX {
                        break
@@ -12131,58 +11519,10 @@ func rewriteValueAMD64_OpAMD64MOVLQZX(v *Value) bool {
                v.copyOf(x)
                return true
        }
-       // match: (MOVLQZX x:(MOVLloadidx1 [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVLloadidx1 <v.Type> [off] {sym} ptr idx mem)
+       // match: (MOVLQZX (ANDLconst [c] x))
+       // result: (ANDLconst [c] x)
        for {
-               x := v_0
-               if x.Op != OpAMD64MOVLloadidx1 {
-                       break
-               }
-               off := x.AuxInt
-               sym := x.Aux
-               mem := x.Args[2]
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVLQZX x:(MOVLloadidx4 [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVLloadidx4 <v.Type> [off] {sym} ptr idx mem)
-       for {
-               x := v_0
-               if x.Op != OpAMD64MOVLloadidx4 {
-                       break
-               }
-               off := x.AuxInt
-               sym := x.Aux
-               mem := x.Args[2]
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx4, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVLQZX (ANDLconst [c] x))
-       // result: (ANDLconst [c] x)
-       for {
-               if v_0.Op != OpAMD64ANDLconst {
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
                c := v_0.AuxInt
@@ -12396,101 +11736,117 @@ func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool {
                v.AddArg2(base, mem)
                return true
        }
-       // match: (MOVLload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (MOVLload [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVLload [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ1 {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
                mem := v_1
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(i1 != 0 && is32Bit(i0+i1)) {
                        break
                }
-               v.reset(OpAMD64MOVLloadidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpAMD64MOVLload)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg2(v0, mem)
                return true
        }
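This rule and the three after it canonicalize address arithmetic rather than optimize it: the constant displacement is hoisted out of the LEAQ1 onto the load, leaving a zero-offset LEAQ1 for the separate addressing-modes pass to absorb. In rule notation, with illustrative offsets:

        (MOVLload [8] {s} (LEAQ1 [4] x y) mem)
          -> (MOVLload [12] {s} (LEAQ1 [0] x y) mem)    // this rule
          -> presumably (MOVLloadidx1 [12] {s} x y mem) // addressing-modes pass

Keeping the offset in exactly one place is what lets the new pass match a single (load, LEAQx) pair instead of needing many rule orderings. The i1 != 0 guard stops the rule from matching its own zero-offset output and looping.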
-       // match: (MOVLload [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (MOVLload [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVLload [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ4 {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ2 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
                mem := v_1
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(i1 != 0 && is32Bit(i0+i1)) {
                        break
                }
-               v.reset(OpAMD64MOVLloadidx4)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpAMD64MOVLload)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg2(v0, mem)
                return true
        }
-       // match: (MOVLload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (MOVLload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVLload [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ8 {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ4 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
                mem := v_1
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(i1 != 0 && is32Bit(i0+i1)) {
                        break
                }
-               v.reset(OpAMD64MOVLloadidx8)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpAMD64MOVLload)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg2(v0, mem)
                return true
        }
-       // match: (MOVLload [off] {sym} (ADDQ ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVLloadidx1 [off] {sym} ptr idx mem)
+       // match: (MOVLload [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVLload [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQ {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ8 {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr := v_0_0
-                       idx := v_0_1
-                       mem := v_1
-                       if !(ptr.Op != OpSB) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVLloadidx1)
-                       v.AuxInt = off
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               mem := v_1
+               if !(i1 != 0 && is32Bit(i0+i1)) {
+                       break
                }
-               break
+               v.reset(OpAMD64MOVLload)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg2(v0, mem)
+               return true
        }
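The LEAQ2, LEAQ4 and LEAQ8 variants above repeat the same hoist at scales 2, 4 and 8. Once the late pass folds the result, the familiar scaled-index forms come out; a codegen-style sketch in the spirit of test/codegen/memops.go (function name hypothetical):

        func loadScaled(a []int32, i int) int32 {
                // expect an addressing mode along the lines of
                // MOVL 4(AX)(CX*4), AX on amd64
                return a[i+1]
        }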
        // match: (MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
        // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
@@ -12567,375 +11923,113 @@ func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVLloadidx1(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVLloadidx1 [c] {sym} ptr (SHLQconst [2] idx) mem)
-       // result: (MOVLloadidx4 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 2 {
-                               continue
-                       }
-                       idx := v_1.Args[0]
-                       mem := v_2
-                       v.reset(OpAMD64MOVLloadidx4)
-                       v.AuxInt = c
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVLloadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem)
-       // result: (MOVLloadidx8 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 {
-                               continue
-                       }
-                       idx := v_1.Args[0]
-                       mem := v_2
-                       v.reset(OpAMD64MOVLloadidx8)
-                       v.AuxInt = c
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVLloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (MOVLstore [off] {sym} ptr (MOVLQSX x) mem)
+       // result: (MOVLstore [off] {sym} ptr x mem)
        for {
-               c := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       d := v_0.AuxInt
-                       ptr := v_0.Args[0]
-                       idx := v_1
-                       mem := v_2
-                       if !(is32Bit(c + d)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVLloadidx1)
-                       v.AuxInt = c + d
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVLQSX {
+                       break
                }
-               break
+               x := v_1.Args[0]
+               mem := v_2
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg3(ptr, x, mem)
+               return true
        }
-       // match: (MOVLloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
+       // match: (MOVLstore [off] {sym} ptr (MOVLQZX x) mem)
+       // result: (MOVLstore [off] {sym} ptr x mem)
        for {
-               c := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       d := v_1.AuxInt
-                       idx := v_1.Args[0]
-                       mem := v_2
-                       if !(is32Bit(c + d)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVLloadidx1)
-                       v.AuxInt = c + d
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVLloadidx1 [i] {s} p (MOVQconst [c]) mem)
-       // cond: is32Bit(i+c)
-       // result: (MOVLload [i+c] {s} p mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       if v_1.Op != OpAMD64MOVQconst {
-                               continue
-                       }
-                       c := v_1.AuxInt
-                       mem := v_2
-                       if !(is32Bit(i + c)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVLload)
-                       v.AuxInt = i + c
-                       v.Aux = s
-                       v.AddArg2(p, mem)
-                       return true
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVLQZX {
+                       break
                }
-               break
+               x := v_1.Args[0]
+               mem := v_2
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg3(ptr, x, mem)
+               return true
        }
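Both store rules above rely on the same fact: a MOVLstore reads only bits 0-31 of its value argument, so a MOVLQSX or MOVLQZX feeding it does no observable work and can be bypassed. In rule notation:

        (MOVLstore [off] {sym} ptr (MOVLQSX x) mem) -> (MOVLstore [off] {sym} ptr x mem)

and likewise for MOVLQZX.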
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLloadidx4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVLloadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVLloadidx4 [c+d] {sym} ptr idx mem)
+       // match: (MOVLstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVLstore [off1+off2] {sym} ptr val mem)
        for {
-               c := v.AuxInt
+               off1 := v.AuxInt
                sym := v.Aux
                if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               d := v_0.AuxInt
+               off2 := v_0.AuxInt
                ptr := v_0.Args[0]
-               idx := v_1
+               val := v_1
                mem := v_2
-               if !(is32Bit(c + d)) {
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVLloadidx4)
-               v.AuxInt = c + d
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = off1 + off2
                v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
+               v.AddArg3(ptr, val, mem)
                return true
        }
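Folding the ADDQconst into the store keeps the pointer adjustment out of a separate instruction, but the combined displacement must fit the signed 32-bit field of an x86 memory operand, hence the is32Bit(off1+off2) guard. A sketch of the shape, with a hypothetical type:

        func setC(p *struct{ a, b, c int32 }) {
                // &p.c is p plus a constant 8; the 8 ends up as the
                // store's displacement, e.g. MOVL $1, 8(AX).
                p.c = 1
        }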
-       // match: (MOVLloadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond: is32Bit(c+4*d)
-       // result: (MOVLloadidx4 [c+4*d] {sym} ptr idx mem)
+       // match: (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem)
+       // cond: validOff(off)
+       // result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
        for {
-               c := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
                ptr := v_0
-               if v_1.Op != OpAMD64ADDQconst {
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
+               c := v_1.AuxInt
                mem := v_2
-               if !(is32Bit(c + 4*d)) {
+               if !(validOff(off)) {
                        break
                }
-               v.reset(OpAMD64MOVLloadidx4)
-               v.AuxInt = c + 4*d
+               v.reset(OpAMD64MOVLstoreconst)
+               v.AuxInt = makeValAndOff(int64(int32(c)), off)
                v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVLloadidx4 [i] {s} p (MOVQconst [c]) mem)
-       // cond: is32Bit(i+4*c)
-       // result: (MOVLload [i+4*c] {s} p mem)
+       // match: (MOVLstore [off] {sym} ptr (MOVQconst [c]) mem)
+       // cond: validOff(off)
+       // result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v_0
                if v_1.Op != OpAMD64MOVQconst {
                        break
                }
                c := v_1.AuxInt
                mem := v_2
-               if !(is32Bit(i + 4*c)) {
+               if !(validOff(off)) {
                        break
                }
-               v.reset(OpAMD64MOVLload)
-               v.AuxInt = i + 4*c
-               v.Aux = s
-               v.AddArg2(p, mem)
+               v.reset(OpAMD64MOVLstoreconst)
+               v.AuxInt = makeValAndOff(int64(int32(c)), off)
+               v.Aux = sym
+               v.AddArg2(ptr, mem)
                return true
        }
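A MOVLstoreconst carries both the immediate and the displacement in one AuxInt: makeValAndOff packs the 32-bit value into the high half and the 32-bit offset into the low half, which is why validOff must hold for the offset. A minimal sketch of that packing, assuming the ValAndOff layout used elsewhere in this file:

        func makeValAndOffSketch(val, off int32) int64 {
                // value in bits 32-63, offset in bits 0-31
                return int64(val)<<32 | int64(uint32(off))
        }

The int64(int32(c)) in the result first truncates the constant to its low 32 bits, so MOVLconst and MOVQconst sources pack identically.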
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLloadidx8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVLloadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVLloadidx8 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v_1
-               mem := v_2
-               if !(is32Bit(c + d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLloadidx8)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVLloadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond: is32Bit(c+8*d)
-       // result: (MOVLloadidx8 [c+8*d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v_2
-               if !(is32Bit(c + 8*d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLloadidx8)
-               v.AuxInt = c + 8*d
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVLloadidx8 [i] {s} p (MOVQconst [c]) mem)
-       // cond: is32Bit(i+8*c)
-       // result: (MOVLload [i+8*c] {s} p mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               mem := v_2
-               if !(is32Bit(i + 8*c)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLload)
-               v.AuxInt = i + 8*c
-               v.Aux = s
-               v.AddArg2(p, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (MOVLstore [off] {sym} ptr (MOVLQSX x) mem)
-       // result: (MOVLstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVLQSX {
-                       break
-               }
-               x := v_1.Args[0]
-               mem := v_2
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg3(ptr, x, mem)
-               return true
-       }
-       // match: (MOVLstore [off] {sym} ptr (MOVLQZX x) mem)
-       // result: (MOVLstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVLQZX {
-                       break
-               }
-               x := v_1.Args[0]
-               mem := v_2
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg3(ptr, x, mem)
-               return true
-       }
-       // match: (MOVLstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVLstore [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(off1 + off2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       // match: (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem)
-       // cond: validOff(off)
-       // result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVLconst {
-                       break
-               }
-               c := v_1.AuxInt
-               mem := v_2
-               if !(validOff(off)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLstoreconst)
-               v.AuxInt = makeValAndOff(int64(int32(c)), off)
-               v.Aux = sym
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVLstore [off] {sym} ptr (MOVQconst [c]) mem)
-       // cond: validOff(off)
-       // result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               mem := v_2
-               if !(validOff(off)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLstoreconst)
-               v.AuxInt = makeValAndOff(int64(int32(c)), off)
-               v.Aux = sym
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVLstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // match: (MOVLstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -12956,113 +12050,129 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                v.AddArg3(base, val, mem)
                return true
        }
-       // match: (MOVLstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // match: (MOVLstore [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVLstore [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) val mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ1 {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
                val := v_1
                mem := v_2
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(i1 != 0 && is32Bit(i0+i1)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg4(ptr, idx, val, mem)
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg3(v0, val, mem)
                return true
        }
-       // match: (MOVLstore [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // match: (MOVLstore [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVLstore [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) val mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ4 {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ2 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
                val := v_1
                mem := v_2
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(i1 != 0 && is32Bit(i0+i1)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx4)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg4(ptr, idx, val, mem)
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg3(v0, val, mem)
                return true
        }
-       // match: (MOVLstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // match: (MOVLstore [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVLstore [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) val mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ8 {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ4 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
                val := v_1
                mem := v_2
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(i1 != 0 && is32Bit(i0+i1)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx8)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg4(ptr, idx, val, mem)
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg3(v0, val, mem)
                return true
        }
-       // match: (MOVLstore [off] {sym} (ADDQ ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVLstoreidx1 [off] {sym} ptr idx val mem)
+       // match: (MOVLstore [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVLstore [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) val mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQ {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ8 {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr := v_0_0
-                       idx := v_0_1
-                       val := v_1
-                       mem := v_2
-                       if !(ptr.Op != OpSB) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVLstoreidx1)
-                       v.AuxInt = off
-                       v.Aux = sym
-                       v.AddArg4(ptr, idx, val, mem)
-                       return true
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               val := v_1
+               mem := v_2
+               if !(i1 != 0 && is32Bit(i0+i1)) {
+                       break
                }
-               break
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg3(v0, val, mem)
+               return true
        }
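
The four rules above replace the old direct lowerings to MOVLstoreidx{1,4,8}: rather than selecting an indexed store op here, each rule hoists the LEAQx's nonzero displacement into the store's AuxInt and rebuilds the LEAQx with offset 0, leaving the actual fusion into an indexed instruction to the new addressingModes pass. A minimal runnable sketch (all names illustrative, not compiler code) checking that the hoist is address-preserving:

    package main

    import "fmt"

    // effAddr is the address a MOVLstore [off] computes when its pointer
    // argument is LEAQ<scale> [disp] base idx.
    func effAddr(off, disp, base, idx, scale int64) int64 {
            return off + base + scale*idx + disp
    }

    func main() {
            const base, idx = 0x1000, 3
            before := effAddr(100, 8, base, idx, 4)  // MOVLstore [i0] (LEAQ4 [i1] x y)
            after := effAddr(100+8, 0, base, idx, 4) // MOVLstore [i0+i1] (LEAQ4 [0] x y)
            fmt.Println(before == after)             // true: same byte is addressed
    }
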
-       // match: (MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVQstore [i-4] {s} p w mem)
+       // match: (MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p0 w mem))
+       // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+       // result: (MOVQstore [i-4] {s} p0 w mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v_0
+               p1 := v_0
                if v_1.Op != OpAMD64SHRQconst || v_1.AuxInt != 32 {
                        break
                }
@@ -13072,22 +12182,23 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                        break
                }
                mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+               p0 := x.Args[0]
+               if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
                        break
                }
                v.reset(OpAMD64MOVQstore)
                v.AuxInt = i - 4
                v.Aux = s
-               v.AddArg3(p, w, mem)
+               v.AddArg3(p0, w, mem)
                return true
        }
-       // match: (MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVQstore [i-4] {s} p w0 mem)
+       // match: (MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p0 w0:(SHRQconst [j-32] w) mem))
+       // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+       // result: (MOVQstore [i-4] {s} p0 w0 mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               p := v_0
+               p1 := v_0
                if v_1.Op != OpAMD64SHRQconst {
                        break
                }
@@ -13098,17 +12209,15 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                        break
                }
                mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
+               p0 := x.Args[0]
                w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-32 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-32 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
                        break
                }
                v.reset(OpAMD64MOVQstore)
                v.AuxInt = i - 4
                v.Aux = s
-               v.AddArg3(p, w0, mem)
+               v.AddArg3(p0, w0, mem)
                return true
        }
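
Both halves of this store-combining pattern now guard with same(p0, p1, 1) instead of requiring the two store pointers to be the identical Value: once the LEAQx rules above rebuild offset-0 copies, two stores to one address may reference syntactically distinct Values. The helper itself is added in rewrite.go in this change; a toy runnable sketch in that spirit (value stands in for ssa.Value, and the real helper may differ in detail, e.g. around commutative ops):

    package main

    import "fmt"

    type value struct {
            op     string
            auxInt int64
            args   []*value
    }

    // same reports whether x and y compute the same result, comparing
    // structurally up to the given recursion depth.
    func same(x, y *value, depth int) bool {
            if x == y {
                    return true // identical nodes: trivially the same
            }
            if depth <= 0 {
                    return false // budget spent; answer conservatively
            }
            if x.op != y.op || x.auxInt != y.auxInt || len(x.args) != len(y.args) {
                    return false
            }
            for i := range x.args {
                    if !same(x.args[i], y.args[i], depth-1) {
                            return false
                    }
            }
            return true
    }

    func main() {
            base := &value{op: "SP"}
            p0 := &value{op: "LEAQ1", args: []*value{base, base}}
            p1 := &value{op: "LEAQ1", args: []*value{base, base}} // distinct node, same shape
            fmt.Println(same(p0, p1, 1))                          // true
    }
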
        // match: (MOVLstore [i] {s} p x1:(MOVLload [j] {s2} p2 mem) mem2:(MOVLstore [i-4] {s} p x2:(MOVLload [j-4] {s2} p2 mem) mem))
@@ -13785,113 +12894,52 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVLstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (MOVLstoreconst [c] {s} p1 x:(MOVLstoreconst [a] {s} p0 mem))
+       // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x)
+       // result: (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
        for {
-               x := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ1 {
+               c := v.AuxInt
+               s := v.Aux
+               p1 := v_0
+               x := v_1
+               if x.Op != OpAMD64MOVLstoreconst {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(canMergeSym(sym1, sym2)) {
+               a := x.AuxInt
+               if x.Aux != s {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(ptr, idx, mem)
+               mem := x.Args[1]
+               p0 := x.Args[0]
+               if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = ValAndOff(a).Off()
+               v.Aux = s
+               v0 := b.NewValue0(x.Pos, OpAMD64MOVQconst, typ.UInt64)
+               v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
+               v.AddArg3(p0, v0, mem)
                return true
        }
-       // match: (MOVLstoreconst [x] {sym1} (LEAQ4 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (MOVLstoreconst [a] {s} p1 x:(MOVLstoreconst [c] {s} p0 mem))
+       // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x)
+       // result: (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
        for {
-               x := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ4 {
+               a := v.AuxInt
+               s := v.Aux
+               p1 := v_0
+               x := v_1
+               if x.Op != OpAMD64MOVLstoreconst {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLstoreconstidx4)
-               v.AuxInt = ValAndOff(x).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVLstoreconst [x] {sym} (ADDQ ptr idx) mem)
-       // result: (MOVLstoreconstidx1 [x] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQ {
-                       break
-               }
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               v.reset(OpAMD64MOVLstoreconstidx1)
-               v.AuxInt = x
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem))
-       // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x)
-       // result: (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v_0
-               x := v_1
-               if x.Op != OpAMD64MOVLstoreconst {
-                       break
-               }
-               a := x.AuxInt
-               if x.Aux != s {
-                       break
-               }
-               mem := x.Args[1]
-               if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = ValAndOff(a).Off()
-               v.Aux = s
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVQconst, typ.UInt64)
-               v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
-               v.AddArg3(p, v0, mem)
-               return true
-       }
-       // match: (MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem))
-       // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x)
-       // result: (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
-       for {
-               a := v.AuxInt
-               s := v.Aux
-               p := v_0
-               x := v_1
-               if x.Op != OpAMD64MOVLstoreconst {
-                       break
-               }
-               c := x.AuxInt
-               if x.Aux != s {
+               c := x.AuxInt
+               if x.Aux != s {
                        break
                }
                mem := x.Args[1]
-               if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+               p0 := x.Args[0]
+               if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
                        break
                }
                v.reset(OpAMD64MOVQstore)
@@ -13899,7 +12947,7 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
                v.Aux = s
                v0 := b.NewValue0(x.Pos, OpAMD64MOVQconst, typ.UInt64)
                v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
-               v.AddArg3(p, v0, mem)
+               v.AddArg3(p0, v0, mem)
                return true
        }
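
These two rules (one per order in which the stores are encountered) merge adjacent 4-byte constant stores at offsets off and off+4 into one 8-byte store. On little-endian amd64 the value at the lower offset must occupy the low 32 bits of the combined immediate, which is what Val(a)&0xffffffff | Val(c)<<32 builds. A runnable check that the packing leaves memory byte-for-byte identical:

    package main

    import (
            "encoding/binary"
            "fmt"
    )

    func main() {
            var lo, hi int64 = 0x11223344, 0x55667788 // constants stored at off and off+4

            merged := uint64(lo)&0xffffffff | uint64(hi)<<32

            var a, b [8]byte
            binary.LittleEndian.PutUint32(a[0:], uint32(lo)) // the two MOVLstoreconsts
            binary.LittleEndian.PutUint32(a[4:], uint32(hi))
            binary.LittleEndian.PutUint64(b[:], merged)      // the single MOVQstore
            fmt.Println(a == b)                              // true
    }
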
        // match: (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
@@ -13947,667 +12995,451 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
        }
        return false
 }
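
Everything from here until MOVOload is pure deletion: the MOVLstoreconstidx*/MOVLstoreidx* rewrite functions become unreachable because no rule produces indexed ops during generic rewriting anymore. The new addressingModes pass recreates indexed forms late via a table keyed on (memory op, address op) pairs; a toy runnable model of that lookup (ops as strings rather than ssa.Op, entries illustrative):

    package main

    import "fmt"

    // combine maps a (store op, pointer op) pair to the fused indexed op,
    // mirroring the combine table in addressingmodes.go.
    var combine = map[[2]string]string{
            {"MOVLstore", "LEAQ1"}: "MOVLstoreidx1",
            {"MOVLstore", "LEAQ4"}: "MOVLstoreidx4",
            {"MOVLstore", "LEAQ8"}: "MOVLstoreidx8",
    }

    func main() {
            fused, ok := combine[[2]string{"MOVLstore", "LEAQ8"}]
            fmt.Println(fused, ok) // MOVLstoreidx8 true
    }
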
-func rewriteValueAMD64_OpAMD64MOVLstoreconstidx1(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpAMD64MOVOload(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (MOVLstoreconstidx1 [c] {sym} ptr (SHLQconst [2] idx) mem)
-       // result: (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
+       // match: (MOVOload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVOload [off1+off2] {sym} ptr mem)
        for {
-               c := v.AuxInt
+               off1 := v.AuxInt
                sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 2 {
-                               continue
-                       }
-                       idx := v_1.Args[0]
-                       mem := v_2
-                       v.reset(OpAMD64MOVLstoreconstidx4)
-                       v.AuxInt = c
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
                }
-               break
-       }
-       // match: (MOVLstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
-       // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       c := v_0.AuxInt
-                       ptr := v_0.Args[0]
-                       idx := v_1
-                       mem := v_2
-                       if !(ValAndOff(x).canAdd(c)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVLstoreconstidx1)
-                       v.AuxInt = ValAndOff(x).add(c)
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(off1 + off2)) {
+                       break
                }
-               break
+               v.reset(OpAMD64MOVOload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg2(ptr, mem)
+               return true
        }
-       // match: (MOVLstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
-       // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       // match: (MOVOload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVOload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
-               x := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       c := v_1.AuxInt
-                       idx := v_1.Args[0]
-                       mem := v_2
-                       if !(ValAndOff(x).canAdd(c)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVLstoreconstidx1)
-                       v.AuxInt = ValAndOff(x).add(c)
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               if v_0.Op != OpAMD64LEAQ {
+                       break
                }
-               break
-       }
-       // match: (MOVLstoreconstidx1 [c] {s} p i x:(MOVLstoreconstidx1 [a] {s} p i mem))
-       // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x)
-       // result: (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p i (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       i := v_1
-                       x := v_2
-                       if x.Op != OpAMD64MOVLstoreconstidx1 {
-                               continue
-                       }
-                       a := x.AuxInt
-                       if x.Aux != s {
-                               continue
-                       }
-                       mem := x.Args[2]
-                       x_0 := x.Args[0]
-                       x_1 := x.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-                               if p != x_0 || i != x_1 || !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
-                                       continue
-                               }
-                               v.reset(OpAMD64MOVQstoreidx1)
-                               v.AuxInt = ValAndOff(a).Off()
-                               v.Aux = s
-                               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
-                               v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
-                               v.AddArg4(p, i, v0, mem)
-                               return true
-                       }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
                }
-               break
+               v.reset(OpAMD64MOVOload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg2(base, mem)
+               return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVLstoreconstidx4(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVOstore(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       config := b.Func.Config
        typ := &b.Func.Config.Types
-       // match: (MOVLstoreconstidx4 [x] {sym} (ADDQconst [c] ptr) idx mem)
-       // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       // match: (MOVOstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVOstore [off1+off2] {sym} ptr val mem)
        for {
-               x := v.AuxInt
+               off1 := v.AuxInt
                sym := v.Aux
                if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_0.AuxInt
+               off2 := v_0.AuxInt
                ptr := v_0.Args[0]
-               idx := v_1
+               val := v_1
                mem := v_2
-               if !(ValAndOff(x).canAdd(c)) {
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconstidx4)
-               v.AuxInt = ValAndOff(x).add(c)
+               v.reset(OpAMD64MOVOstore)
+               v.AuxInt = off1 + off2
                v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
+               v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVLstoreconstidx4 [x] {sym} ptr (ADDQconst [c] idx) mem)
-       // cond: ValAndOff(x).canAdd(4*c)
-       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
+       // match: (MOVOstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVOstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
-               x := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64ADDQconst {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v_1
                mem := v_2
-               if !(ValAndOff(x).canAdd(4 * c)) {
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconstidx4)
-               v.AuxInt = ValAndOff(x).add(4 * c)
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpAMD64MOVOstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg3(base, val, mem)
                return true
        }
-       // match: (MOVLstoreconstidx4 [c] {s} p i x:(MOVLstoreconstidx4 [a] {s} p i mem))
-       // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x)
-       // result: (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p (SHLQconst <i.Type> [2] i) (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
+       // match: (MOVOstore [dstOff] {dstSym} ptr (MOVOload [srcOff] {srcSym} (SB) _) mem)
+       // cond: symIsRO(srcSym)
+       // result: (MOVQstore [dstOff+8] {dstSym} ptr (MOVQconst [int64(read64(srcSym, srcOff+8, config.ctxt.Arch.ByteOrder))]) (MOVQstore [dstOff] {dstSym} ptr (MOVQconst [int64(read64(srcSym, srcOff, config.ctxt.Arch.ByteOrder))]) mem))
        for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v_0
-               i := v_1
-               x := v_2
-               if x.Op != OpAMD64MOVLstoreconstidx4 {
+               dstOff := v.AuxInt
+               dstSym := v.Aux
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVOload {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               srcOff := v_1.AuxInt
+               srcSym := v_1.Aux
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpSB {
                        break
                }
-               mem := x.Args[2]
-               if p != x.Args[0] || i != x.Args[1] || !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+               mem := v_2
+               if !(symIsRO(srcSym)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = ValAndOff(a).Off()
-               v.Aux = s
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, i.Type)
-               v0.AuxInt = 2
-               v0.AddArg(i)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
-               v1.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
-               v.AddArg4(p, v0, v1, mem)
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = dstOff + 8
+               v.Aux = dstSym
+               v0 := b.NewValue0(v_1.Pos, OpAMD64MOVQconst, typ.UInt64)
+               v0.AuxInt = int64(read64(srcSym, srcOff+8, config.ctxt.Arch.ByteOrder))
+               v1 := b.NewValue0(v_1.Pos, OpAMD64MOVQstore, types.TypeMem)
+               v1.AuxInt = dstOff
+               v1.Aux = dstSym
+               v2 := b.NewValue0(v_1.Pos, OpAMD64MOVQconst, typ.UInt64)
+               v2.AuxInt = int64(read64(srcSym, srcOff, config.ctxt.Arch.ByteOrder))
+               v1.AddArg3(ptr, v2, mem)
+               v.AddArg3(ptr, v0, v1)
                return true
        }
        return false
 }
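
The last MOVOstore rule above expands a 16-byte copy out of a read-only symbol into two immediate 8-byte stores, reading the source bytes at compile time with read64 in the target's byte order. A stand-in demonstration (this rodata and read64 are illustrative, not the compiler's):

    package main

    import (
            "encoding/binary"
            "fmt"
    )

    var rodata = []byte("0123456789abcdef") // pretend contents of a symIsRO symbol

    func read64(off int64, bo binary.ByteOrder) uint64 {
            return bo.Uint64(rodata[off : off+8])
    }

    func main() {
            lo := read64(0, binary.LittleEndian) // immediate for MOVQstore [dstOff]
            hi := read64(8, binary.LittleEndian) // immediate for MOVQstore [dstOff+8]
            fmt.Printf("%#x %#x\n", lo, hi)
    }
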
-func rewriteValueAMD64_OpAMD64MOVLstoreidx1(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpAMD64MOVQatomicload(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVLstoreidx1 [c] {sym} ptr (SHLQconst [2] idx) val mem)
-       // result: (MOVLstoreidx4 [c] {sym} ptr idx val mem)
+       // match: (MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVQatomicload [off1+off2] {sym} ptr mem)
        for {
-               c := v.AuxInt
+               off1 := v.AuxInt
                sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 2 {
-                               continue
-                       }
-                       idx := v_1.Args[0]
-                       val := v_2
-                       mem := v_3
-                       v.reset(OpAMD64MOVLstoreidx4)
-                       v.AuxInt = c
-                       v.Aux = sym
-                       v.AddArg4(ptr, idx, val, mem)
-                       return true
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
                }
-               break
-       }
-       // match: (MOVLstoreidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem)
-       // result: (MOVLstoreidx8 [c] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 {
-                               continue
-                       }
-                       idx := v_1.Args[0]
-                       val := v_2
-                       mem := v_3
-                       v.reset(OpAMD64MOVLstoreidx8)
-                       v.AuxInt = c
-                       v.Aux = sym
-                       v.AddArg4(ptr, idx, val, mem)
-                       return true
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(off1 + off2)) {
+                       break
                }
-               break
+               v.reset(OpAMD64MOVQatomicload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg2(ptr, mem)
+               return true
        }
-       // match: (MOVLstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
+       // match: (MOVQatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVQatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       d := v_0.AuxInt
-                       ptr := v_0.Args[0]
-                       idx := v_1
-                       val := v_2
-                       mem := v_3
-                       if !(is32Bit(c + d)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVLstoreidx1)
-                       v.AuxInt = c + d
-                       v.Aux = sym
-                       v.AddArg4(ptr, idx, val, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVLstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       d := v_1.AuxInt
-                       idx := v_1.Args[0]
-                       val := v_2
-                       mem := v_3
-                       if !(is32Bit(c + d)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVLstoreidx1)
-                       v.AuxInt = c + d
-                       v.Aux = sym
-                       v.AddArg4(ptr, idx, val, mem)
-                       return true
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               if v_0.Op != OpAMD64LEAQ {
+                       break
                }
-               break
-       }
-       // match: (MOVLstoreidx1 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx1 [i-4] {s} p idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVQstoreidx1 [i-4] {s} p idx w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       idx := v_1
-                       if v_2.Op != OpAMD64SHRQconst || v_2.AuxInt != 32 {
-                               continue
-                       }
-                       w := v_2.Args[0]
-                       x := v_3
-                       if x.Op != OpAMD64MOVLstoreidx1 || x.AuxInt != i-4 || x.Aux != s {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       x_0 := x.Args[0]
-                       x_1 := x.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-                               if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-                                       continue
-                               }
-                               v.reset(OpAMD64MOVQstoreidx1)
-                               v.AuxInt = i - 4
-                               v.Aux = s
-                               v.AddArg4(p, idx, w, mem)
-                               return true
-                       }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
                }
-               break
+               v.reset(OpAMD64MOVQatomicload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg2(ptr, mem)
+               return true
        }
-       // match: (MOVLstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx1 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVQstoreidx1 [i-4] {s} p idx w0 mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVQf2i(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MOVQf2i <t> (Arg <u> [off] {sym}))
+       // cond: t.Size() == u.Size()
+       // result: @b.Func.Entry (Arg <t> [off] {sym})
        for {
-               i := v.AuxInt
-               s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       idx := v_1
-                       if v_2.Op != OpAMD64SHRQconst {
-                               continue
-                       }
-                       j := v_2.AuxInt
-                       w := v_2.Args[0]
-                       x := v_3
-                       if x.Op != OpAMD64MOVLstoreidx1 || x.AuxInt != i-4 || x.Aux != s {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       x_0 := x.Args[0]
-                       x_1 := x.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-                               if p != x_0 || idx != x_1 {
-                                       continue
-                               }
-                               w0 := x.Args[2]
-                               if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-32 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
-                                       continue
-                               }
-                               v.reset(OpAMD64MOVQstoreidx1)
-                               v.AuxInt = i - 4
-                               v.Aux = s
-                               v.AddArg4(p, idx, w0, mem)
-                               return true
-                       }
+               t := v.Type
+               if v_0.Op != OpArg {
+                       break
                }
-               break
-       }
-       // match: (MOVLstoreidx1 [i] {s} p (MOVQconst [c]) w mem)
-       // cond: is32Bit(i+c)
-       // result: (MOVLstore [i+c] {s} p w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       if v_1.Op != OpAMD64MOVQconst {
-                               continue
-                       }
-                       c := v_1.AuxInt
-                       w := v_2
-                       mem := v_3
-                       if !(is32Bit(i + c)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVLstore)
-                       v.AuxInt = i + c
-                       v.Aux = s
-                       v.AddArg3(p, w, mem)
-                       return true
+               u := v_0.Type
+               off := v_0.AuxInt
+               sym := v_0.Aux
+               if !(t.Size() == u.Size()) {
+                       break
                }
-               break
+               b = b.Func.Entry
+               v0 := b.NewValue0(v.Pos, OpArg, t)
+               v.copyOf(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               return true
        }
-       // match: (MOVLstoreidx1 [off] {s} ptr idx (MOVQconst [c]) mem)
-       // cond: validValAndOff(int64(int32(c)), off)
-       // result: (MOVLstoreconstidx1 [makeValAndOff(int64(int32(c)), off)] {s} ptr idx mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVQi2f(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MOVQi2f <t> (Arg <u> [off] {sym}))
+       // cond: t.Size() == u.Size()
+       // result: @b.Func.Entry (Arg <t> [off] {sym})
        for {
-               off := v.AuxInt
-               s := v.Aux
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpAMD64MOVQconst {
+               t := v.Type
+               if v_0.Op != OpArg {
                        break
                }
-               c := v_2.AuxInt
-               mem := v_3
-               if !(validValAndOff(int64(int32(c)), off)) {
+               u := v_0.Type
+               off := v_0.AuxInt
+               sym := v_0.Aux
+               if !(t.Size() == u.Size()) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconstidx1)
-               v.AuxInt = makeValAndOff(int64(int32(c)), off)
-               v.Aux = s
-               v.AddArg3(ptr, idx, mem)
+               b = b.Func.Entry
+               v0 := b.NewValue0(v.Pos, OpArg, t)
+               v.copyOf(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
                return true
        }
        return false
 }
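
MOVQf2i and MOVQi2f are pure bit moves between the floating-point and integer register files, so when the operand is a same-size function argument, the two rules above simply re-type the Arg and place it in the entry block (where Args conceptually live) instead of emitting a move. The source-level equivalent of the reinterpretation:

    package main

    import (
            "fmt"
            "math"
    )

    func main() {
            f := 1.5
            bits := math.Float64bits(f)        // MOVQf2i: same bits, integer view
            back := math.Float64frombits(bits) // MOVQi2f: same bits, float view
            fmt.Printf("%#x %v\n", bits, back) // 0x3ff8000000000000 1.5
    }
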
-func rewriteValueAMD64_OpAMD64MOVLstoreidx4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpAMD64MOVQload(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (MOVLstoreidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVLstoreidx4 [c+d] {sym} ptr idx val mem)
+       config := b.Func.Config
+       // match: (MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: x
        for {
-               c := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVQstore {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v_1
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c + d)) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               x := v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx4)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg4(ptr, idx, val, mem)
+               v.copyOf(x)
                return true
        }
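
This first MOVQload rule is store-to-load forwarding: a load whose offset, symbol, and (per isSamePtr) pointer match the immediately preceding store is replaced outright by the stored value via v.copyOf(x). At the source level, for example:

    package main

    import "fmt"

    type T struct{ f int64 }

    func storeThenLoad(p *T, v int64) int64 {
            p.f = v    // MOVQstore [off] {sym} ptr v mem
            return p.f // MOVQload of the same [off] {sym} ptr: becomes v
    }

    func main() { fmt.Println(storeThenLoad(new(T), 42)) } // 42
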
-       // match: (MOVLstoreidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+4*d)
-       // result: (MOVLstoreidx4 [c+4*d] {sym} ptr idx val mem)
+       // match: (MOVQload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVQload [off1+off2] {sym} ptr mem)
        for {
-               c := v.AuxInt
+               off1 := v.AuxInt
                sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64ADDQconst {
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c + 4*d)) {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx4)
-               v.AuxInt = c + 4*d
+               v.reset(OpAMD64MOVQload)
+               v.AuxInt = off1 + off2
                v.Aux = sym
-               v.AddArg4(ptr, idx, val, mem)
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVLstoreidx4 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx4 [i-4] {s} p idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w mem)
+       // match: (MOVQload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               idx := v_1
-               if v_2.Op != OpAMD64SHRQconst || v_2.AuxInt != 32 {
-                       break
-               }
-               w := v_2.Args[0]
-               x := v_3
-               if x.Op != OpAMD64MOVLstoreidx4 || x.AuxInt != i-4 || x.Aux != s {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               mem := x.Args[3]
-               if p != x.Args[0] || idx != x.Args[1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
-               v0.AuxInt = 2
-               v0.AddArg(idx)
-               v.AddArg4(p, v0, w, mem)
+               v.reset(OpAMD64MOVQload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg2(base, mem)
                return true
        }
-       // match: (MOVLstoreidx4 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx4 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w0 mem)
+       // match: (MOVQload [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVQload [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               idx := v_1
-               if v_2.Op != OpAMD64SHRQconst {
-                       break
-               }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v_3
-               if x.Op != OpAMD64MOVLstoreidx4 || x.AuxInt != i-4 || x.Aux != s {
-                       break
-               }
-               mem := x.Args[3]
-               if p != x.Args[0] || idx != x.Args[1] {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ1 {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-32 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               mem := v_1
+               if !(i1 != 0 && is32Bit(i0+i1)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
-               v0.AuxInt = 2
-               v0.AddArg(idx)
-               v.AddArg4(p, v0, w0, mem)
+               v.reset(OpAMD64MOVQload)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg2(v0, mem)
                return true
        }
-       // match: (MOVLstoreidx4 [i] {s} p (MOVQconst [c]) w mem)
-       // cond: is32Bit(i+4*c)
-       // result: (MOVLstore [i+4*c] {s} p w mem)
+       // match: (MOVQload [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVQload [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               if v_1.Op != OpAMD64MOVQconst {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ2 {
                        break
                }
-               c := v_1.AuxInt
-               w := v_2
-               mem := v_3
-               if !(is32Bit(i + 4*c)) {
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               mem := v_1
+               if !(i1 != 0 && is32Bit(i0+i1)) {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = i + 4*c
-               v.Aux = s
-               v.AddArg3(p, w, mem)
+               v.reset(OpAMD64MOVQload)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg2(v0, mem)
                return true
        }
-       // match: (MOVLstoreidx4 [off] {s} ptr idx (MOVQconst [c]) mem)
-       // cond: validValAndOff(int64(int32(c)), off)
-       // result: (MOVLstoreconstidx4 [makeValAndOff(int64(int32(c)), off)] {s} ptr idx mem)
+       // match: (MOVQload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVQload [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) mem)
        for {
-               off := v.AuxInt
-               s := v.Aux
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpAMD64MOVQconst {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ4 {
                        break
                }
-               c := v_2.AuxInt
-               mem := v_3
-               if !(validValAndOff(int64(int32(c)), off)) {
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               mem := v_1
+               if !(i1 != 0 && is32Bit(i0+i1)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconstidx4)
-               v.AuxInt = makeValAndOff(int64(int32(c)), off)
-               v.Aux = s
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpAMD64MOVQload)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg2(v0, mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLstoreidx8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVLstoreidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVLstoreidx8 [c+d] {sym} ptr idx val mem)
+       // match: (MOVQload [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVQload [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ8 {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v_1
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c + d)) {
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               mem := v_1
+               if !(i1 != 0 && is32Bit(i0+i1)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx8)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg4(ptr, idx, val, mem)
+               v.reset(OpAMD64MOVQload)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg2(v0, mem)
                return true
        }
-       // match: (MOVLstoreidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+8*d)
-       // result: (MOVLstoreidx8 [c+8*d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c + 8*d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLstoreidx8)
-               v.AuxInt = c + 8*d
-               v.Aux = sym
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVLstoreidx8 [i] {s} p (MOVQconst [c]) w mem)
-       // cond: is32Bit(i+8*c)
-       // result: (MOVLstore [i+8*c] {s} p w mem)
+       // match: (MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
+       // result: (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               if v_1.Op != OpAMD64MOVQconst {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               if v_0.Op != OpAMD64LEAL {
                        break
                }
-               c := v_1.AuxInt
-               w := v_2
-               mem := v_3
-               if !(is32Bit(i + 8*c)) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v_1
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = i + 8*c
-               v.Aux = s
-               v.AddArg3(p, w, mem)
+               v.reset(OpAMD64MOVQload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg2(base, mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVOload(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVOload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // match: (MOVQload [off1] {sym} (ADDLconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVOload [off1+off2] {sym} ptr mem)
+       // result: (MOVQload [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
+               if v_0.Op != OpAMD64ADDLconst {
                        break
                }
                off2 := v_0.AuxInt
@@ -14616,46 +13448,52 @@ func rewriteValueAMD64_OpAMD64MOVOload(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVOload)
+               v.reset(OpAMD64MOVQload)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVOload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVOload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (MOVQload [off] {sym} ptr (MOVSDstore [off] {sym} ptr val _))
+       // result: (MOVQf2i val)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVSDstore || v_1.AuxInt != off || v_1.Aux != sym {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               val := v_1.Args[1]
+               if ptr != v_1.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVOload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg2(base, mem)
+               v.reset(OpAMD64MOVQf2i)
+               v.AddArg(val)
+               return true
+       }
+       // match: (MOVQload [off] {sym} (SB) _)
+       // cond: symIsRO(sym)
+       // result: (MOVQconst [int64(read64(sym, off, config.ctxt.Arch.ByteOrder))])
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               if v_0.Op != OpSB || !(symIsRO(sym)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = int64(read64(sym, off, config.ctxt.Arch.ByteOrder))
                return true
        }
        return false
 }
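Note: the (SB) rule above folds a 64-bit load from a read-only symbol into an immediate. A self-contained sketch of the arithmetic read64 performs, with the symbol's contents stood in by a byte slice (the real helper reads the linker symbol's data):

    package main

    import (
            "encoding/binary"
            "fmt"
    )

    // read64 returns 8 bytes of read-only data at off, interpreted in the
    // target byte order (little-endian on amd64).
    func read64(data []byte, off int64) int64 {
            return int64(binary.LittleEndian.Uint64(data[off:]))
    }

    func main() {
            rodata := []byte{1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0}
            fmt.Println(read64(rodata, 8)) // 2: the load becomes MOVQconst [2]
    }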
-func rewriteValueAMD64_OpAMD64MOVOstore(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       config := b.Func.Config
-       typ := &b.Func.Config.Types
-       // match: (MOVOstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // match: (MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVOstore [off1+off2] {sym} ptr val mem)
+       // result: (MOVQstore [off1+off2] {sym} ptr val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -14669,15 +13507,36 @@ func rewriteValueAMD64_OpAMD64MOVOstore(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVOstore)
+               v.reset(OpAMD64MOVQstore)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVOstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem)
+       // cond: validValAndOff(c,off)
+       // result: (MOVQstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v_2
+               if !(validValAndOff(c, off)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreconst)
+               v.AuxInt = makeValAndOff(c, off)
+               v.Aux = sym
+               v.AddArg2(ptr, mem)
+               return true
+       }
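Note: MOVQstoreconst carries both the constant and the offset in a single AuxInt. A minimal sketch of the packing that makeValAndOff and validValAndOff assume, mirroring the ValAndOff encoding in ssa/op.go (value in the high 32 bits, offset in the low 32):

    // Both halves must individually fit in 32 bits, which is what
    // validValAndOff verifies before the rule fires.
    func makeValAndOff(val, off int64) int64 {
            return val<<32 | int64(uint32(off))
    }
    func valOf(x int64) int64 { return x >> 32 }         // signed value
    func offOf(x int64) int64 { return int64(int32(x)) } // signed offset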
+       // match: (MOVQstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVOstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -14692,3760 +13551,906 @@ func rewriteValueAMD64_OpAMD64MOVOstore(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVOstore)
+               v.reset(OpAMD64MOVQstore)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg3(base, val, mem)
                return true
        }
-       // match: (MOVOstore [dstOff] {dstSym} ptr (MOVOload [srcOff] {srcSym} (SB) _) mem)
-       // cond: symIsRO(srcSym)
-       // result: (MOVQstore [dstOff+8] {dstSym} ptr (MOVQconst [int64(read64(srcSym, srcOff+8, config.ctxt.Arch.ByteOrder))]) (MOVQstore [dstOff] {dstSym} ptr (MOVQconst [int64(read64(srcSym, srcOff, config.ctxt.Arch.ByteOrder))]) mem))
+       // match: (MOVQstore [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVQstore [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) val mem)
        for {
-               dstOff := v.AuxInt
-               dstSym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVOload {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ1 {
                        break
                }
-               srcOff := v_1.AuxInt
-               srcSym := v_1.Aux
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpSB {
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               val := v_1
+               mem := v_2
+               if !(i1 != 0 && is32Bit(i0+i1)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg3(v0, val, mem)
+               return true
+       }
+       // match: (MOVQstore [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVQstore [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) val mem)
+       for {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ2 {
                        break
                }
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               val := v_1
                mem := v_2
-               if !(symIsRO(srcSym)) {
+               if !(i1 != 0 && is32Bit(i0+i1)) {
                        break
                }
                v.reset(OpAMD64MOVQstore)
-               v.AuxInt = dstOff + 8
-               v.Aux = dstSym
-               v0 := b.NewValue0(v_1.Pos, OpAMD64MOVQconst, typ.UInt64)
-               v0.AuxInt = int64(read64(srcSym, srcOff+8, config.ctxt.Arch.ByteOrder))
-               v1 := b.NewValue0(v_1.Pos, OpAMD64MOVQstore, types.TypeMem)
-               v1.AuxInt = dstOff
-               v1.Aux = dstSym
-               v2 := b.NewValue0(v_1.Pos, OpAMD64MOVQconst, typ.UInt64)
-               v2.AuxInt = int64(read64(srcSym, srcOff, config.ctxt.Arch.ByteOrder))
-               v1.AddArg3(ptr, v2, mem)
-               v.AddArg3(ptr, v0, v1)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg3(v0, val, mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVQatomicload(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVQatomicload [off1+off2] {sym} ptr mem)
+       // match: (MOVQstore [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVQstore [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) val mem)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ4 {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1 + off2)) {
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               val := v_1
+               mem := v_2
+               if !(i1 != 0 && is32Bit(i0+i1)) {
                        break
                }
-               v.reset(OpAMD64MOVQatomicload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg2(ptr, mem)
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg3(v0, val, mem)
                return true
        }
-       // match: (MOVQatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // match: (MOVQstore [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVQstore [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) val mem)
+       for {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ8 {
+                       break
+               }
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               val := v_1
+               mem := v_2
+               if !(i1 != 0 && is32Bit(i0+i1)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg3(v0, val, mem)
+               return true
+       }
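Note: the four LEAQ1/2/4/8 rules above do not change the address computed; they hoist a nonzero displacement off the index expression onto the store itself, leaving a zero-offset LEAQx in canonical form for later combining. In rule notation, (MOVQstore [i0] (LEAQ8 [i1] x y) val mem) becomes (MOVQstore [i0+i1] (LEAQ8 [0] x y) val mem). An illustrative (hypothetical) source pattern that can produce this shape:

    // p[i+1] addresses p + 8*i + 8, i.e. a scaled index plus a displacement;
    // the rules fold the +8 into the MOVQstore's offset.
    func set(p *[16]uint64, i int) {
            p[i+1] = 42
    }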
+       // match: (MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
+       // result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ {
+               if v_0.Op != OpAMD64LEAL {
                        break
                }
                off2 := v_0.AuxInt
                sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               base := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
                        break
                }
-               v.reset(OpAMD64MOVQatomicload)
+               v.reset(OpAMD64MOVQstore)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
-               v.AddArg2(ptr, mem)
+               v.AddArg3(base, val, mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVQf2i(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MOVQf2i <t> (Arg <u> [off] {sym}))
-       // cond: t.Size() == u.Size()
-       // result: @b.Func.Entry (Arg <t> [off] {sym})
+       // match: (MOVQstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVQstore [off1+off2] {sym} ptr val mem)
        for {
-               t := v.Type
-               if v_0.Op != OpArg {
+               off1 := v.AuxInt
+               sym := v.Aux
+               if v_0.Op != OpAMD64ADDLconst {
                        break
                }
-               u := v_0.Type
-               off := v_0.AuxInt
-               sym := v_0.Aux
-               if !(t.Size() == u.Size()) {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               b = b.Func.Entry
-               v0 := b.NewValue0(v.Pos, OpArg, t)
-               v.copyOf(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg3(ptr, val, mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVQi2f(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MOVQi2f <t> (Arg <u> [off] {sym}))
-       // cond: t.Size() == u.Size()
-       // result: @b.Func.Entry (Arg <t> [off] {sym})
-       for {
-               t := v.Type
-               if v_0.Op != OpArg {
-                       break
-               }
-               u := v_0.Type
-               off := v_0.AuxInt
-               sym := v_0.Aux
-               if !(t.Size() == u.Size()) {
-                       break
-               }
-               b = b.Func.Entry
-               v0 := b.NewValue0(v.Pos, OpArg, t)
-               v.copyOf(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVQload(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       config := b.Func.Config
-       // match: (MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: x
+       // match: (MOVQstore {sym} [off] ptr y:(ADDQload x [off] {sym} ptr mem) mem)
+       // cond: y.Uses==1 && clobber(y)
+       // result: (ADDQmodify [off] {sym} ptr x mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                ptr := v_0
-               if v_1.Op != OpAMD64MOVQstore {
+               y := v_1
+               if y.Op != OpAMD64ADDQload || y.AuxInt != off || y.Aux != sym {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               x := v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               mem := y.Args[2]
+               x := y.Args[0]
+               if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) {
                        break
                }
-               v.copyOf(x)
+               v.reset(OpAMD64ADDQmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVQload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVQload [off1+off2] {sym} ptr mem)
+       // match: (MOVQstore {sym} [off] ptr y:(ANDQload x [off] {sym} ptr mem) mem)
+       // cond: y.Uses==1 && clobber(y)
+       // result: (ANDQmodify [off] {sym} ptr x mem)
        for {
-               off1 := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
+               ptr := v_0
+               y := v_1
+               if y.Op != OpAMD64ANDQload || y.AuxInt != off || y.Aux != sym {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1 + off2)) {
+               mem := y.Args[2]
+               x := y.Args[0]
+               if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) {
                        break
                }
-               v.reset(OpAMD64MOVQload)
-               v.AuxInt = off1 + off2
+               v.reset(OpAMD64ANDQmodify)
+               v.AuxInt = off
                v.Aux = sym
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVQload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg2(base, mem)
+               v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVQload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (MOVQstore {sym} [off] ptr y:(ORQload x [off] {sym} ptr mem) mem)
+       // cond: y.Uses==1 && clobber(y)
+       // result: (ORQmodify [off] {sym} ptr x mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ1 {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v_0
+               y := v_1
+               if y.Op != OpAMD64ORQload || y.AuxInt != off || y.Aux != sym {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               mem := y.Args[2]
+               x := y.Args[0]
+               if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) {
                        break
                }
-               v.reset(OpAMD64MOVQloadidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpAMD64ORQmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVQload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (MOVQstore {sym} [off] ptr y:(XORQload x [off] {sym} ptr mem) mem)
+       // cond: y.Uses==1 && clobber(y)
+       // result: (XORQmodify [off] {sym} ptr x mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ8 {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v_0
+               y := v_1
+               if y.Op != OpAMD64XORQload || y.AuxInt != off || y.Aux != sym {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               mem := y.Args[2]
+               x := y.Args[0]
+               if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) {
                        break
                }
-               v.reset(OpAMD64MOVQloadidx8)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpAMD64XORQmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg3(ptr, x, mem)
                return true
        }
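Note: the ADDQload/ANDQload/ORQload/XORQload rules above fuse an op-from-memory whose only use is a store back to the same address into one read-modify-write instruction; the y.Uses==1 && clobber(y) condition is what guarantees the intermediate value has no other consumer. Illustrative source:

    // On amd64 this compiles to a single ADDQ x, (p)-style instruction once
    // the first rule of the group rewrites the store to ADDQmodify.
    func bump(p *int64, x int64) {
            *p += x
    }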
-       // match: (MOVQload [off] {sym} (ADDQ ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVQloadidx1 [off] {sym} ptr idx mem)
+       // match: (MOVQstore {sym} [off] ptr y:(ADDQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
+       // result: (ADDQmodify [off] {sym} ptr x mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               if v_0.Op != OpAMD64ADDQ {
+               ptr := v_0
+               y := v_1
+               if y.Op != OpAMD64ADDQ {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr := v_0_0
-                       idx := v_0_1
-                       mem := v_1
-                       if !(ptr.Op != OpSB) {
+               _ = y.Args[1]
+               y_0 := y.Args[0]
+               y_1 := y.Args[1]
+               for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
+                       l := y_0
+                       if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
+                               continue
+                       }
+                       mem := l.Args[1]
+                       if ptr != l.Args[0] {
+                               continue
+                       }
+                       x := y_1
+                       if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
                                continue
                        }
-                       v.reset(OpAMD64MOVQloadidx1)
+                       v.reset(OpAMD64ADDQmodify)
                        v.AuxInt = off
                        v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
+                       v.AddArg3(ptr, x, mem)
                        return true
                }
                break
        }
-       // match: (MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
-       // result: (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (MOVQstore {sym} [off] ptr y:(SUBQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
+       // result: (SUBQmodify [off] {sym} ptr x mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAL {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v_1
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v_0
+               y := v_1
+               if y.Op != OpAMD64SUBQ {
                        break
                }
-               v.reset(OpAMD64MOVQload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg2(base, mem)
-               return true
-       }
-       // match: (MOVQload [off1] {sym} (ADDLconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVQload [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDLconst {
+               x := y.Args[1]
+               l := y.Args[0]
+               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1 + off2)) {
+               mem := l.Args[1]
+               if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
                        break
                }
-               v.reset(OpAMD64MOVQload)
-               v.AuxInt = off1 + off2
+               v.reset(OpAMD64SUBQmodify)
+               v.AuxInt = off
                v.Aux = sym
-               v.AddArg2(ptr, mem)
+               v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVQload [off] {sym} ptr (MOVSDstore [off] {sym} ptr val _))
-       // result: (MOVQf2i val)
+       // match: (MOVQstore {sym} [off] ptr y:(ANDQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
+       // result: (ANDQmodify [off] {sym} ptr x mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                ptr := v_0
-               if v_1.Op != OpAMD64MOVSDstore || v_1.AuxInt != off || v_1.Aux != sym {
-                       break
-               }
-               val := v_1.Args[1]
-               if ptr != v_1.Args[0] {
-                       break
-               }
-               v.reset(OpAMD64MOVQf2i)
-               v.AddArg(val)
-               return true
-       }
-       // match: (MOVQload [off] {sym} (SB) _)
-       // cond: symIsRO(sym)
-       // result: (MOVQconst [int64(read64(sym, off, config.ctxt.Arch.ByteOrder))])
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpSB || !(symIsRO(sym)) {
+               y := v_1
+               if y.Op != OpAMD64ANDQ {
                        break
                }
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = int64(read64(sym, off, config.ctxt.Arch.ByteOrder))
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVQloadidx1(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVQloadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem)
-       // result: (MOVQloadidx8 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 {
+               _ = y.Args[1]
+               y_0 := y.Args[0]
+               y_1 := y.Args[1]
+               for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
+                       l := y_0
+                       if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
                                continue
                        }
-                       idx := v_1.Args[0]
-                       mem := v_2
-                       v.reset(OpAMD64MOVQloadidx8)
-                       v.AuxInt = c
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVQloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64ADDQconst {
+                       mem := l.Args[1]
+                       if ptr != l.Args[0] {
                                continue
                        }
-                       d := v_0.AuxInt
-                       ptr := v_0.Args[0]
-                       idx := v_1
-                       mem := v_2
-                       if !(is32Bit(c + d)) {
+                       x := y_1
+                       if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
                                continue
                        }
-                       v.reset(OpAMD64MOVQloadidx1)
-                       v.AuxInt = c + d
+                       v.reset(OpAMD64ANDQmodify)
+                       v.AuxInt = off
                        v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
+                       v.AddArg3(ptr, x, mem)
                        return true
                }
                break
        }
-       // match: (MOVQloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem)
+       // match: (MOVQstore {sym} [off] ptr y:(ORQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
+       // result: (ORQmodify [off] {sym} ptr x mem)
        for {
-               c := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64ADDQconst {
+               ptr := v_0
+               y := v_1
+               if y.Op != OpAMD64ORQ {
+                       break
+               }
+               _ = y.Args[1]
+               y_0 := y.Args[0]
+               y_1 := y.Args[1]
+               for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
+                       l := y_0
+                       if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
                                continue
                        }
-                       d := v_1.AuxInt
-                       idx := v_1.Args[0]
-                       mem := v_2
-                       if !(is32Bit(c + d)) {
+                       mem := l.Args[1]
+                       if ptr != l.Args[0] {
                                continue
                        }
-                       v.reset(OpAMD64MOVQloadidx1)
-                       v.AuxInt = c + d
+                       x := y_1
+                       if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
+                               continue
+                       }
+                       v.reset(OpAMD64ORQmodify)
+                       v.AuxInt = off
                        v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
+                       v.AddArg3(ptr, x, mem)
                        return true
                }
                break
        }
-       // match: (MOVQloadidx1 [i] {s} p (MOVQconst [c]) mem)
-       // cond: is32Bit(i+c)
-       // result: (MOVQload [i+c] {s} p mem)
+       // match: (MOVQstore {sym} [off] ptr y:(XORQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
+       // result: (XORQmodify [off] {sym} ptr x mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       if v_1.Op != OpAMD64MOVQconst {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v_0
+               y := v_1
+               if y.Op != OpAMD64XORQ {
+                       break
+               }
+               _ = y.Args[1]
+               y_0 := y.Args[0]
+               y_1 := y.Args[1]
+               for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
+                       l := y_0
+                       if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
                                continue
                        }
-                       c := v_1.AuxInt
-                       mem := v_2
-                       if !(is32Bit(i + c)) {
+                       mem := l.Args[1]
+                       if ptr != l.Args[0] {
                                continue
                        }
-                       v.reset(OpAMD64MOVQload)
-                       v.AuxInt = i + c
-                       v.Aux = s
-                       v.AddArg2(p, mem)
+                       x := y_1
+                       if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
+                               continue
+                       }
+                       v.reset(OpAMD64XORQmodify)
+                       v.AuxInt = off
+                       v.Aux = sym
+                       v.AddArg3(ptr, x, mem)
                        return true
                }
                break
        }
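Note: ADDQ/ANDQ/ORQ/XORQ are commutative, so the rules above try the load in either operand position via the swapping _i0 loop; the SUBQ rule has no such loop because subtraction is not commutative, and only (SUBQ l x), with the load on the left, corresponds to *p -= x. A minimal sketch of the generated idiom:

    // Attempt the match with both operand orders, swapping after the first try.
    func matchCommutative(a, b *Value, try func(l, x *Value) bool) bool {
            for i := 0; i <= 1; i, a, b = i+1, b, a {
                    if try(a, b) {
                            return true
                    }
            }
            return false
    }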
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVQloadidx8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVQloadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVQloadidx8 [c+d] {sym} ptr idx mem)
+       // match: (MOVQstore {sym} [off] ptr y:(BTCQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
+       // result: (BTCQmodify [off] {sym} ptr x mem)
        for {
-               c := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
+               ptr := v_0
+               y := v_1
+               if y.Op != OpAMD64BTCQ {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v_1
-               mem := v_2
-               if !(is32Bit(c + d)) {
+               x := y.Args[1]
+               l := y.Args[0]
+               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
                        break
                }
-               v.reset(OpAMD64MOVQloadidx8)
-               v.AuxInt = c + d
+               mem := l.Args[1]
+               if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
+                       break
+               }
+               v.reset(OpAMD64BTCQmodify)
+               v.AuxInt = off
                v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
+               v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVQloadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond: is32Bit(c+8*d)
-       // result: (MOVQloadidx8 [c+8*d] {sym} ptr idx mem)
+       // match: (MOVQstore {sym} [off] ptr y:(BTRQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
+       // result: (BTRQmodify [off] {sym} ptr x mem)
        for {
-               c := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
                ptr := v_0
-               if v_1.Op != OpAMD64ADDQconst {
+               y := v_1
+               if y.Op != OpAMD64BTRQ {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v_2
-               if !(is32Bit(c + 8*d)) {
+               x := y.Args[1]
+               l := y.Args[0]
+               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
                        break
                }
-               v.reset(OpAMD64MOVQloadidx8)
-               v.AuxInt = c + 8*d
+               mem := l.Args[1]
+               if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
+                       break
+               }
+               v.reset(OpAMD64BTRQmodify)
+               v.AuxInt = off
                v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
+               v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVQloadidx8 [i] {s} p (MOVQconst [c]) mem)
-       // cond: is32Bit(i+8*c)
-       // result: (MOVQload [i+8*c] {s} p mem)
+       // match: (MOVQstore {sym} [off] ptr y:(BTSQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
+       // result: (BTSQmodify [off] {sym} ptr x mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               if v_1.Op != OpAMD64MOVQconst {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v_0
+               y := v_1
+               if y.Op != OpAMD64BTSQ {
                        break
                }
-               c := v_1.AuxInt
-               mem := v_2
-               if !(is32Bit(i + 8*c)) {
+               x := y.Args[1]
+               l := y.Args[0]
+               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
                        break
                }
-               v.reset(OpAMD64MOVQload)
-               v.AuxInt = i + 8*c
-               v.Aux = s
-               v.AddArg2(p, mem)
+               mem := l.Args[1]
+               if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
+                       break
+               }
+               v.reset(OpAMD64BTSQmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg3(ptr, x, mem)
                return true
        }
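Note: the BTCQ/BTRQ/BTSQ rules extend the same fusion to single-bit read-modify-writes. Illustrative (hypothetical) sources that can lower to a memory-operand bit instruction when these rules fire; whether a BT* op is chosen at all depends on earlier rules recognizing the 1<<b pattern:

    func setBit(p *uint64, b uint64)   { *p |= 1 << (b & 63) }  // BTSQmodify-style
    func clearBit(p *uint64, b uint64) { *p &^= 1 << (b & 63) } // BTRQmodify-style
    func flipBit(p *uint64, b uint64)  { *p ^= 1 << (b & 63) }  // BTCQmodify-style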
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVQstore [off1+off2] {sym} ptr val mem)
+       // match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a)
+       // result: (ADDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
        for {
-               off1 := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
+               ptr := v_0
+               a := v_1
+               if a.Op != OpAMD64ADDQconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(off1 + off2)) {
+               c := a.AuxInt
+               l := a.Args[0]
+               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = off1 + off2
+               mem := l.Args[1]
+               ptr2 := l.Args[0]
+               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) {
+                       break
+               }
+               v.reset(OpAMD64ADDQconstmodify)
+               v.AuxInt = makeValAndOff(c, off)
                v.Aux = sym
-               v.AddArg3(ptr, val, mem)
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem)
-       // cond: validValAndOff(c,off)
-       // result: (MOVQstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
+       // match: (MOVQstore [off] {sym} ptr a:(ANDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a)
+       // result: (ANDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                ptr := v_0
-               if v_1.Op != OpAMD64MOVQconst {
+               a := v_1
+               if a.Op != OpAMD64ANDQconst {
                        break
                }
-               c := v_1.AuxInt
-               mem := v_2
-               if !(validValAndOff(c, off)) {
+               c := a.AuxInt
+               l := a.Args[0]
+               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconst)
+               mem := l.Args[1]
+               ptr2 := l.Args[0]
+               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQconstmodify)
                v.AuxInt = makeValAndOff(c, off)
                v.Aux = sym
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVQstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // match: (MOVQstore [off] {sym} ptr a:(ORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a)
+       // result: (ORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v_0
+               a := v_1
+               if a.Op != OpAMD64ORQconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(base, val, mem)
-               return true
-       }
-       // match: (MOVQstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ1 {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVQstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ8 {
+               c := a.AuxInt
+               l := a.Args[0]
+               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               mem := l.Args[1]
+               ptr2 := l.Args[0]
+               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx8)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg4(ptr, idx, val, mem)
+               v.reset(OpAMD64ORQconstmodify)
+               v.AuxInt = makeValAndOff(c, off)
+               v.Aux = sym
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVQstore [off] {sym} (ADDQ ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVQstoreidx1 [off] {sym} ptr idx val mem)
+       // match: (MOVQstore [off] {sym} ptr a:(XORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a)
+       // result: (XORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               if v_0.Op != OpAMD64ADDQ {
+               ptr := v_0
+               a := v_1
+               if a.Op != OpAMD64XORQconst {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr := v_0_0
-                       idx := v_0_1
-                       val := v_1
-                       mem := v_2
-                       if !(ptr.Op != OpSB) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVQstoreidx1)
-                       v.AuxInt = off
-                       v.Aux = sym
-                       v.AddArg4(ptr, idx, val, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
-       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
-       // result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAL {
+               c := a.AuxInt
+               l := a.Args[0]
+               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
+               mem := l.Args[1]
+               ptr2 := l.Args[0]
+               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(base, val, mem)
+               v.reset(OpAMD64XORQconstmodify)
+               v.AuxInt = makeValAndOff(c, off)
+               v.Aux = sym
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVQstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVQstore [off1+off2] {sym} ptr val mem)
+       // match: (MOVQstore [off] {sym} ptr a:(BTCQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a)
+       // result: (BTCQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
        for {
-               off1 := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
-               if v_0.Op != OpAMD64ADDLconst {
+               ptr := v_0
+               a := v_1
+               if a.Op != OpAMD64BTCQconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(off1 + off2)) {
+               c := a.AuxInt
+               l := a.Args[0]
+               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = off1 + off2
+               mem := l.Args[1]
+               ptr2 := l.Args[0]
+               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) {
+                       break
+               }
+               v.reset(OpAMD64BTCQconstmodify)
+               v.AuxInt = makeValAndOff(c, off)
                v.Aux = sym
-               v.AddArg3(ptr, val, mem)
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVQstore {sym} [off] ptr y:(ADDQload x [off] {sym} ptr mem) mem)
-       // cond: y.Uses==1 && clobber(y)
-       // result: (ADDQmodify [off] {sym} ptr x mem)
+       // match: (MOVQstore [off] {sym} ptr a:(BTRQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a)
+       // result: (BTRQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64ADDQload || y.AuxInt != off || y.Aux != sym {
+               a := v_1
+               if a.Op != OpAMD64BTRQconst {
                        break
                }
-               mem := y.Args[2]
-               x := y.Args[0]
-               if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) {
+               c := a.AuxInt
+               l := a.Args[0]
+               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
                        break
                }
-               v.reset(OpAMD64ADDQmodify)
-               v.AuxInt = off
+               mem := l.Args[1]
+               ptr2 := l.Args[0]
+               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) {
+                       break
+               }
+               v.reset(OpAMD64BTRQconstmodify)
+               v.AuxInt = makeValAndOff(c, off)
                v.Aux = sym
-               v.AddArg3(ptr, x, mem)
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVQstore {sym} [off] ptr y:(ANDQload x [off] {sym} ptr mem) mem)
-       // cond: y.Uses==1 && clobber(y)
-       // result: (ANDQmodify [off] {sym} ptr x mem)
+       // match: (MOVQstore [off] {sym} ptr a:(BTSQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a)
+       // result: (BTSQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64ANDQload || y.AuxInt != off || y.Aux != sym {
+               a := v_1
+               if a.Op != OpAMD64BTSQconst {
                        break
                }
-               mem := y.Args[2]
-               x := y.Args[0]
-               if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) {
+               c := a.AuxInt
+               l := a.Args[0]
+               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
                        break
                }
-               v.reset(OpAMD64ANDQmodify)
-               v.AuxInt = off
+               mem := l.Args[1]
+               ptr2 := l.Args[0]
+               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) {
+                       break
+               }
+               v.reset(OpAMD64BTSQconstmodify)
+               v.AuxInt = makeValAndOff(c, off)
                v.Aux = sym
-               v.AddArg3(ptr, x, mem)
+               v.AddArg2(ptr, mem)
                return true
        }
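Note: the *Qconstmodify rules above additionally check isSamePtr(ptr, ptr2), since the load nested inside the arithmetic op carries its own pointer argument; together with the Uses==1 checks this proves the op reads exactly the word it is about to overwrite. Illustrative source:

    // A constant read-modify-write: with the ANDQconst rule above this can
    // become a single ANDQ $15, (p) instruction (ANDQconstmodify).
    func mask(p *uint64) {
            *p &= 15
    }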
-       // match: (MOVQstore {sym} [off] ptr y:(ORQload x [off] {sym} ptr mem) mem)
-       // cond: y.Uses==1 && clobber(y)
-       // result: (ORQmodify [off] {sym} ptr x mem)
+       // match: (MOVQstore [off] {sym} ptr (MOVQf2i val) mem)
+       // result: (MOVSDstore [off] {sym} ptr val mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64ORQload || y.AuxInt != off || y.Aux != sym {
-                       break
-               }
-               mem := y.Args[2]
-               x := y.Args[0]
-               if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) {
+               if v_1.Op != OpAMD64MOVQf2i {
                        break
                }
-               v.reset(OpAMD64ORQmodify)
+               val := v_1.Args[0]
+               mem := v_2
+               v.reset(OpAMD64MOVSDstore)
                v.AuxInt = off
                v.Aux = sym
-               v.AddArg3(ptr, x, mem)
+               v.AddArg3(ptr, val, mem)
                return true
        }
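Note: this last MOVQstore rule pairs with the (MOVQload ... (MOVSDstore ...)) rule earlier in the hunk: together they elide integer<->float moves that exist only to reinterpret bits across a store/load. Illustrative source, assuming math.Float64bits lowers to MOVQf2i:

    package bits

    import "math"

    // Storing the raw bits of a float64: with the rule above the value goes
    // to memory via MOVSDstore instead of bouncing through an integer register.
    func storeBits(p *uint64, f float64) {
            *p = math.Float64bits(f)
    }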
-       // match: (MOVQstore {sym} [off] ptr y:(XORQload x [off] {sym} ptr mem) mem)
-       // cond: y.Uses==1 && clobber(y)
-       // result: (XORQmodify [off] {sym} ptr x mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       config := b.Func.Config
+       // match: (MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64XORQload || y.AuxInt != off || y.Aux != sym {
+               sc := v.AuxInt
+               s := v.Aux
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               mem := y.Args[2]
-               x := y.Args[0]
-               if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) {
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v.reset(OpAMD64XORQmodify)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg3(ptr, x, mem)
+               v.reset(OpAMD64MOVQstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVQstore {sym} [off] ptr y:(ADDQ l:(MOVQload [off] {sym} ptr mem) x) mem)
-       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-       // result: (ADDQmodify [off] {sym} ptr x mem)
+       // match: (MOVQstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
+       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64ADDQ {
+               sc := v.AuxInt
+               sym1 := v.Aux
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               _ = y.Args[1]
-               y_0 := y.Args[0]
-               y_1 := y.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
-                       l := y_0
-                       if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
-                               continue
-                       }
-                       mem := l.Args[1]
-                       if ptr != l.Args[0] {
-                               continue
-                       }
-                       x := y_1
-                       if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64ADDQmodify)
-                       v.AuxInt = off
-                       v.Aux = sym
-                       v.AddArg3(ptr, x, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVQstore {sym} [off] ptr y:(SUBQ l:(MOVQload [off] {sym} ptr mem) x) mem)
-       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-       // result: (SUBQmodify [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64SUBQ {
-                       break
-               }
-               x := y.Args[1]
-               l := y.Args[0]
-               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
-                       break
-               }
-               mem := l.Args[1]
-               if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-                       break
-               }
-               v.reset(OpAMD64SUBQmodify)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg3(ptr, x, mem)
-               return true
-       }
-       // match: (MOVQstore {sym} [off] ptr y:(ANDQ l:(MOVQload [off] {sym} ptr mem) x) mem)
-       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-       // result: (ANDQmodify [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64ANDQ {
-                       break
-               }
-               _ = y.Args[1]
-               y_0 := y.Args[0]
-               y_1 := y.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
-                       l := y_0
-                       if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
-                               continue
-                       }
-                       mem := l.Args[1]
-                       if ptr != l.Args[0] {
-                               continue
-                       }
-                       x := y_1
-                       if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64ANDQmodify)
-                       v.AuxInt = off
-                       v.Aux = sym
-                       v.AddArg3(ptr, x, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVQstore {sym} [off] ptr y:(ORQ l:(MOVQload [off] {sym} ptr mem) x) mem)
-       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-       // result: (ORQmodify [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64ORQ {
-                       break
-               }
-               _ = y.Args[1]
-               y_0 := y.Args[0]
-               y_1 := y.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
-                       l := y_0
-                       if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
-                               continue
-                       }
-                       mem := l.Args[1]
-                       if ptr != l.Args[0] {
-                               continue
-                       }
-                       x := y_1
-                       if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64ORQmodify)
-                       v.AuxInt = off
-                       v.Aux = sym
-                       v.AddArg3(ptr, x, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVQstore {sym} [off] ptr y:(XORQ l:(MOVQload [off] {sym} ptr mem) x) mem)
-       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-       // result: (XORQmodify [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64XORQ {
-                       break
-               }
-               _ = y.Args[1]
-               y_0 := y.Args[0]
-               y_1 := y.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
-                       l := y_0
-                       if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
-                               continue
-                       }
-                       mem := l.Args[1]
-                       if ptr != l.Args[0] {
-                               continue
-                       }
-                       x := y_1
-                       if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64XORQmodify)
-                       v.AuxInt = off
-                       v.Aux = sym
-                       v.AddArg3(ptr, x, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVQstore {sym} [off] ptr y:(BTCQ l:(MOVQload [off] {sym} ptr mem) x) mem)
-       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-       // result: (BTCQmodify [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64BTCQ {
-                       break
-               }
-               x := y.Args[1]
-               l := y.Args[0]
-               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
-                       break
-               }
-               mem := l.Args[1]
-               if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-                       break
-               }
-               v.reset(OpAMD64BTCQmodify)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg3(ptr, x, mem)
-               return true
-       }
-       // match: (MOVQstore {sym} [off] ptr y:(BTRQ l:(MOVQload [off] {sym} ptr mem) x) mem)
-       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-       // result: (BTRQmodify [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64BTRQ {
-                       break
-               }
-               x := y.Args[1]
-               l := y.Args[0]
-               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
-                       break
-               }
-               mem := l.Args[1]
-               if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-                       break
-               }
-               v.reset(OpAMD64BTRQmodify)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg3(ptr, x, mem)
-               return true
-       }
-       // match: (MOVQstore {sym} [off] ptr y:(BTSQ l:(MOVQload [off] {sym} ptr mem) x) mem)
-       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-       // result: (BTSQmodify [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64BTSQ {
-                       break
-               }
-               x := y.Args[1]
-               l := y.Args[0]
-               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
-                       break
-               }
-               mem := l.Args[1]
-               if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-                       break
-               }
-               v.reset(OpAMD64BTSQmodify)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg3(ptr, x, mem)
-               return true
-       }
-       // match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
-       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a)
-       // result: (ADDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               a := v_1
-               if a.Op != OpAMD64ADDQconst {
-                       break
-               }
-               c := a.AuxInt
-               l := a.Args[0]
-               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
-                       break
-               }
-               mem := l.Args[1]
-               ptr2 := l.Args[0]
-               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) {
-                       break
-               }
-               v.reset(OpAMD64ADDQconstmodify)
-               v.AuxInt = makeValAndOff(c, off)
-               v.Aux = sym
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVQstore [off] {sym} ptr a:(ANDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
-       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a)
-       // result: (ANDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               a := v_1
-               if a.Op != OpAMD64ANDQconst {
-                       break
-               }
-               c := a.AuxInt
-               l := a.Args[0]
-               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
-                       break
-               }
-               mem := l.Args[1]
-               ptr2 := l.Args[0]
-               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQconstmodify)
-               v.AuxInt = makeValAndOff(c, off)
-               v.Aux = sym
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVQstore [off] {sym} ptr a:(ORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
-       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a)
-       // result: (ORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               a := v_1
-               if a.Op != OpAMD64ORQconst {
-                       break
-               }
-               c := a.AuxInt
-               l := a.Args[0]
-               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
-                       break
-               }
-               mem := l.Args[1]
-               ptr2 := l.Args[0]
-               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) {
-                       break
-               }
-               v.reset(OpAMD64ORQconstmodify)
-               v.AuxInt = makeValAndOff(c, off)
-               v.Aux = sym
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVQstore [off] {sym} ptr a:(XORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
-       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a)
-       // result: (XORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               a := v_1
-               if a.Op != OpAMD64XORQconst {
-                       break
-               }
-               c := a.AuxInt
-               l := a.Args[0]
-               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
-                       break
-               }
-               mem := l.Args[1]
-               ptr2 := l.Args[0]
-               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) {
-                       break
-               }
-               v.reset(OpAMD64XORQconstmodify)
-               v.AuxInt = makeValAndOff(c, off)
-               v.Aux = sym
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVQstore [off] {sym} ptr a:(BTCQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
-       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a)
-       // result: (BTCQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               a := v_1
-               if a.Op != OpAMD64BTCQconst {
-                       break
-               }
-               c := a.AuxInt
-               l := a.Args[0]
-               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
-                       break
-               }
-               mem := l.Args[1]
-               ptr2 := l.Args[0]
-               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) {
-                       break
-               }
-               v.reset(OpAMD64BTCQconstmodify)
-               v.AuxInt = makeValAndOff(c, off)
-               v.Aux = sym
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVQstore [off] {sym} ptr a:(BTRQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
-       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a)
-       // result: (BTRQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               a := v_1
-               if a.Op != OpAMD64BTRQconst {
-                       break
-               }
-               c := a.AuxInt
-               l := a.Args[0]
-               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
-                       break
-               }
-               mem := l.Args[1]
-               ptr2 := l.Args[0]
-               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) {
-                       break
-               }
-               v.reset(OpAMD64BTRQconstmodify)
-               v.AuxInt = makeValAndOff(c, off)
-               v.Aux = sym
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVQstore [off] {sym} ptr a:(BTSQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
-       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a)
-       // result: (BTSQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               a := v_1
-               if a.Op != OpAMD64BTSQconst {
-                       break
-               }
-               c := a.AuxInt
-               l := a.Args[0]
-               if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym {
-                       break
-               }
-               mem := l.Args[1]
-               ptr2 := l.Args[0]
-               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) {
-                       break
-               }
-               v.reset(OpAMD64BTSQconstmodify)
-               v.AuxInt = makeValAndOff(c, off)
-               v.Aux = sym
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVQstore [off] {sym} ptr (MOVQf2i val) mem)
-       // result: (MOVSDstore [off] {sym} ptr val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVQf2i {
-                       break
-               }
-               val := v_1.Args[0]
-               mem := v_2
-               v.reset(OpAMD64MOVSDstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       return false
-}
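
Everything deleted above from rewriteValueAMD64_OpAMD64MOVQstore is the read-modify-write family: a MOVQload from [off]{sym}(ptr), one ALU op consuming it, and a MOVQstore of the result back to the same address collapse into a single *Qmodify or *Qconstmodify instruction, provided the load and the op have no other uses and the store sees the same memory state the load did. A stand-alone sketch of that guard, using a hypothetical, pared-down stand-in for ssa.Value:

    package main

    import "fmt"

    // value is a hypothetical miniature of ssa.Value, just enough to
    // show the shape every deleted *Qmodify rule tests for.
    type value struct {
            Op     string
            AuxInt int64
            Aux    interface{}
            Args   []*value
            Uses   int32
    }

    // canFuseModify reports whether store(ptr, op(load(ptr, mem), x), mem)
    // may become one op-to-memory instruction: same address, same incoming
    // memory, and no other consumers of the intermediate values.
    func canFuseModify(store, op, load *value) bool {
            return op.Uses == 1 && load.Uses == 1 &&
                    load.AuxInt == store.AuxInt && load.Aux == store.Aux &&
                    store.Args[0] == load.Args[0] && // same ptr
                    store.Args[2] == load.Args[1] // same mem
    }

    func main() {
            mem := &value{Op: "mem"}
            ptr := &value{Op: "ptr"}
            load := &value{Op: "MOVQload", AuxInt: 8, Args: []*value{ptr, mem}, Uses: 1}
            op := &value{Op: "ADDQ", Args: []*value{load, {Op: "x"}}, Uses: 1}
            store := &value{Op: "MOVQstore", AuxInt: 8, Args: []*value{ptr, op, mem}}
            fmt.Println(canFuseModify(store, op, load)) // true
    }
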
-func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       config := b.Func.Config
-       // match: (MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-       for {
-               sc := v.AuxInt
-               s := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(ValAndOff(sc).canAdd(off)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVQstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-       for {
-               sc := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ {
-                       break
-               }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVQstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVQstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ1 {
-                       break
-               }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVQstoreconst [x] {sym1} (LEAQ8 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVQstoreconstidx8 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ8 {
-                       break
-               }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstoreconstidx8)
-               v.AuxInt = ValAndOff(x).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVQstoreconst [x] {sym} (ADDQ ptr idx) mem)
-       // result: (MOVQstoreconstidx1 [x] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQ {
-                       break
-               }
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               v.reset(OpAMD64MOVQstoreconstidx1)
-               v.AuxInt = x
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem))
-       // cond: config.useSSE && x.Uses == 1 && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)
-       // result: (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v_0
-               x := v_1
-               if x.Op != OpAMD64MOVQstoreconst {
-                       break
-               }
-               c2 := x.AuxInt
-               if x.Aux != s {
-                       break
-               }
-               mem := x.Args[1]
-               if p != x.Args[0] || !(config.useSSE && x.Uses == 1 && ValAndOff(c2).Off()+8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)) {
-                       break
-               }
-               v.reset(OpAMD64MOVOstore)
-               v.AuxInt = ValAndOff(c2).Off()
-               v.Aux = s
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVOconst, types.TypeInt128)
-               v0.AuxInt = 0
-               v.AddArg3(p, v0, mem)
-               return true
-       }
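
The rule just above is worth pausing on: two adjacent 8-byte zero stores (offsets Off and Off+8, both with Val == 0) merge into one 16-byte MOVOstore of a zero register when SSE is usable. A runnable check of the byte-level equivalence it relies on:

    package main

    import (
            "bytes"
            "encoding/binary"
            "fmt"
    )

    func main() {
            a := bytes.Repeat([]byte{0xff}, 16)
            binary.LittleEndian.PutUint64(a[0:], 0) // MOVQstoreconst, Off=0, Val=0
            binary.LittleEndian.PutUint64(a[8:], 0) // MOVQstoreconst, Off=8, Val=0
            b := make([]byte, 16) // one zeroed 16-byte region (the MOVOstore)
            fmt.Println(bytes.Equal(a, b)) // true
    }
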
-       // match: (MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-       for {
-               sc := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAL {
-                       break
-               }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVQstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-       for {
-               sc := v.AuxInt
-               s := v.Aux
-               if v_0.Op != OpAMD64ADDLconst {
-                       break
-               }
-               off := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(ValAndOff(sc).canAdd(off)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       return false
-}
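
The idx-forming rules deleted above (LEAQ1, LEAQ8, and bare ADDQ feeding a MOVQstoreconst) are exactly the ordering-sensitive rewrites this commit pushes out of the rule files; note, for example, that the LEAQ1/LEAQ8 variants fold off into the ValAndOff with no canAdd guard, unlike their ADDQconst and LEAQ siblings a few rules earlier. The new pass replaces per-pair rules with a lookup keyed on (memory op, address op), in the style of the combine map in addressingmodes.go. A string-keyed sketch only; the real table uses the generated Op constants and, in this commit, covers loads:

    package main

    import "fmt"

    // combine pairs a memory op with the address computation feeding it,
    // the way addressingmodes.go pairs loads with LEAQx ops. Entries here
    // are illustrative names, not the real enum.
    var combine = map[[2]string]string{
            {"MOVQload", "LEAQ1"}: "MOVQloadidx1",
            {"MOVQload", "LEAQ8"}: "MOVQloadidx8",
    }

    func main() {
            if op, ok := combine[[2]string{"MOVQload", "LEAQ8"}]; ok {
                    fmt.Println("rewrite to", op) // rewrite to MOVQloadidx8
            }
    }
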
-func rewriteValueAMD64_OpAMD64MOVQstoreconstidx1(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVQstoreconstidx1 [c] {sym} ptr (SHLQconst [3] idx) mem)
-       // result: (MOVQstoreconstidx8 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 {
-                               continue
-                       }
-                       idx := v_1.Args[0]
-                       mem := v_2
-                       v.reset(OpAMD64MOVQstoreconstidx8)
-                       v.AuxInt = c
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVQstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
-       // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVQstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       c := v_0.AuxInt
-                       ptr := v_0.Args[0]
-                       idx := v_1
-                       mem := v_2
-                       if !(ValAndOff(x).canAdd(c)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVQstoreconstidx1)
-                       v.AuxInt = ValAndOff(x).add(c)
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVQstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
-       // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVQstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       c := v_1.AuxInt
-                       idx := v_1.Args[0]
-                       mem := v_2
-                       if !(ValAndOff(x).canAdd(c)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVQstoreconstidx1)
-                       v.AuxInt = ValAndOff(x).add(c)
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
-               }
-               break
-       }
-       return false
-}
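
The "for _i0 := 0; _i0 <= 1; ..." loops threaded through these rules are the generator's idiom for commutative ops: try the match with the two arguments in both orders before giving up. Distilled to its core:

    package main

    import "fmt"

    // tryBothOrders is the commutative-match idiom from the generated
    // rules above: swap the arguments on the second pass.
    func tryBothOrders(a, b string, pred func(x, y string) bool) bool {
            for i := 0; i <= 1; i, a, b = i+1, b, a {
                    if pred(a, b) {
                            return true
                    }
            }
            return false
    }

    func main() {
            isPtrIdx := func(x, y string) bool { return x == "ptr" && y == "idx" }
            fmt.Println(tryBothOrders("idx", "ptr", isPtrIdx)) // true
    }
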
-func rewriteValueAMD64_OpAMD64MOVQstoreconstidx8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVQstoreconstidx8 [x] {sym} (ADDQconst [c] ptr) idx mem)
-       // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVQstoreconstidx8 [ValAndOff(x).add(c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v_1
-               mem := v_2
-               if !(ValAndOff(x).canAdd(c)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstoreconstidx8)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVQstoreconstidx8 [x] {sym} ptr (ADDQconst [c] idx) mem)
-       // cond: ValAndOff(x).canAdd(8*c)
-       // result: (MOVQstoreconstidx8 [ValAndOff(x).add(8*c)] {sym} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v_2
-               if !(ValAndOff(x).canAdd(8 * c)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstoreconstidx8)
-               v.AuxInt = ValAndOff(x).add(8 * c)
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       return false
-}
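
In the idx8 rules above, a constant added to the pointer folds into the displacement as-is, but a constant added to the index folds in times eight, because the index is scaled: base + 8*(idx+c) + off equals base + 8*idx + (off + 8*c). Hence ValAndOff(x).add(8*c) rather than add(c). Checked numerically:

    package main

    import "fmt"

    // ea8 is the effective address of a scale-8 indexed operand.
    func ea8(base, idx, off int64) int64 { return base + 8*idx + off }

    func main() {
            base, idx, off, c := int64(0x1000), int64(3), int64(40), int64(2)
            fmt.Println(ea8(base, idx+c, off) == ea8(base, idx, off+8*c)) // true
    }
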
-func rewriteValueAMD64_OpAMD64MOVQstoreidx1(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVQstoreidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem)
-       // result: (MOVQstoreidx8 [c] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 {
-                               continue
-                       }
-                       idx := v_1.Args[0]
-                       val := v_2
-                       mem := v_3
-                       v.reset(OpAMD64MOVQstoreidx8)
-                       v.AuxInt = c
-                       v.Aux = sym
-                       v.AddArg4(ptr, idx, val, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVQstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVQstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       d := v_0.AuxInt
-                       ptr := v_0.Args[0]
-                       idx := v_1
-                       val := v_2
-                       mem := v_3
-                       if !(is32Bit(c + d)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVQstoreidx1)
-                       v.AuxInt = c + d
-                       v.Aux = sym
-                       v.AddArg4(ptr, idx, val, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVQstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVQstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       d := v_1.AuxInt
-                       idx := v_1.Args[0]
-                       val := v_2
-                       mem := v_3
-                       if !(is32Bit(c + d)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVQstoreidx1)
-                       v.AuxInt = c + d
-                       v.Aux = sym
-                       v.AddArg4(ptr, idx, val, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVQstoreidx1 [i] {s} p (MOVQconst [c]) w mem)
-       // cond: is32Bit(i+c)
-       // result: (MOVQstore [i+c] {s} p w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       if v_1.Op != OpAMD64MOVQconst {
-                               continue
-                       }
-                       c := v_1.AuxInt
-                       w := v_2
-                       mem := v_3
-                       if !(is32Bit(i + c)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVQstore)
-                       v.AuxInt = i + c
-                       v.Aux = s
-                       v.AddArg3(p, w, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVQstoreidx1 [off] {s} ptr idx (MOVQconst [c]) mem)
-       // cond: validValAndOff(c, off)
-       // result: (MOVQstoreconstidx1 [makeValAndOff(c, off)] {s} ptr idx mem)
-       for {
-               off := v.AuxInt
-               s := v.Aux
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_2.AuxInt
-               mem := v_3
-               if !(validValAndOff(c, off)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstoreconstidx1)
-               v.AuxInt = makeValAndOff(c, off)
-               v.Aux = s
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       return false
-}
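
The first MOVQstoreidx1 rule above absorbs a left shift by 3 into the scale: x<<3 == 8*x for any 64-bit x, so (SHLQconst [3] idx) is just a pre-scaled index and the idx1 op upgrades to idx8, which the hardware addresses directly. The identity, checked:

    package main

    import "fmt"

    func main() {
            for _, x := range []int64{0, 1, 5, -7} {
                    fmt.Println(x<<3 == 8*x) // true: shifting by 3 is the *8 scale
            }
    }
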
-func rewriteValueAMD64_OpAMD64MOVQstoreidx8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVQstoreidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVQstoreidx8 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v_1
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c + d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstoreidx8)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVQstoreidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+8*d)
-       // result: (MOVQstoreidx8 [c+8*d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c + 8*d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstoreidx8)
-               v.AuxInt = c + 8*d
-               v.Aux = sym
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVQstoreidx8 [i] {s} p (MOVQconst [c]) w mem)
-       // cond: is32Bit(i+8*c)
-       // result: (MOVQstore [i+8*c] {s} p w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               w := v_2
-               mem := v_3
-               if !(is32Bit(i + 8*c)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = i + 8*c
-               v.Aux = s
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       // match: (MOVQstoreidx8 [off] {s} ptr idx (MOVQconst [c]) mem)
-       // cond: validValAndOff(c, off)
-       // result: (MOVQstoreconstidx8 [makeValAndOff(c, off)] {s} ptr idx mem)
-       for {
-               off := v.AuxInt
-               s := v.Aux
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_2.AuxInt
-               mem := v_3
-               if !(validValAndOff(c, off)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstoreconstidx8)
-               v.AuxInt = makeValAndOff(c, off)
-               v.Aux = s
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       return false
-}
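
The MOVQconst rules above collapse a constant index entirely: with idx == c known at compile time, [i](ptr)(idx*8) is just the plain displacement i+8*c, guarded by is32Bit because x86-64 displacements are signed 32-bit immediates. The guard, as defined in rewrite.go:

    package main

    import "fmt"

    // is32Bit reports whether n fits in a signed 32-bit immediate,
    // the displacement limit these rules check before folding.
    func is32Bit(n int64) bool { return n == int64(int32(n)) }

    func main() {
            fmt.Println(is32Bit(1<<31-1), is32Bit(1<<31)) // true false
    }
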
-func rewriteValueAMD64_OpAMD64MOVSDload(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVSDload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVSDload [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1 + off2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVSDload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg2(base, mem)
-               return true
-       }
-       // match: (MOVSDload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ1 {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDloadidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVSDload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ8 {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDloadidx8)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVSDload [off] {sym} (ADDQ ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVSDloadidx1 [off] {sym} ptr idx mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQ {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr := v_0_0
-                       idx := v_0_1
-                       mem := v_1
-                       if !(ptr.Op != OpSB) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVSDloadidx1)
-                       v.AuxInt = off
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVSDload [off] {sym} ptr (MOVQstore [off] {sym} ptr val _))
-       // result: (MOVQi2f val)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVQstore || v_1.AuxInt != off || v_1.Aux != sym {
-                       break
-               }
-               val := v_1.Args[1]
-               if ptr != v_1.Args[0] {
-                       break
-               }
-               v.reset(OpAMD64MOVQi2f)
-               v.AddArg(val)
-               return true
-       }
-       return false
-}
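
The last MOVSDload rule above is store-to-load forwarding across the integer/float boundary: a MOVSDload that reads back exactly what a same-address MOVQstore just wrote needs no memory traffic, only a bit-preserving register move (MOVQi2f); MOVQf2i and the MOVSDstore rule further down are the mirror image. At the Go level the reinterpretation is:

    package main

    import (
            "fmt"
            "math"
    )

    func main() {
            bits := uint64(0x400921FB54442D18)       // hypothetical stored pattern
            f := math.Float64frombits(bits)          // what MOVQi2f produces
            fmt.Println(f)                           // 3.141592653589793
            fmt.Println(math.Float64bits(f) == bits) // round trip: MOVQf2i
    }
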
-func rewriteValueAMD64_OpAMD64MOVSDloadidx1(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVSDloadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem)
-       // result: (MOVSDloadidx8 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 {
-                       break
-               }
-               idx := v_1.Args[0]
-               mem := v_2
-               v.reset(OpAMD64MOVSDloadidx8)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVSDloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVSDloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v_1
-               mem := v_2
-               if !(is32Bit(c + d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVSDloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVSDloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v_2
-               if !(is32Bit(c + d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVSDloadidx1 [i] {s} p (MOVQconst [c]) mem)
-       // cond: is32Bit(i+c)
-       // result: (MOVSDload [i+c] {s} p mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               mem := v_2
-               if !(is32Bit(i + c)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDload)
-               v.AuxInt = i + c
-               v.Aux = s
-               v.AddArg2(p, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSDloadidx8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVSDloadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVSDloadidx8 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v_1
-               mem := v_2
-               if !(is32Bit(c + d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDloadidx8)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVSDloadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond: is32Bit(c+8*d)
-       // result: (MOVSDloadidx8 [c+8*d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v_2
-               if !(is32Bit(c + 8*d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDloadidx8)
-               v.AuxInt = c + 8*d
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVSDloadidx8 [i] {s} p (MOVQconst [c]) mem)
-       // cond: is32Bit(i+8*c)
-       // result: (MOVSDload [i+8*c] {s} p mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               mem := v_2
-               if !(is32Bit(i + 8*c)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDload)
-               v.AuxInt = i + 8*c
-               v.Aux = s
-               v.AddArg2(p, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSDstore(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVSDstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVSDstore [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(off1 + off2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       // match: (MOVSDstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(base, val, mem)
-               return true
-       }
-       // match: (MOVSDstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ1 {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDstoreidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVSDstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ8 {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDstoreidx8)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVSDstore [off] {sym} (ADDQ ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVSDstoreidx1 [off] {sym} ptr idx val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQ {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr := v_0_0
-                       idx := v_0_1
-                       val := v_1
-                       mem := v_2
-                       if !(ptr.Op != OpSB) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVSDstoreidx1)
-                       v.AuxInt = off
-                       v.Aux = sym
-                       v.AddArg4(ptr, idx, val, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVSDstore [off] {sym} ptr (MOVQi2f val) mem)
-       // result: (MOVQstore [off] {sym} ptr val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVQi2f {
-                       break
-               }
-               val := v_1.Args[0]
-               mem := v_2
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSDstoreidx1(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVSDstoreidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem)
-       // result: (MOVSDstoreidx8 [c] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 {
-                       break
-               }
-               idx := v_1.Args[0]
-               val := v_2
-               mem := v_3
-               v.reset(OpAMD64MOVSDstoreidx8)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVSDstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVSDstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v_1
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c + d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVSDstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVSDstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c + d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVSDstoreidx1 [i] {s} p (MOVQconst [c]) w mem)
-       // cond: is32Bit(i+c)
-       // result: (MOVSDstore [i+c] {s} p w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               w := v_2
-               mem := v_3
-               if !(is32Bit(i + c)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDstore)
-               v.AuxInt = i + c
-               v.Aux = s
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSDstoreidx8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVSDstoreidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVSDstoreidx8 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v_1
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c + d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDstoreidx8)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVSDstoreidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+8*d)
-       // result: (MOVSDstoreidx8 [c+8*d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c + 8*d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDstoreidx8)
-               v.AuxInt = c + 8*d
-               v.Aux = sym
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVSDstoreidx8 [i] {s} p (MOVQconst [c]) w mem)
-       // cond: is32Bit(i+8*c)
-       // result: (MOVSDstore [i+8*c] {s} p w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               w := v_2
-               mem := v_3
-               if !(is32Bit(i + 8*c)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDstore)
-               v.AuxInt = i + 8*c
-               v.Aux = s
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSSload(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVSSload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVSSload [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1 + off2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVSSload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg2(base, mem)
-               return true
-       }
-       // match: (MOVSSload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ1 {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSloadidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVSSload [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ4 {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSloadidx4)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVSSload [off] {sym} (ADDQ ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVSSloadidx1 [off] {sym} ptr idx mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQ {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr := v_0_0
-                       idx := v_0_1
-                       mem := v_1
-                       if !(ptr.Op != OpSB) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVSSloadidx1)
-                       v.AuxInt = off
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVSSload [off] {sym} ptr (MOVLstore [off] {sym} ptr val _))
-       // result: (MOVLi2f val)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVLstore || v_1.AuxInt != off || v_1.Aux != sym {
-                       break
-               }
-               val := v_1.Args[1]
-               if ptr != v_1.Args[0] {
-                       break
-               }
-               v.reset(OpAMD64MOVLi2f)
-               v.AddArg(val)
-               return true
-       }
-       return false
-}
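
The final MOVSSload rule above is store-to-load forwarding across register files: a float load that can only observe the integer word just stored at the same {sym, off, ptr} becomes a single GP-to-XMM move (MOVLi2f), skipping memory entirely. Roughly the Go source shape that produces this pattern is a float32 bit-cast; float32frombits below is a self-contained stand-in for math.Float32frombits, not the library function itself:

package main

import (
	"fmt"
	"unsafe"
)

func float32frombits(b uint32) float32 {
	// store of b, then a float load of the same stack slot
	return *(*float32)(unsafe.Pointer(&b))
}

func main() {
	fmt.Println(float32frombits(0x3f800000)) // 1
}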
-func rewriteValueAMD64_OpAMD64MOVSSloadidx1(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVSSloadidx1 [c] {sym} ptr (SHLQconst [2] idx) mem)
-       // result: (MOVSSloadidx4 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 2 {
-                       break
-               }
-               idx := v_1.Args[0]
-               mem := v_2
-               v.reset(OpAMD64MOVSSloadidx4)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVSSloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVSSloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v_1
-               mem := v_2
-               if !(is32Bit(c + d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVSSloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVSSloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v_2
-               if !(is32Bit(c + d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVSSloadidx1 [i] {s} p (MOVQconst [c]) mem)
-       // cond: is32Bit(i+c)
-       // result: (MOVSSload [i+c] {s} p mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               mem := v_2
-               if !(is32Bit(i + c)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSload)
-               v.AuxInt = i + c
-               v.Aux = s
-               v.AddArg2(p, mem)
-               return true
-       }
-       return false
-}
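
The first MOVSSloadidx1 rule above converts a shifted index into a scaled addressing mode: idx<<2 and 4*idx are the same value, and x86 encodes the *4 directly in the SIB byte, so the shift instruction disappears. The one-line identity it relies on:

package main

import "fmt"

func main() {
	idx := int64(7)
	fmt.Println(idx<<2 == 4*idx) // true: loadidx1 of (SHLQconst [2] idx) == loadidx4 of idx
}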
-func rewriteValueAMD64_OpAMD64MOVSSloadidx4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVSSloadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVSSloadidx4 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v_1
-               mem := v_2
-               if !(is32Bit(c + d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSloadidx4)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVSSloadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond: is32Bit(c+4*d)
-       // result: (MOVSSloadidx4 [c+4*d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v_2
-               if !(is32Bit(c + 4*d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSloadidx4)
-               v.AuxInt = c + 4*d
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVSSloadidx4 [i] {s} p (MOVQconst [c]) mem)
-       // cond: is32Bit(i+4*c)
-       // result: (MOVSSload [i+4*c] {s} p mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               mem := v_2
-               if !(is32Bit(i + 4*c)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSload)
-               v.AuxInt = i + 4*c
-               v.Aux = s
-               v.AddArg2(p, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSSstore(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVSSstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVSSstore [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(off1 + off2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       // match: (MOVSSstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(base, val, mem)
-               return true
-       }
-       // match: (MOVSSstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ1 {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSstoreidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVSSstore [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ4 {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSstoreidx4)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVSSstore [off] {sym} (ADDQ ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVSSstoreidx1 [off] {sym} ptr idx val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQ {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr := v_0_0
-                       idx := v_0_1
-                       val := v_1
-                       mem := v_2
-                       if !(ptr.Op != OpSB) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVSSstoreidx1)
-                       v.AuxInt = off
-                       v.Aux = sym
-                       v.AddArg4(ptr, idx, val, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVSSstore [off] {sym} ptr (MOVLi2f val) mem)
-       // result: (MOVLstore [off] {sym} ptr val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVLi2f {
-                       break
-               }
-               val := v_1.Args[0]
-               mem := v_2
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       return false
-}
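
The LEAQ folds above must combine two symbolic bases, which only works when at most one side actually carries a symbol; the other contributes just an offset. A standalone paraphrase of that check, modeled on the compiler's canMergeSym/mergeSym helpers (these lowercase versions are illustrative, not the real ones):

package main

import "fmt"

// canMergeSym: two symbols can merge only if at least one is absent.
func canMergeSym(a, b interface{}) bool { return a == nil || b == nil }

// mergeSym keeps whichever symbol is present.
func mergeSym(a, b interface{}) interface{} {
	if a == nil {
		return b
	}
	return a
}

func main() {
	var g interface{} = "main.globl"
	fmt.Println(canMergeSym(g, nil), mergeSym(g, nil)) // true main.globl
	fmt.Println(canMergeSym(g, g))                     // false: two distinct bases can't fold
}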
-func rewriteValueAMD64_OpAMD64MOVSSstoreidx1(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVSSstoreidx1 [c] {sym} ptr (SHLQconst [2] idx) val mem)
-       // result: (MOVSSstoreidx4 [c] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 2 {
-                       break
-               }
-               idx := v_1.Args[0]
-               val := v_2
-               mem := v_3
-               v.reset(OpAMD64MOVSSstoreidx4)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVSSstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVSSstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v_1
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c + d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVSSstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVSSstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c + d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVSSstoreidx1 [i] {s} p (MOVQconst [c]) w mem)
-       // cond: is32Bit(i+c)
-       // result: (MOVSSstore [i+c] {s} p w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               w := v_2
-               mem := v_3
-               if !(is32Bit(i + c)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSstore)
-               v.AuxInt = i + c
-               v.Aux = s
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSSstoreidx4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVSSstoreidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVSSstoreidx4 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v_1
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c + d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSstoreidx4)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVSSstoreidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+4*d)
-       // result: (MOVSSstoreidx4 [c+4*d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c + 4*d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSstoreidx4)
-               v.AuxInt = c + 4*d
-               v.Aux = sym
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVSSstoreidx4 [i] {s} p (MOVQconst [c]) w mem)
-       // cond: is32Bit(i+4*c)
-       // result: (MOVSSstore [i+4*c] {s} p w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               w := v_2
-               mem := v_3
-               if !(is32Bit(i + 4*c)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSstore)
-               v.AuxInt = i + 4*c
-               v.Aux = s
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWQSX(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MOVWQSX x:(MOVWload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v_0
-               if x.Op != OpAMD64MOVWload {
-                       break
-               }
-               off := x.AuxInt
-               sym := x.Aux
-               mem := x.Args[1]
-               ptr := x.Args[0]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               b = x.Block
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVWQSX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v_0
-               if x.Op != OpAMD64MOVLload {
-                       break
-               }
-               off := x.AuxInt
-               sym := x.Aux
-               mem := x.Args[1]
-               ptr := x.Args[0]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               b = x.Block
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVWQSX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v_0
-               if x.Op != OpAMD64MOVQload {
-                       break
-               }
-               off := x.AuxInt
-               sym := x.Aux
-               mem := x.Args[1]
-               ptr := x.Args[0]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               b = x.Block
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVWQSX (ANDLconst [c] x))
-       // cond: c & 0x8000 == 0
-       // result: (ANDLconst [c & 0x7fff] x)
-       for {
-               if v_0.Op != OpAMD64ANDLconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(c&0x8000 == 0) {
-                       break
-               }
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = c & 0x7fff
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWQSX (MOVWQSX x))
-       // result: (MOVWQSX x)
-       for {
-               if v_0.Op != OpAMD64MOVWQSX {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVWQSX)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWQSX (MOVBQSX x))
-       // result: (MOVBQSX x)
-       for {
-               if v_0.Op != OpAMD64MOVBQSX {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVBQSX)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
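
Dropping MOVWQSX after ANDLconst is sound because a mask whose bit 15 (0x8000) is clear forces the sign bit of the low 16-bit word to zero, so sign-extension degenerates into the cheaper mask c & 0x7fff. A brute-force check of that identity over a small input range:

package main

import "fmt"

func main() {
	const c = uint32(0x1234) // any mask with bit 0x8000 clear
	ok := true
	for x := uint32(0); x < 1<<17; x++ {
		lhs := int64(int16(x & c))   // MOVWQSX (ANDLconst [c] x)
		rhs := int64(x & c & 0x7fff) // ANDLconst [c & 0x7fff] x
		if lhs != rhs {
			ok = false
		}
	}
	fmt.Println(ok) // true
}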
-func rewriteValueAMD64_OpAMD64MOVWQSXload(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVWQSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVWQSX x)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVWstore {
-                       break
-               }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               x := v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVWQSX)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVWQSXload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg2(base, mem)
-               return true
-       }
-       return false
-}
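
The first MOVWQSXload rule above is plain store-to-load forwarding: when the load can only observe the word just stored through the same {sym, off, ptr} (the isSamePtr guard), it is replaced by sign-extending the stored register directly. Roughly the Go shape that triggers it, as a hypothetical example:

package main

import "fmt"

func roundTrip(p *int16, v int16) int64 {
	*p = v           // MOVWstore
	return int64(*p) // sign-extending reload of the same ptr/off -> MOVWQSX v
}

func main() {
	var x int16
	fmt.Println(roundTrip(&x, -5)) // -5
}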
-func rewriteValueAMD64_OpAMD64MOVWQZX(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MOVWQZX x:(MOVWload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v_0
-               if x.Op != OpAMD64MOVWload {
-                       break
-               }
-               off := x.AuxInt
-               sym := x.Aux
-               mem := x.Args[1]
-               ptr := x.Args[0]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               b = x.Block
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVWQZX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v_0
-               if x.Op != OpAMD64MOVLload {
-                       break
-               }
-               off := x.AuxInt
-               sym := x.Aux
-               mem := x.Args[1]
-               ptr := x.Args[0]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               b = x.Block
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVWQZX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v_0
-               if x.Op != OpAMD64MOVQload {
-                       break
-               }
-               off := x.AuxInt
-               sym := x.Aux
-               mem := x.Args[1]
-               ptr := x.Args[0]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               b = x.Block
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVWQZX x)
-       // cond: zeroUpper48Bits(x,3)
-       // result: x
-       for {
-               x := v_0
-               if !(zeroUpper48Bits(x, 3)) {
-                       break
-               }
-               v.copyOf(x)
-               return true
-       }
-       // match: (MOVWQZX x:(MOVWloadidx1 [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
-       for {
-               x := v_0
-               if x.Op != OpAMD64MOVWloadidx1 {
-                       break
-               }
-               off := x.AuxInt
-               sym := x.Aux
-               mem := x.Args[2]
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVWQZX x:(MOVWloadidx2 [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
-       for {
-               x := v_0
-               if x.Op != OpAMD64MOVWloadidx2 {
-                       break
-               }
-               off := x.AuxInt
-               sym := x.Aux
-               mem := x.Args[2]
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx2, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVWQZX (ANDLconst [c] x))
-       // result: (ANDLconst [c & 0xffff] x)
-       for {
-               if v_0.Op != OpAMD64ANDLconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = c & 0xffff
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWQZX (MOVWQZX x))
-       // result: (MOVWQZX x)
-       for {
-               if v_0.Op != OpAMD64MOVWQZX {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVWQZX)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWQZX (MOVBQZX x))
-       // result: (MOVBQZX x)
-       for {
-               if v_0.Op != OpAMD64MOVBQZX {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVBQZX)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
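
MOVWQZX can also be dropped outright when its operand provably has zero upper 48 bits already (say, it came from a 16-bit load), which is what the zeroUpper48Bits(x, 3) guard establishes. The identity that makes the extension a no-op:

package main

import "fmt"

func main() {
	x := uint64(0xbeef)                 // upper 48 bits already zero
	fmt.Println(uint64(uint16(x)) == x) // true: zero-extension changes nothing
}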
-func rewriteValueAMD64_OpAMD64MOVWload(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       config := b.Func.Config
-       // match: (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVWQZX x)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVWstore {
-                       break
-               }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               x := v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVWQZX)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVWload [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1 + off2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVWload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg2(base, mem)
-               return true
-       }
-       // match: (MOVWload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ1 {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVWloadidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVWload [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ2 {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVWloadidx2)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVWload [off] {sym} (ADDQ ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVWloadidx1 [off] {sym} ptr idx mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQ {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr := v_0_0
-                       idx := v_0_1
-                       mem := v_1
-                       if !(ptr.Op != OpSB) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVWloadidx1)
-                       v.AuxInt = off
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
-       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAL {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v_1
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg2(base, mem)
-               return true
-       }
-       // match: (MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVWload [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDLconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(off1 + off2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVWload [off] {sym} (SB) _)
-       // cond: symIsRO(sym)
-       // result: (MOVLconst [int64(read16(sym, off, config.ctxt.Arch.ByteOrder))])
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpSB || !(symIsRO(sym)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int64(read16(sym, off, config.ctxt.Arch.ByteOrder))
-               return true
-       }
-       return false
-}
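
The last MOVWload rule above constant-folds a 16-bit load from a read-only symbol: the compiler reads the bytes out of the object data at build time, honoring the target's byte order, and the load becomes a MOVLconst. A sketch of that read, with encoding/binary standing in for the compiler's read16 helper:

package main

import (
	"encoding/binary"
	"fmt"
)

// read16 models pulling two bytes out of a read-only symbol's data
// at the given offset, in the target byte order.
func read16(data []byte, off int64, order binary.ByteOrder) uint16 {
	return order.Uint16(data[off : off+2])
}

func main() {
	ro := []byte{0x01, 0x02, 0x03, 0x04} // contents of a read-only symbol
	fmt.Printf("%#x\n", read16(ro, 2, binary.LittleEndian)) // 0x403
}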
-func rewriteValueAMD64_OpAMD64MOVWloadidx1(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVWloadidx1 [c] {sym} ptr (SHLQconst [1] idx) mem)
-       // result: (MOVWloadidx2 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 1 {
-                               continue
-                       }
-                       idx := v_1.Args[0]
-                       mem := v_2
-                       v.reset(OpAMD64MOVWloadidx2)
-                       v.AuxInt = c
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVWloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       d := v_0.AuxInt
-                       ptr := v_0.Args[0]
-                       idx := v_1
-                       mem := v_2
-                       if !(is32Bit(c + d)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVWloadidx1)
-                       v.AuxInt = c + d
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVWloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       d := v_1.AuxInt
-                       idx := v_1.Args[0]
-                       mem := v_2
-                       if !(is32Bit(c + d)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVWloadidx1)
-                       v.AuxInt = c + d
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVWloadidx1 [i] {s} p (MOVQconst [c]) mem)
-       // cond: is32Bit(i+c)
-       // result: (MOVWload [i+c] {s} p mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       if v_1.Op != OpAMD64MOVQconst {
-                               continue
-                       }
-                       c := v_1.AuxInt
-                       mem := v_2
-                       if !(is32Bit(i + c)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVWload)
-                       v.AuxInt = i + c
-                       v.Aux = s
-                       v.AddArg2(p, mem)
-                       return true
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+                       break
                }
-               break
+               v.reset(OpAMD64MOVQstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg2(ptr, mem)
+               return true
        }
-       return false
-}
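
Every rule in the deleted MOVWloadidx1 function above runs inside the generated loop "for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { ... }", which retries the pattern with the operands swapped because the op's ptr and idx arguments are commutative. The same swap idiom in isolation, with a hypothetical predicate:

package main

import "fmt"

// matchEither tries want with the arguments in both orders,
// mirroring the matcher's commutative-argument loop.
func matchEither(a, b int, want func(x, y int) bool) bool {
	for i := 0; i <= 1; i, a, b = i+1, b, a { // swap on the second pass
		if want(a, b) {
			return true
		}
	}
	return false
}

func main() {
	constSecond := func(x, y int) bool { return y == 42 }
	fmt.Println(matchEither(42, 7, constSecond)) // true, via the swapped order
}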
-func rewriteValueAMD64_OpAMD64MOVWloadidx2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVWloadidx2 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVWloadidx2 [c+d] {sym} ptr idx mem)
+       // match: (MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [c2] {s} p0 mem))
+       // cond: config.useSSE && x.Uses == 1 && same(p0, p1, 1) && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)
+       // result: (MOVOstore [ValAndOff(c2).Off()] {s} p0 (MOVOconst [0]) mem)
        for {
                c := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
+               s := v.Aux
+               p1 := v_0
+               x := v_1
+               if x.Op != OpAMD64MOVQstoreconst {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v_1
-               mem := v_2
-               if !(is32Bit(c + d)) {
+               c2 := x.AuxInt
+               if x.Aux != s {
                        break
                }
-               v.reset(OpAMD64MOVWloadidx2)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
+               mem := x.Args[1]
+               p0 := x.Args[0]
+               if !(config.useSSE && x.Uses == 1 && same(p0, p1, 1) && ValAndOff(c2).Off()+8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVOstore)
+               v.AuxInt = ValAndOff(c2).Off()
+               v.Aux = s
+               v0 := b.NewValue0(x.Pos, OpAMD64MOVOconst, types.TypeInt128)
+               v0.AuxInt = 0
+               v.AddArg3(p0, v0, mem)
                return true
        }
-       // match: (MOVWloadidx2 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond: is32Bit(c+2*d)
-       // result: (MOVWloadidx2 [c+2*d] {sym} ptr idx mem)
+       // match: (MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
+       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64ADDQconst {
+               sc := v.AuxInt
+               sym1 := v.Aux
+               if v_0.Op != OpAMD64LEAL {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v_2
-               if !(is32Bit(c + 2*d)) {
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v.reset(OpAMD64MOVWloadidx2)
-               v.AuxInt = c + 2*d
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpAMD64MOVQstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVWloadidx2 [i] {s} p (MOVQconst [c]) mem)
-       // cond: is32Bit(i+2*c)
-       // result: (MOVWload [i+2*c] {s} p mem)
+       // match: (MOVQstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
        for {
-               i := v.AuxInt
+               sc := v.AuxInt
                s := v.Aux
-               p := v_0
-               if v_1.Op != OpAMD64MOVQconst {
+               if v_0.Op != OpAMD64ADDLconst {
                        break
                }
-               c := v_1.AuxInt
-               mem := v_2
-               if !(is32Bit(i + 2*c)) {
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v.reset(OpAMD64MOVWload)
-               v.AuxInt = i + 2*c
+               v.reset(OpAMD64MOVQstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
                v.Aux = s
-               v.AddArg2(p, mem)
+               v.AddArg2(ptr, mem)
                return true
        }
        return false
 }
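
MOVQstoreconst carries both the constant to store and the byte offset in a single AuxInt, and the MOVOstore rule above shows the payoff: two adjacent 8-byte zero stores merge into one 16-byte SSE store. A standalone model of that packing and of the canAdd/add guards; the layout (value in the high 32 bits, offset in the low 32) is modeled on the compiler's ValAndOff, and these lowercase helpers are illustrative, not the real API:

package main

import "fmt"

type valAndOff int64

func makeValAndOff(val, off int64) valAndOff { return valAndOff(val<<32 | int64(uint32(off))) }
func (x valAndOff) val() int64               { return int64(x) >> 32 }
func (x valAndOff) off() int64               { return int64(int32(x)) }
func (x valAndOff) canAdd(d int64) bool      { n := x.off() + d; return n == int64(int32(n)) }
func (x valAndOff) add(d int64) valAndOff    { return makeValAndOff(x.val(), x.off()+d) }

func main() {
	sc := makeValAndOff(0, 16) // store constant 0 at offset 16
	fmt.Println(sc.val(), sc.off(), sc.canAdd(8), sc.add(8).off()) // 0 16 true 24
}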
-func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpAMD64MOVSDload(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (MOVWstore [off] {sym} ptr (MOVWQSX x) mem)
-       // result: (MOVWstore [off] {sym} ptr x mem)
+       // match: (MOVSDload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVSDload [off1+off2] {sym} ptr mem)
        for {
-               off := v.AuxInt
+               off1 := v.AuxInt
                sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVWQSX {
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v_2
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = off
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSDload)
+               v.AuxInt = off1 + off2
                v.Aux = sym
-               v.AddArg3(ptr, x, mem)
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVWstore [off] {sym} ptr (MOVWQZX x) mem)
-       // result: (MOVWstore [off] {sym} ptr x mem)
+       // match: (MOVSDload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSDload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg2(base, mem)
+               return true
+       }
+       // match: (MOVSDload [off] {sym} ptr (MOVQstore [off] {sym} ptr val _))
+       // result: (MOVQi2f val)
        for {
                off := v.AuxInt
                sym := v.Aux
                ptr := v_0
-               if v_1.Op != OpAMD64MOVWQZX {
+               if v_1.Op != OpAMD64MOVQstore || v_1.AuxInt != off || v_1.Aux != sym {
                        break
                }
-               x := v_1.Args[0]
-               mem := v_2
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg3(ptr, x, mem)
+               val := v_1.Args[1]
+               if ptr != v_1.Args[0] {
+                       break
+               }
+               v.reset(OpAMD64MOVQi2f)
+               v.AddArg(val)
                return true
        }
-       // match: (MOVWstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVSDstore(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVSDstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVWstore [off1+off2] {sym} ptr val mem)
+       // result: (MOVSDstore [off1+off2] {sym} ptr val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -18459,57 +14464,81 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
+               v.reset(OpAMD64MOVSDstore)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem)
-       // cond: validOff(off)
-       // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
+       // match: (MOVSDstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVLconst {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               c := v_1.AuxInt
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v_1
                mem := v_2
-               if !(validOff(off)) {
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = makeValAndOff(int64(int16(c)), off)
-               v.Aux = sym
-               v.AddArg2(ptr, mem)
+               v.reset(OpAMD64MOVSDstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg3(base, val, mem)
                return true
        }
-       // match: (MOVWstore [off] {sym} ptr (MOVQconst [c]) mem)
-       // cond: validOff(off)
-       // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
+       // match: (MOVSDstore [off] {sym} ptr (MOVQi2f val) mem)
+       // result: (MOVQstore [off] {sym} ptr val mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                ptr := v_0
-               if v_1.Op != OpAMD64MOVQconst {
+               if v_1.Op != OpAMD64MOVQi2f {
                        break
                }
-               c := v_1.AuxInt
+               val := v_1.Args[0]
                mem := v_2
-               if !(validOff(off)) {
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVSSload(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVSSload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVSSload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = makeValAndOff(int64(int16(c)), off)
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSSload)
+               v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVWstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (MOVSSload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -18519,1139 +14548,1201 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool {
                off2 := v_0.AuxInt
                sym2 := v_0.Aux
                base := v_0.Args[0]
-               val := v_1
-               mem := v_2
+               mem := v_1
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
+               v.reset(OpAMD64MOVSSload)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(base, val, mem)
+               v.AddArg2(base, mem)
+               return true
+       }
+       // match: (MOVSSload [off] {sym} ptr (MOVLstore [off] {sym} ptr val _))
+       // result: (MOVLi2f val)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVLstore || v_1.AuxInt != off || v_1.Aux != sym {
+                       break
+               }
+               val := v_1.Args[1]
+               if ptr != v_1.Args[0] {
+                       break
+               }
+               v.reset(OpAMD64MOVLi2f)
+               v.AddArg(val)
                return true
        }
-       // match: (MOVWstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVSSstore(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVSSstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVSSstore [off1+off2] {sym} ptr val mem)
        for {
                off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ1 {
+               sym := v.Aux
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
                off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
                ptr := v_0.Args[0]
                val := v_1
                mem := v_2
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx1)
+               v.reset(OpAMD64MOVSSstore)
                v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg4(ptr, idx, val, mem)
+               v.Aux = sym
+               v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVWstore [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) val mem)
+       // match: (MOVSSstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ2 {
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
                off2 := v_0.AuxInt
                sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
+               base := v_0.Args[0]
                val := v_1
                mem := v_2
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx2)
+               v.reset(OpAMD64MOVSSstore)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
-               v.AddArg4(ptr, idx, val, mem)
+               v.AddArg3(base, val, mem)
                return true
        }
-       // match: (MOVWstore [off] {sym} (ADDQ ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVWstoreidx1 [off] {sym} ptr idx val mem)
+       // match: (MOVSSstore [off] {sym} ptr (MOVLi2f val) mem)
+       // result: (MOVLstore [off] {sym} ptr val mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               if v_0.Op != OpAMD64ADDQ {
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVLi2f {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr := v_0_0
-                       idx := v_0_1
-                       val := v_1
-                       mem := v_2
-                       if !(ptr.Op != OpSB) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVWstoreidx1)
-                       v.AuxInt = off
-                       v.Aux = sym
-                       v.AddArg4(ptr, idx, val, mem)
-                       return true
-               }
-               break
+               val := v_1.Args[0]
+               mem := v_2
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg3(ptr, val, mem)
+               return true
        }
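+       // Note: this is the inverse of the MOVSSload forwarding rule: storing
+       // bits that just came from an integer register (MOVLi2f) is rewritten
+       // as a plain integer store, skipping the XMM register entirely.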
-       // match: (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVWQSX(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MOVWQSX x:(MOVWload [off] {sym} ptr mem))
        // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstore [i-2] {s} p w mem)
+       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               if v_1.Op != OpAMD64SHRLconst || v_1.AuxInt != 16 {
+               x := v_0
+               if x.Op != OpAMD64MOVWload {
                        break
                }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s {
+               off := x.AuxInt
+               sym := x.Aux
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg2(ptr, mem)
+               return true
+       }
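+       // Note: "@x.Block" rules like the one above build the replacement
+       // value in the load's own block and make v a copy of it, fusing the
+       // sign extension into the load (MOVWQSXload) when the load's only use
+       // is this extension (x.Uses == 1).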
+       // match: (MOVWQSX x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v_0
+               if x.Op != OpAMD64MOVLload {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg3(p, w, mem)
+               off := x.AuxInt
+               sym := x.Aux
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVWstore [i] {s} p (SHRQconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
+       // match: (MOVWQSX x:(MOVQload [off] {sym} ptr mem))
        // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstore [i-2] {s} p w mem)
+       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               if v_1.Op != OpAMD64SHRQconst || v_1.AuxInt != 16 {
+               x := v_0
+               if x.Op != OpAMD64MOVQload {
                        break
                }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s {
+               off := x.AuxInt
+               sym := x.Aux
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVWQSX (ANDLconst [c] x))
+       // cond: c & 0x8000 == 0
+       // result: (ANDLconst [c & 0x7fff] x)
+       for {
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg3(p, w, mem)
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(c&0x8000 == 0) {
+                       break
+               }
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = c & 0x7fff
+               v.AddArg(x)
                return true
        }
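+       // Note: when the mask clears bit 15 (c&0x8000 == 0), the 16-bit value
+       // is already non-negative, so the sign extension is a no-op and only
+       // the AND itself needs to be kept.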
-       // match: (MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstore [i-2] {s} p w0 mem)
+       // match: (MOVWQSX (MOVWQSX x))
+       // result: (MOVWQSX x)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               if v_1.Op != OpAMD64SHRLconst {
+               if v_0.Op != OpAMD64MOVWQSX {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s {
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVWQSX)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWQSX (MOVBQSX x))
+       // result: (MOVBQSX x)
+       for {
+               if v_0.Op != OpAMD64MOVBQSX {
                        break
                }
-               mem := x.Args[2]
-               if p != x.Args[0] {
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVBQSX)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVWQSXload(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVWQSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVWQSX x)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVWstore {
                        break
                }
-               w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               x := v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg3(p, w0, mem)
+               v.reset(OpAMD64MOVWQSX)
+               v.AddArg(x)
                return true
        }
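+       // Note: store-to-load forwarding: a sign-extending load that reads
+       // back a value just stored at the same address reduces to sign
+       // extending the stored value directly, with no memory access.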
-       // match: (MOVWstore [i] {s} p (SHRQconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRQconst [j-16] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstore [i-2] {s} p w0 mem)
+       // match: (MOVWQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               if v_1.Op != OpAMD64SHRQconst {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               mem := x.Args[2]
-               if p != x.Args[0] {
+               v.reset(OpAMD64MOVWQSXload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg2(base, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVWQZX(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MOVWQZX x:(MOVWload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v_0
+               if x.Op != OpAMD64MOVWload {
                        break
                }
-               w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               off := x.AuxInt
+               sym := x.Aux
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg3(p, w0, mem)
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVWstore [i] {s} p x1:(MOVWload [j] {s2} p2 mem) mem2:(MOVWstore [i-2] {s} p x2:(MOVWload [j-2] {s2} p2 mem) mem))
-       // cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1, x2, mem2)
-       // result: (MOVLstore [i-2] {s} p (MOVLload [j-2] {s2} p2 mem) mem)
+       // match: (MOVWQZX x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               x1 := v_1
-               if x1.Op != OpAMD64MOVWload {
-                       break
-               }
-               j := x1.AuxInt
-               s2 := x1.Aux
-               mem := x1.Args[1]
-               p2 := x1.Args[0]
-               mem2 := v_2
-               if mem2.Op != OpAMD64MOVWstore || mem2.AuxInt != i-2 || mem2.Aux != s {
+               x := v_0
+               if x.Op != OpAMD64MOVLload {
                        break
                }
-               _ = mem2.Args[2]
-               if p != mem2.Args[0] {
+               off := x.AuxInt
+               sym := x.Aux
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               x2 := mem2.Args[1]
-               if x2.Op != OpAMD64MOVWload || x2.AuxInt != j-2 || x2.Aux != s2 {
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVWQZX x:(MOVQload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v_0
+               if x.Op != OpAMD64MOVQload {
                        break
                }
-               _ = x2.Args[1]
-               if p2 != x2.Args[0] || mem != x2.Args[1] || mem != mem2.Args[2] || !(x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1, x2, mem2)) {
+               off := x.AuxInt
+               sym := x.Aux
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v0 := b.NewValue0(x2.Pos, OpAMD64MOVLload, typ.UInt32)
-               v0.AuxInt = j - 2
-               v0.Aux = s2
-               v0.AddArg2(p2, mem)
-               v.AddArg3(p, v0, mem)
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
-       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
-       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // match: (MOVWQZX x)
+       // cond: zeroUpper48Bits(x,3)
+       // result: x
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAL {
+               x := v_0
+               if !(zeroUpper48Bits(x, 3)) {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
+               v.copyOf(x)
+               return true
+       }
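+       // Note: zeroUpper48Bits reports whether the upper 48 bits of x are
+       // provably zero (for example, x came from a 16-bit load), in which
+       // case the zero extension is redundant and v reduces to x.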
+       // match: (MOVWQZX (ANDLconst [c] x))
+       // result: (ANDLconst [c & 0xffff] x)
+       for {
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(base, val, mem)
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = c & 0xffff
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVWstore [off1+off2] {sym} ptr val mem)
+       // match: (MOVWQZX (MOVWQZX x))
+       // result: (MOVWQZX x)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDLconst {
+               if v_0.Op != OpAMD64MOVWQZX {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(off1 + off2)) {
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVWQZX)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWQZX (MOVBQZX x))
+       // result: (MOVBQZX x)
+       for {
+               if v_0.Op != OpAMD64MOVBQZX {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg3(ptr, val, mem)
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVBQZX)
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVWload(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVWstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       b := v.Block
+       config := b.Func.Config
+       // match: (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVWQZX x)
        for {
-               sc := v.AuxInt
-               s := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVWstore {
                        break
                }
-               off := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(ValAndOff(sc).canAdd(off)) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               x := v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
-               v.AddArg2(ptr, mem)
+               v.reset(OpAMD64MOVWQZX)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // match: (MOVWload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVWload [off1+off2] {sym} ptr mem)
        for {
-               sc := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ {
+               off1 := v.AuxInt
+               sym := v.Aux
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
+               off2 := v_0.AuxInt
                ptr := v_0.Args[0]
                mem := v_1
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = mergeSym(sym1, sym2)
+               v.reset(OpAMD64MOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVWstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (MOVWload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
-               x := v.AuxInt
+               off1 := v.AuxInt
                sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ1 {
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               off := v_0.AuxInt
+               off2 := v_0.AuxInt
                sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
+               base := v_0.Args[0]
                mem := v_1
-               if !(canMergeSym(sym1, sym2)) {
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(off)
+               v.reset(OpAMD64MOVWload)
+               v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(ptr, idx, mem)
+               v.AddArg2(base, mem)
                return true
        }
-       // match: (MOVWstoreconst [x] {sym1} (LEAQ2 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (MOVWload [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVWload [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) mem)
        for {
-               x := v.AuxInt
-               sym1 := v.Aux
-               if v_0.Op != OpAMD64LEAQ2 {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ1 {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
                mem := v_1
-               if !(canMergeSym(sym1, sym2)) {
+               if !(i1 != 0 && is32Bit(i0+i1)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconstidx2)
-               v.AuxInt = ValAndOff(x).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpAMD64MOVWload)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg2(v0, mem)
                return true
        }
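+       // Note: this rule and the LEAQ2/LEAQ4/LEAQ8 variants below hoist a
+       // nonzero displacement out of the address computation and into the
+       // load's AuxInt, leaving a zero-offset LEAQx behind that a later pass
+       // can fold into the load as an indexed addressing mode.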
-       // match: (MOVWstoreconst [x] {sym} (ADDQ ptr idx) mem)
-       // result: (MOVWstoreconstidx1 [x] {sym} ptr idx mem)
+       // match: (MOVWload [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVWload [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) mem)
        for {
-               x := v.AuxInt
-               sym := v.Aux
-               if v_0.Op != OpAMD64ADDQ {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ2 {
                        break
                }
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
                mem := v_1
-               v.reset(OpAMD64MOVWstoreconstidx1)
-               v.AuxInt = x
-               v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
+               if !(i1 != 0 && is32Bit(i0+i1)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWload)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg2(v0, mem)
                return true
        }
-       // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
-       // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
-       // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
+       // match: (MOVWload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVWload [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) mem)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v_0
-               x := v_1
-               if x.Op != OpAMD64MOVWstoreconst {
-                       break
-               }
-               a := x.AuxInt
-               if x.Aux != s {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ4 {
                        break
                }
-               mem := x.Args[1]
-               if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               mem := v_1
+               if !(i1 != 0 && is32Bit(i0+i1)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconst)
-               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
-               v.Aux = s
-               v.AddArg2(p, mem)
+               v.reset(OpAMD64MOVWload)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg2(v0, mem)
                return true
        }
-       // match: (MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
-       // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
-       // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
+       // match: (MOVWload [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVWload [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) mem)
        for {
-               a := v.AuxInt
-               s := v.Aux
-               p := v_0
-               x := v_1
-               if x.Op != OpAMD64MOVWstoreconst {
-                       break
-               }
-               c := x.AuxInt
-               if x.Aux != s {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ8 {
                        break
                }
-               mem := x.Args[1]
-               if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               mem := v_1
+               if !(i1 != 0 && is32Bit(i0+i1)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconst)
-               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
-               v.Aux = s
-               v.AddArg2(p, mem)
+               v.reset(OpAMD64MOVWload)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg2(v0, mem)
                return true
        }
-       // match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // match: (MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
+       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
-               sc := v.AuxInt
+               off1 := v.AuxInt
                sym1 := v.Aux
                if v_0.Op != OpAMD64LEAL {
                        break
                }
-               off := v_0.AuxInt
+               off2 := v_0.AuxInt
                sym2 := v_0.Aux
-               ptr := v_0.Args[0]
+               base := v_0.Args[0]
                mem := v_1
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
+               v.reset(OpAMD64MOVWload)
+               v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
-               v.AddArg2(ptr, mem)
+               v.AddArg2(base, mem)
                return true
        }
-       // match: (MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       // match: (MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVWload [off1+off2] {sym} ptr mem)
        for {
-               sc := v.AuxInt
-               s := v.Aux
+               off1 := v.AuxInt
+               sym := v.Aux
                if v_0.Op != OpAMD64ADDLconst {
                        break
                }
-               off := v_0.AuxInt
+               off2 := v_0.AuxInt
                ptr := v_0.Args[0]
                mem := v_1
-               if !(ValAndOff(sc).canAdd(off)) {
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
+               v.reset(OpAMD64MOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
                v.AddArg2(ptr, mem)
                return true
        }
+       // match: (MOVWload [off] {sym} (SB) _)
+       // cond: symIsRO(sym)
+       // result: (MOVLconst [int64(read16(sym, off, config.ctxt.Arch.ByteOrder))])
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               if v_0.Op != OpSB || !(symIsRO(sym)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = int64(read16(sym, off, config.ctxt.Arch.ByteOrder))
+               return true
+       }
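+       // Note: a load at a constant offset from SB out of a read-only symbol
+       // is folded to a constant at compile time; read16 reads the two bytes
+       // from the symbol's data using the target's byte order.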
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVWstoreconstidx1(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVWstoreconstidx1 [c] {sym} ptr (SHLQconst [1] idx) mem)
-       // result: (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (MOVWstore [off] {sym} ptr (MOVWQSX x) mem)
+       // result: (MOVWstore [off] {sym} ptr x mem)
        for {
-               c := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 1 {
-                               continue
-                       }
-                       idx := v_1.Args[0]
-                       mem := v_2
-                       v.reset(OpAMD64MOVWstoreconstidx2)
-                       v.AuxInt = c
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVWQSX {
+                       break
                }
-               break
+               x := v_1.Args[0]
+               mem := v_2
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg3(ptr, x, mem)
+               return true
        }
-       // match: (MOVWstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
-       // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       // match: (MOVWstore [off] {sym} ptr (MOVWQZX x) mem)
+       // result: (MOVWstore [off] {sym} ptr x mem)
        for {
-               x := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       c := v_0.AuxInt
-                       ptr := v_0.Args[0]
-                       idx := v_1
-                       mem := v_2
-                       if !(ValAndOff(x).canAdd(c)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVWstoreconstidx1)
-                       v.AuxInt = ValAndOff(x).add(c)
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVWQZX {
+                       break
                }
-               break
+               x := v_1.Args[0]
+               mem := v_2
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg3(ptr, x, mem)
+               return true
        }
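+       // Note: a 16-bit store writes only the low 16 bits of the value, so
+       // sign or zero extending it first (the MOVWQSX/MOVWQZX rules above)
+       // changes nothing and the extension is dropped.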
-       // match: (MOVWstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
-       // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       // match: (MOVWstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVWstore [off1+off2] {sym} ptr val mem)
        for {
-               x := v.AuxInt
+               off1 := v.AuxInt
                sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       c := v_1.AuxInt
-                       idx := v_1.Args[0]
-                       mem := v_2
-                       if !(ValAndOff(x).canAdd(c)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVWstoreconstidx1)
-                       v.AuxInt = ValAndOff(x).add(c)
-                       v.Aux = sym
-                       v.AddArg3(ptr, idx, mem)
-                       return true
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
                }
-               break
-       }
-       // match: (MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
-       // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
-       // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       i := v_1
-                       x := v_2
-                       if x.Op != OpAMD64MOVWstoreconstidx1 {
-                               continue
-                       }
-                       a := x.AuxInt
-                       if x.Aux != s {
-                               continue
-                       }
-                       mem := x.Args[2]
-                       x_0 := x.Args[0]
-                       x_1 := x.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-                               if p != x_0 || i != x_1 || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
-                                       continue
-                               }
-                               v.reset(OpAMD64MOVLstoreconstidx1)
-                               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
-                               v.Aux = s
-                               v.AddArg3(p, i, mem)
-                               return true
-                       }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(is32Bit(off1 + off2)) {
+                       break
                }
-               break
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg3(ptr, val, mem)
+               return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWstoreconstidx2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MOVWstoreconstidx2 [x] {sym} (ADDQconst [c] ptr) idx mem)
-       // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem)
+       // cond: validOff(off)
+       // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
        for {
-               x := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v_1
+               c := v_1.AuxInt
                mem := v_2
-               if !(ValAndOff(x).canAdd(c)) {
+               if !(validOff(off)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconstidx2)
-               v.AuxInt = ValAndOff(x).add(c)
+               v.reset(OpAMD64MOVWstoreconst)
+               v.AuxInt = makeValAndOff(int64(int16(c)), off)
                v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVWstoreconstidx2 [x] {sym} ptr (ADDQconst [c] idx) mem)
-       // cond: ValAndOff(x).canAdd(2*c)
-       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
+       // match: (MOVWstore [off] {sym} ptr (MOVQconst [c]) mem)
+       // cond: validOff(off)
+       // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
        for {
-               x := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
                ptr := v_0
-               if v_1.Op != OpAMD64ADDQconst {
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
                c := v_1.AuxInt
-               idx := v_1.Args[0]
                mem := v_2
-               if !(ValAndOff(x).canAdd(2 * c)) {
+               if !(validOff(off)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconstidx2)
-               v.AuxInt = ValAndOff(x).add(2 * c)
+               v.reset(OpAMD64MOVWstoreconst)
+               v.AuxInt = makeValAndOff(int64(int16(c)), off)
                v.Aux = sym
-               v.AddArg3(ptr, idx, mem)
+               v.AddArg2(ptr, mem)
                return true
        }
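+       // Note: makeValAndOff packs the truncated 16-bit constant together
+       // with the store offset into a single AuxInt, so the constant store
+       // carries no separate value argument.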
-       // match: (MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
-       // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
-       // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLQconst <i.Type> [1] i) mem)
+       // match: (MOVWstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               p := v_0
-               i := v_1
-               x := v_2
-               if x.Op != OpAMD64MOVWstoreconstidx2 {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               mem := x.Args[2]
-               if p != x.Args[0] || i != x.Args[1] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg3(base, val, mem)
+               return true
+       }
+       // match: (MOVWstore [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVWstore [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) val mem)
+       for {
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ1 {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconstidx1)
-               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
-               v.Aux = s
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, i.Type)
-               v0.AuxInt = 1
-               v0.AddArg(i)
-               v.AddArg3(p, v0, mem)
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               val := v_1
+               mem := v_2
+               if !(i1 != 0 && is32Bit(i0+i1)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg3(v0, val, mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWstoreidx1(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVWstoreidx1 [c] {sym} ptr (SHLQconst [1] idx) val mem)
-       // result: (MOVWstoreidx2 [c] {sym} ptr idx val mem)
+       // match: (MOVWstore [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVWstore [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) val mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 1 {
-                               continue
-                       }
-                       idx := v_1.Args[0]
-                       val := v_2
-                       mem := v_3
-                       v.reset(OpAMD64MOVWstoreidx2)
-                       v.AuxInt = c
-                       v.Aux = sym
-                       v.AddArg4(ptr, idx, val, mem)
-                       return true
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ2 {
+                       break
                }
-               break
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               val := v_1
+               mem := v_2
+               if !(i1 != 0 && is32Bit(i0+i1)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg3(v0, val, mem)
+               return true
        }
-       // match: (MOVWstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
+       // match: (MOVWstore [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVWstore [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) val mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       d := v_0.AuxInt
-                       ptr := v_0.Args[0]
-                       idx := v_1
-                       val := v_2
-                       mem := v_3
-                       if !(is32Bit(c + d)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVWstoreidx1)
-                       v.AuxInt = c + d
-                       v.Aux = sym
-                       v.AddArg4(ptr, idx, val, mem)
-                       return true
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ4 {
+                       break
                }
-               break
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               val := v_1
+               mem := v_2
+               if !(i1 != 0 && is32Bit(i0+i1)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg3(v0, val, mem)
+               return true
        }
-       // match: (MOVWstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
+       // match: (MOVWstore [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem)
+       // cond: i1 != 0 && is32Bit(i0+i1)
+       // result: (MOVWstore [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) val mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       ptr := v_0
-                       if v_1.Op != OpAMD64ADDQconst {
-                               continue
-                       }
-                       d := v_1.AuxInt
-                       idx := v_1.Args[0]
-                       val := v_2
-                       mem := v_3
-                       if !(is32Bit(c + d)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVWstoreidx1)
-                       v.AuxInt = c + d
-                       v.Aux = sym
-                       v.AddArg4(ptr, idx, val, mem)
-                       return true
+               i0 := v.AuxInt
+               s0 := v.Aux
+               l := v_0
+               if l.Op != OpAMD64LEAQ8 {
+                       break
                }
-               break
+               i1 := l.AuxInt
+               s1 := l.Aux
+               y := l.Args[1]
+               x := l.Args[0]
+               val := v_1
+               mem := v_2
+               if !(i1 != 0 && is32Bit(i0+i1)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = i0 + i1
+               v.Aux = s0
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type)
+               v0.AuxInt = 0
+               v0.Aux = s1
+               v0.AddArg2(x, y)
+               v.AddArg3(v0, val, mem)
+               return true
        }
-       // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
+       // match: (MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p0 w mem))
+       // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+       // result: (MOVLstore [i-2] {s} p0 w mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       idx := v_1
-                       if v_2.Op != OpAMD64SHRLconst || v_2.AuxInt != 16 {
-                               continue
-                       }
-                       w := v_2.Args[0]
-                       x := v_3
-                       if x.Op != OpAMD64MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       x_0 := x.Args[0]
-                       x_1 := x.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-                               if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-                                       continue
-                               }
-                               v.reset(OpAMD64MOVLstoreidx1)
-                               v.AuxInt = i - 2
-                               v.Aux = s
-                               v.AddArg4(p, idx, w, mem)
-                               return true
-                       }
+               p1 := v_0
+               if v_1.Op != OpAMD64SHRLconst || v_1.AuxInt != 16 {
+                       break
                }
-               break
+               w := v_1.Args[0]
+               x := v_2
+               if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s {
+                       break
+               }
+               mem := x.Args[2]
+               p0 := x.Args[0]
+               if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg3(p0, w, mem)
+               return true
        }
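+       // Note: the pair-merging rules here combine two adjacent 16-bit
+       // stores (the low half at i-2, the high half extracted by a 16-bit
+       // shift at i) into one 32-bit store. same(p0, p1, 1) checks that the
+       // two base pointers are the same value; the final argument appears to
+       // bound how far the comparison follows the pointers' definitions.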
-       // match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
+       // match: (MOVWstore [i] {s} p1 (SHRQconst [16] w) x:(MOVWstore [i-2] {s} p0 w mem))
+       // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+       // result: (MOVLstore [i-2] {s} p0 w mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       idx := v_1
-                       if v_2.Op != OpAMD64SHRQconst || v_2.AuxInt != 16 {
-                               continue
-                       }
-                       w := v_2.Args[0]
-                       x := v_3
-                       if x.Op != OpAMD64MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       x_0 := x.Args[0]
-                       x_1 := x.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-                               if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-                                       continue
-                               }
-                               v.reset(OpAMD64MOVLstoreidx1)
-                               v.AuxInt = i - 2
-                               v.Aux = s
-                               v.AddArg4(p, idx, w, mem)
-                               return true
-                       }
+               p1 := v_0
+               if v_1.Op != OpAMD64SHRQconst || v_1.AuxInt != 16 {
+                       break
                }
-               break
+               w := v_1.Args[0]
+               x := v_2
+               if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s {
+                       break
+               }
+               mem := x.Args[2]
+               p0 := x.Args[0]
+               if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg3(p0, w, mem)
+               return true
        }
-       // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
+       // match: (MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHRLconst [j-16] w) mem))
+       // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+       // result: (MOVLstore [i-2] {s} p0 w0 mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       idx := v_1
-                       if v_2.Op != OpAMD64SHRLconst {
-                               continue
-                       }
-                       j := v_2.AuxInt
-                       w := v_2.Args[0]
-                       x := v_3
-                       if x.Op != OpAMD64MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       x_0 := x.Args[0]
-                       x_1 := x.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-                               if p != x_0 || idx != x_1 {
-                                       continue
-                               }
-                               w0 := x.Args[2]
-                               if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
-                                       continue
-                               }
-                               v.reset(OpAMD64MOVLstoreidx1)
-                               v.AuxInt = i - 2
-                               v.Aux = s
-                               v.AddArg4(p, idx, w0, mem)
-                               return true
-                       }
+               p1 := v_0
+               if v_1.Op != OpAMD64SHRLconst {
+                       break
                }
-               break
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v_2
+               if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s {
+                       break
+               }
+               mem := x.Args[2]
+               p0 := x.Args[0]
+               w0 := x.Args[1]
+               if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg3(p0, w0, mem)
+               return true
        }
-       // match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
+       // match: (MOVWstore [i] {s} p1 (SHRQconst [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHRQconst [j-16] w) mem))
+       // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+       // result: (MOVLstore [i-2] {s} p0 w0 mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       idx := v_1
-                       if v_2.Op != OpAMD64SHRQconst {
-                               continue
-                       }
-                       j := v_2.AuxInt
-                       w := v_2.Args[0]
-                       x := v_3
-                       if x.Op != OpAMD64MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       x_0 := x.Args[0]
-                       x_1 := x.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 {
-                               if p != x_0 || idx != x_1 {
-                                       continue
-                               }
-                               w0 := x.Args[2]
-                               if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
-                                       continue
-                               }
-                               v.reset(OpAMD64MOVLstoreidx1)
-                               v.AuxInt = i - 2
-                               v.Aux = s
-                               v.AddArg4(p, idx, w0, mem)
-                               return true
-                       }
+               p1 := v_0
+               if v_1.Op != OpAMD64SHRQconst {
+                       break
                }
-               break
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v_2
+               if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s {
+                       break
+               }
+               mem := x.Args[2]
+               p0 := x.Args[0]
+               w0 := x.Args[1]
+               if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg3(p0, w0, mem)
+               return true
        }
-       // match: (MOVWstoreidx1 [i] {s} p (MOVQconst [c]) w mem)
-       // cond: is32Bit(i+c)
-       // result: (MOVWstore [i+c] {s} p w mem)
+       // match: (MOVWstore [i] {s} p x1:(MOVWload [j] {s2} p2 mem) mem2:(MOVWstore [i-2] {s} p x2:(MOVWload [j-2] {s2} p2 mem) mem))
+       // cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1, x2, mem2)
+       // result: (MOVLstore [i-2] {s} p (MOVLload [j-2] {s2} p2 mem) mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       p := v_0
-                       if v_1.Op != OpAMD64MOVQconst {
-                               continue
-                       }
-                       c := v_1.AuxInt
-                       w := v_2
-                       mem := v_3
-                       if !(is32Bit(i + c)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MOVWstore)
-                       v.AuxInt = i + c
-                       v.Aux = s
-                       v.AddArg3(p, w, mem)
-                       return true
+               p := v_0
+               x1 := v_1
+               if x1.Op != OpAMD64MOVWload {
+                       break
                }
-               break
+               j := x1.AuxInt
+               s2 := x1.Aux
+               mem := x1.Args[1]
+               p2 := x1.Args[0]
+               mem2 := v_2
+               if mem2.Op != OpAMD64MOVWstore || mem2.AuxInt != i-2 || mem2.Aux != s {
+                       break
+               }
+               _ = mem2.Args[2]
+               if p != mem2.Args[0] {
+                       break
+               }
+               x2 := mem2.Args[1]
+               if x2.Op != OpAMD64MOVWload || x2.AuxInt != j-2 || x2.Aux != s2 {
+                       break
+               }
+               _ = x2.Args[1]
+               if p2 != x2.Args[0] || mem != x2.Args[1] || mem != mem2.Args[2] || !(x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1, x2, mem2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v0 := b.NewValue0(x2.Pos, OpAMD64MOVLload, typ.UInt32)
+               v0.AuxInt = j - 2
+               v0.Aux = s2
+               v0.AddArg2(p2, mem)
+               v.AddArg3(p, v0, mem)
+               return true
        }
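
A sketch of the copy-widening case, assuming both 16-bit loads stay scheduled against the same memory state (the rule requires x1, x2, and mem2 to share mem). Names are illustrative:

        package sketch

        import "encoding/binary"

        // copy4 copies four bytes as two 16-bit load/store pairs. When both
        // loads read the same memory state, the rule above replaces the
        // pair with one MOVLload feeding one MOVLstore.
        func copy4(dst, src []byte) {
                lo := binary.LittleEndian.Uint16(src)
                hi := binary.LittleEndian.Uint16(src[2:])
                binary.LittleEndian.PutUint16(dst, lo)
                binary.LittleEndian.PutUint16(dst[2:], hi)
        }
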
-       // match: (MOVWstoreidx1 [off] {s} ptr idx (MOVLconst [c]) mem)
-       // cond: validValAndOff(int64(int16(c)), off)
-       // result: (MOVWstoreconstidx1 [makeValAndOff(int64(int16(c)), off)] {s} ptr idx mem)
+       // match: (MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
+       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
-               off := v.AuxInt
-               s := v.Aux
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpAMD64MOVLconst {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               if v_0.Op != OpAMD64LEAL {
                        break
                }
-               c := v_2.AuxInt
-               mem := v_3
-               if !(validValAndOff(int64(int16(c)), off)) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconstidx1)
-               v.AuxInt = makeValAndOff(int64(int16(c)), off)
-               v.Aux = s
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg3(base, val, mem)
                return true
        }
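
Both this rule and the ADDLconst one below are gated on is32Bit, since x86 displacements are signed 32-bit immediates. The helper in ssa/rewrite.go amounts to this check (sketch):

        // is32Bit reports whether n fits in a signed 32-bit immediate.
        func is32Bit(n int64) bool {
                return n == int64(int32(n))
        }
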
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWstoreidx2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MOVWstoreidx2 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVWstoreidx2 [c+d] {sym} ptr idx val mem)
+       // match: (MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVWstore [off1+off2] {sym} ptr val mem)
        for {
-               c := v.AuxInt
+               off1 := v.AuxInt
                sym := v.Aux
-               if v_0.Op != OpAMD64ADDQconst {
+               if v_0.Op != OpAMD64ADDLconst {
                        break
                }
-               d := v_0.AuxInt
+               off2 := v_0.AuxInt
                ptr := v_0.Args[0]
-               idx := v_1
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c + d)) {
+               val := v_1
+               mem := v_2
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx2)
-               v.AuxInt = c + d
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = off1 + off2
                v.Aux = sym
-               v.AddArg4(ptr, idx, val, mem)
+               v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVWstoreidx2 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+2*d)
-       // result: (MOVWstoreidx2 [c+2*d] {sym} ptr idx val mem)
+       return false
+}
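
Generated functions like the one closing above report whether any rule fired; a caller reapplies them to a fixed point. A simplified sketch of that driving loop, with stand-in types (the real driver in ssa/rewrite.go also handles block rewrites and dead-value cleanup):

        package sketch

        // Minimal stand-ins so the sketch compiles on its own.
        type Value struct{}
        type Block struct{ Values []*Value }
        type Func struct{ Blocks []*Block }

        // applyRules keeps invoking a generated rewriter such as
        // rewriteValueAMD64_OpAMD64MOVWstore until no rule matches.
        func applyRules(f *Func, rewriteValue func(*Value) bool) {
                for changed := true; changed; {
                        changed = false
                        for _, b := range f.Blocks {
                                for _, v := range b.Values {
                                        if rewriteValue(v) {
                                                changed = true
                                        }
                                }
                        }
                }
        }
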
+func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVWstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               ptr := v_0
-               if v_1.Op != OpAMD64ADDQconst {
+               sc := v.AuxInt
+               s := v.Aux
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c + 2*d)) {
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx2)
-               v.AuxInt = c + 2*d
-               v.Aux = sym
-               v.AddArg4(ptr, idx, val, mem)
+               v.reset(OpAMD64MOVWstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
+               v.AddArg2(ptr, mem)
                return true
        }
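
The sc AuxInt here is a ValAndOff: a 32-bit constant value and a 32-bit offset packed into one int64. A sketch of the assumed encoding behind the canAdd/add helpers used above:

        package sketch

        // ValAndOff packs a constant value (high 32 bits) and an offset
        // (low 32 bits) into one AuxInt; assumed layout, matching how
        // the rules above manipulate it.
        type ValAndOff int64

        func (x ValAndOff) Val() int64 { return int64(x) >> 32 }
        func (x ValAndOff) Off() int64 { return int64(int32(x)) }

        // canAdd reports whether the offset can grow by off and still
        // fit in 32 bits; add performs the addition, keeping the value.
        func (x ValAndOff) canAdd(off int64) bool {
                newoff := x.Off() + off
                return newoff == int64(int32(newoff))
        }

        func (x ValAndOff) add(off int64) int64 {
                return x.Val()<<32 | int64(uint32(x.Off()+off))
        }
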
-       // match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
+       // match: (MOVWstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
+       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               idx := v_1
-               if v_2.Op != OpAMD64SHRLconst || v_2.AuxInt != 16 {
-                       break
-               }
-               w := v_2.Args[0]
-               x := v_3
-               if x.Op != OpAMD64MOVWstoreidx2 || x.AuxInt != i-2 || x.Aux != s {
+               sc := v.AuxInt
+               sym1 := v.Aux
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               mem := x.Args[3]
-               if p != x.Args[0] || idx != x.Args[1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
-               v0.AuxInt = 1
-               v0.AddArg(idx)
-               v.AddArg4(p, v0, w, mem)
+               v.reset(OpAMD64MOVWstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVWstoreidx2 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
+       // match: (MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
+       // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
+       // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
        for {
-               i := v.AuxInt
+               c := v.AuxInt
                s := v.Aux
-               p := v_0
-               idx := v_1
-               if v_2.Op != OpAMD64SHRQconst || v_2.AuxInt != 16 {
+               p1 := v_0
+               x := v_1
+               if x.Op != OpAMD64MOVWstoreconst {
                        break
                }
-               w := v_2.Args[0]
-               x := v_3
-               if x.Op != OpAMD64MOVWstoreidx2 || x.AuxInt != i-2 || x.Aux != s {
+               a := x.AuxInt
+               if x.Aux != s {
                        break
                }
-               mem := x.Args[3]
-               if p != x.Args[0] || idx != x.Args[1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
+               mem := x.Args[1]
+               p0 := x.Args[0]
+               if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 2
+               v.reset(OpAMD64MOVLstoreconst)
+               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
                v.Aux = s
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
-               v0.AuxInt = 1
-               v0.AddArg(idx)
-               v.AddArg4(p, v0, w, mem)
+               v.AddArg2(p0, mem)
                return true
        }
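
A worked instance of the AuxInt arithmetic above: with the inner store writing 0x1234 at offset 0 and the outer writing 0x5678 at offset 2, the merged MOVLstoreconst writes 0x56781234 at offset 0 (little endian). Sketch:

        package main

        import "fmt"

        func main() {
                // Val() of the inner ([a]) and outer ([c]) constant stores.
                aVal, cVal := int64(0x1234), int64(0x5678)
                merged := aVal&0xffff | cVal<<16
                fmt.Printf("%#x\n", merged) // 0x56781234, one 32-bit store
        }
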
-       // match: (MOVWstoreidx2 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w0 mem)
+       // match: (MOVWstoreconst [a] {s} p1 x:(MOVWstoreconst [c] {s} p0 mem))
+       // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
+       // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
        for {
-               i := v.AuxInt
+               a := v.AuxInt
                s := v.Aux
-               p := v_0
-               idx := v_1
-               if v_2.Op != OpAMD64SHRQconst {
-                       break
-               }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v_3
-               if x.Op != OpAMD64MOVWstoreidx2 || x.AuxInt != i-2 || x.Aux != s {
+               p1 := v_0
+               x := v_1
+               if x.Op != OpAMD64MOVWstoreconst {
                        break
                }
-               mem := x.Args[3]
-               if p != x.Args[0] || idx != x.Args[1] {
+               c := x.AuxInt
+               if x.Aux != s {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               mem := x.Args[1]
+               p0 := x.Args[0]
+               if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 2
+               v.reset(OpAMD64MOVLstoreconst)
+               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
                v.Aux = s
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
-               v0.AuxInt = 1
-               v0.AddArg(idx)
-               v.AddArg4(p, v0, w0, mem)
+               v.AddArg2(p0, mem)
                return true
        }
-       // match: (MOVWstoreidx2 [i] {s} p (MOVQconst [c]) w mem)
-       // cond: is32Bit(i+2*c)
-       // result: (MOVWstore [i+2*c] {s} p w mem)
+       // match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
+       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               p := v_0
-               if v_1.Op != OpAMD64MOVQconst {
+               sc := v.AuxInt
+               sym1 := v.Aux
+               if v_0.Op != OpAMD64LEAL {
                        break
                }
-               c := v_1.AuxInt
-               w := v_2
-               mem := v_3
-               if !(is32Bit(i + 2*c)) {
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = i + 2*c
-               v.Aux = s
-               v.AddArg3(p, w, mem)
+               v.reset(OpAMD64MOVWstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVWstoreidx2 [off] {s} ptr idx (MOVLconst [c]) mem)
-       // cond: validValAndOff(int64(int16(c)), off)
-       // result: (MOVWstoreconstidx2 [makeValAndOff(int64(int16(c)), off)] {s} ptr idx mem)
+       // match: (MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
        for {
-               off := v.AuxInt
+               sc := v.AuxInt
                s := v.Aux
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpAMD64MOVLconst {
+               if v_0.Op != OpAMD64ADDLconst {
                        break
                }
-               c := v_2.AuxInt
-               mem := v_3
-               if !(validValAndOff(int64(int16(c)), off)) {
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconstidx2)
-               v.AuxInt = makeValAndOff(int64(int16(c)), off)
+               v.reset(OpAMD64MOVWstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
                v.Aux = s
-               v.AddArg3(ptr, idx, mem)
+               v.AddArg2(ptr, mem)
                return true
        }
        return false
@@ -21672,9 +17763,9 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                v.copyOf(x)
                return true
        }
-       // match: (ORL x0:(MOVBload [i0] {s} p mem) sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
-       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+       // match: (ORL x0:(MOVBload [i0] {s} p0 mem) sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p1 mem)))
+       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
                        x0 := v_0
@@ -21684,7 +17775,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                        i0 := x0.AuxInt
                        s := x0.Aux
                        mem := x0.Args[1]
-                       p := x0.Args[0]
+                       p0 := x0.Args[0]
                        sh := v_1
                        if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 8 {
                                continue
@@ -21698,7 +17789,8 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                                continue
                        }
                        _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+                       p1 := x1.Args[0]
+                       if mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
                                continue
                        }
                        b = mergePoint(b, x0, x1)
@@ -21706,14 +17798,14 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                        v.copyOf(v0)
                        v0.AuxInt = i0
                        v0.Aux = s
-                       v0.AddArg2(p, mem)
+                       v0.AddArg2(p0, mem)
                        return true
                }
                break
        }
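
This is the classic little-endian load-merging shape. A hedged Go-level trigger (illustrative name):

        package sketch

        // read16 assembles a 16-bit little-endian value from two byte
        // loads; the rule above folds the ORL/SHLLconst [8] combination
        // into a single MOVWload when same(p0, p1, 1) holds.
        func read16(b []byte) uint16 {
                return uint16(b[0]) | uint16(b[1])<<8
        }
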
-       // match: (ORL x0:(MOVWload [i0] {s} p mem) sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem)))
-       // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
+       // match: (ORL x0:(MOVWload [i0] {s} p0 mem) sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p1 mem)))
+       // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
                        x0 := v_0
@@ -21723,7 +17815,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                        i0 := x0.AuxInt
                        s := x0.Aux
                        mem := x0.Args[1]
-                       p := x0.Args[0]
+                       p0 := x0.Args[0]
                        sh := v_1
                        if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 16 {
                                continue
@@ -21737,7 +17829,8 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                                continue
                        }
                        _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+                       p1 := x1.Args[0]
+                       if mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
                                continue
                        }
                        b = mergePoint(b, x0, x1)
@@ -21745,14 +17838,14 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                        v.copyOf(v0)
                        v0.AuxInt = i0
                        v0.Aux = s
-                       v0.AddArg2(p, mem)
+                       v0.AddArg2(p0, mem)
                        return true
                }
                break
        }
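
The 16-to-32-bit analogue; a sketch under the same assumptions:

        package sketch

        import "encoding/binary"

        // read32 widens two adjacent 16-bit loads, combined with ORL and
        // SHLLconst [16], into one MOVLload per the rule above.
        func read32(b []byte) uint32 {
                lo := uint32(binary.LittleEndian.Uint16(b))
                hi := uint32(binary.LittleEndian.Uint16(b[2:]))
                return lo | hi<<16
        }
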
-       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) y))
-       // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p0 mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p1 mem)) y))
+       // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
+       // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p0 mem)) y)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
                        s1 := v_0
@@ -21767,7 +17860,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                        i1 := x1.AuxInt
                        s := x1.Aux
                        mem := x1.Args[1]
-                       p := x1.Args[0]
+                       p0 := x1.Args[0]
                        or := v_1
                        if or.Op != OpAMD64ORL {
                                continue
@@ -21790,11 +17883,12 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                                        continue
                                }
                                _ = x0.Args[1]
-                               if p != x0.Args[0] || mem != x0.Args[1] {
+                               p1 := x0.Args[0]
+                               if mem != x0.Args[1] {
                                        continue
                                }
                                y := or_1
-                               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
+                               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
                                        continue
                                }
                                b = mergePoint(b, x0, x1, y)
@@ -21805,7 +17899,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                                v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16)
                                v2.AuxInt = i0
                                v2.Aux = s
-                               v2.AddArg2(p, mem)
+                               v2.AddArg2(p0, mem)
                                v1.AddArg(v2)
                                v0.AddArg2(v1, y)
                                return true
@@ -21813,177 +17907,9 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                }
                break
        }
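
The j0 % 16 == 0 variant fires on byte pairs buried inside a longer OR chain (the y operand carries the remainder), so wider reads collapse pairwise. Sketch:

        package sketch

        // read32le builds a 32-bit little-endian value byte by byte; rules
        // like the one above merge adjacent byte pairs first, and further
        // rules then merge the resulting 16-bit loads.
        func read32le(b []byte) uint32 {
                return uint32(b[0]) | uint32(b[1])<<8 |
                        uint32(b[2])<<16 | uint32(b[3])<<24
        }
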
-       // match: (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpAMD64MOVBloadidx1 {
-                               continue
-                       }
-                       i0 := x0.AuxInt
-                       s := x0.Aux
-                       mem := x0.Args[2]
-                       x0_0 := x0.Args[0]
-                       x0_1 := x0.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 {
-                               p := x0_0
-                               idx := x0_1
-                               sh := v_1
-                               if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 8 {
-                                       continue
-                               }
-                               x1 := sh.Args[0]
-                               if x1.Op != OpAMD64MOVBloadidx1 {
-                                       continue
-                               }
-                               i1 := x1.AuxInt
-                               if x1.Aux != s {
-                                       continue
-                               }
-                               _ = x1.Args[2]
-                               x1_0 := x1.Args[0]
-                               x1_1 := x1.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, x1_0, x1_1 = _i2+1, x1_1, x1_0 {
-                                       if p != x1_0 || idx != x1_1 || mem != x1.Args[2] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                                               continue
-                                       }
-                                       b = mergePoint(b, x0, x1)
-                                       v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
-                                       v.copyOf(v0)
-                                       v0.AuxInt = i0
-                                       v0.Aux = s
-                                       v0.AddArg3(p, idx, mem)
-                                       return true
-                               }
-                       }
-               }
-               break
-       }
-       // match: (ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpAMD64MOVWloadidx1 {
-                               continue
-                       }
-                       i0 := x0.AuxInt
-                       s := x0.Aux
-                       mem := x0.Args[2]
-                       x0_0 := x0.Args[0]
-                       x0_1 := x0.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 {
-                               p := x0_0
-                               idx := x0_1
-                               sh := v_1
-                               if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 16 {
-                                       continue
-                               }
-                               x1 := sh.Args[0]
-                               if x1.Op != OpAMD64MOVWloadidx1 {
-                                       continue
-                               }
-                               i1 := x1.AuxInt
-                               if x1.Aux != s {
-                                       continue
-                               }
-                               _ = x1.Args[2]
-                               x1_0 := x1.Args[0]
-                               x1_1 := x1.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, x1_0, x1_1 = _i2+1, x1_1, x1_0 {
-                                       if p != x1_0 || idx != x1_1 || mem != x1.Args[2] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                                               continue
-                                       }
-                                       b = mergePoint(b, x0, x1)
-                                       v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, typ.UInt32)
-                                       v.copyOf(v0)
-                                       v0.AuxInt = i0
-                                       v0.Aux = s
-                                       v0.AddArg3(p, idx, mem)
-                                       return true
-                               }
-                       }
-               }
-               break
-       }
-       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y))
-       // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s1 := v_0
-                       if s1.Op != OpAMD64SHLLconst {
-                               continue
-                       }
-                       j1 := s1.AuxInt
-                       x1 := s1.Args[0]
-                       if x1.Op != OpAMD64MOVBloadidx1 {
-                               continue
-                       }
-                       i1 := x1.AuxInt
-                       s := x1.Aux
-                       mem := x1.Args[2]
-                       x1_0 := x1.Args[0]
-                       x1_1 := x1.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x1_0, x1_1 = _i1+1, x1_1, x1_0 {
-                               p := x1_0
-                               idx := x1_1
-                               or := v_1
-                               if or.Op != OpAMD64ORL {
-                                       continue
-                               }
-                               _ = or.Args[1]
-                               or_0 := or.Args[0]
-                               or_1 := or.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, or_0, or_1 = _i2+1, or_1, or_0 {
-                                       s0 := or_0
-                                       if s0.Op != OpAMD64SHLLconst {
-                                               continue
-                                       }
-                                       j0 := s0.AuxInt
-                                       x0 := s0.Args[0]
-                                       if x0.Op != OpAMD64MOVBloadidx1 {
-                                               continue
-                                       }
-                                       i0 := x0.AuxInt
-                                       if x0.Aux != s {
-                                               continue
-                                       }
-                                       _ = x0.Args[2]
-                                       x0_0 := x0.Args[0]
-                                       x0_1 := x0.Args[1]
-                                       for _i3 := 0; _i3 <= 1; _i3, x0_0, x0_1 = _i3+1, x0_1, x0_0 {
-                                               if p != x0_0 || idx != x0_1 || mem != x0.Args[2] {
-                                                       continue
-                                               }
-                                               y := or_1
-                                               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                                       continue
-                                               }
-                                               b = mergePoint(b, x0, x1, y)
-                                               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-                                               v.copyOf(v0)
-                                               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-                                               v1.AuxInt = j0
-                                               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, typ.UInt16)
-                                               v2.AuxInt = i0
-                                               v2.Aux = s
-                                               v2.AddArg3(p, idx, mem)
-                                               v1.AddArg(v2)
-                                               v0.AddArg2(v1, y)
-                                               return true
-                                       }
-                               }
-                       }
-               }
-               break
-       }
-       // match: (ORL x1:(MOVBload [i1] {s} p mem) sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem)))
-       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
+       // match: (ORL x1:(MOVBload [i1] {s} p0 mem) sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p1 mem)))
+       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p0 mem))
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
                        x1 := v_0
@@ -21993,7 +17919,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                        i1 := x1.AuxInt
                        s := x1.Aux
                        mem := x1.Args[1]
-                       p := x1.Args[0]
+                       p0 := x1.Args[0]
                        sh := v_1
                        if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 8 {
                                continue
@@ -22007,7 +17933,8 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                                continue
                        }
                        _ = x0.Args[1]
-                       if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+                       p1 := x0.Args[0]
+                       if mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
                                continue
                        }
                        b = mergePoint(b, x0, x1)
@@ -22017,15 +17944,15 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                        v1 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16)
                        v1.AuxInt = i0
                        v1.Aux = s
-                       v1.AddArg2(p, mem)
+                       v1.AddArg2(p0, mem)
                        v0.AddArg(v1)
                        return true
                }
                break
        }
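
Here the byte order is reversed, so the merged 16-bit load needs a byte swap, expressed as ROLWconst [8]. Sketch of a trigger:

        package sketch

        // read16be assembles a big-endian 16-bit value; the rule above
        // replaces the two byte loads with MOVWload plus ROLWconst [8].
        func read16be(b []byte) uint16 {
                return uint16(b[1]) | uint16(b[0])<<8
        }
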
-       // match: (ORL r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
-       // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
+       // match: (ORL r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem))))
+       // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p0 mem))
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
                        r1 := v_0
@@ -22039,7 +17966,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                        i1 := x1.AuxInt
                        s := x1.Aux
                        mem := x1.Args[1]
-                       p := x1.Args[0]
+                       p0 := x1.Args[0]
                        sh := v_1
                        if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 16 {
                                continue
@@ -22057,199 +17984,25 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                                continue
                        }
                        _ = x0.Args[1]
-                       if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
+                       p1 := x0.Args[0]
+                       if mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
                                continue
                        }
                        b = mergePoint(b, x0, x1)
                        v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, v.Type)
                        v.copyOf(v0)
                        v1 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32)
-                       v1.AuxInt = i0
-                       v1.Aux = s
-                       v1.AddArg2(p, mem)
-                       v0.AddArg(v1)
-                       return true
-               }
-               break
-       }
-       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) y))
-       // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s0 := v_0
-                       if s0.Op != OpAMD64SHLLconst {
-                               continue
-                       }
-                       j0 := s0.AuxInt
-                       x0 := s0.Args[0]
-                       if x0.Op != OpAMD64MOVBload {
-                               continue
-                       }
-                       i0 := x0.AuxInt
-                       s := x0.Aux
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       or := v_1
-                       if or.Op != OpAMD64ORL {
-                               continue
-                       }
-                       _ = or.Args[1]
-                       or_0 := or.Args[0]
-                       or_1 := or.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
-                               s1 := or_0
-                               if s1.Op != OpAMD64SHLLconst {
-                                       continue
-                               }
-                               j1 := s1.AuxInt
-                               x1 := s1.Args[0]
-                               if x1.Op != OpAMD64MOVBload {
-                                       continue
-                               }
-                               i1 := x1.AuxInt
-                               if x1.Aux != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               if p != x1.Args[0] || mem != x1.Args[1] {
-                                       continue
-                               }
-                               y := or_1
-                               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, y)
-                               v0 := b.NewValue0(x1.Pos, OpAMD64ORL, v.Type)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x1.Pos, OpAMD64SHLLconst, v.Type)
-                               v1.AuxInt = j1
-                               v2 := b.NewValue0(x1.Pos, OpAMD64ROLWconst, typ.UInt16)
-                               v2.AuxInt = 8
-                               v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16)
-                               v3.AuxInt = i0
-                               v3.Aux = s
-                               v3.AddArg2(p, mem)
-                               v2.AddArg(v3)
-                               v1.AddArg(v2)
-                               v0.AddArg2(v1, y)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (ORL x1:(MOVBloadidx1 [i1] {s} p idx mem) sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
-       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x1 := v_0
-                       if x1.Op != OpAMD64MOVBloadidx1 {
-                               continue
-                       }
-                       i1 := x1.AuxInt
-                       s := x1.Aux
-                       mem := x1.Args[2]
-                       x1_0 := x1.Args[0]
-                       x1_1 := x1.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x1_0, x1_1 = _i1+1, x1_1, x1_0 {
-                               p := x1_0
-                               idx := x1_1
-                               sh := v_1
-                               if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 8 {
-                                       continue
-                               }
-                               x0 := sh.Args[0]
-                               if x0.Op != OpAMD64MOVBloadidx1 {
-                                       continue
-                               }
-                               i0 := x0.AuxInt
-                               if x0.Aux != s {
-                                       continue
-                               }
-                               _ = x0.Args[2]
-                               x0_0 := x0.Args[0]
-                               x0_1 := x0.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, x0_0, x0_1 = _i2+1, x0_1, x0_0 {
-                                       if p != x0_0 || idx != x0_1 || mem != x0.Args[2] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                                               continue
-                                       }
-                                       b = mergePoint(b, x0, x1)
-                                       v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
-                                       v.copyOf(v0)
-                                       v0.AuxInt = 8
-                                       v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, typ.UInt16)
-                                       v1.AuxInt = i0
-                                       v1.Aux = s
-                                       v1.AddArg3(p, idx, mem)
-                                       v0.AddArg(v1)
-                                       return true
-                               }
-                       }
-               }
-               break
-       }
-       // match: (ORL r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
-       // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       r1 := v_0
-                       if r1.Op != OpAMD64ROLWconst || r1.AuxInt != 8 {
-                               continue
-                       }
-                       x1 := r1.Args[0]
-                       if x1.Op != OpAMD64MOVWloadidx1 {
-                               continue
-                       }
-                       i1 := x1.AuxInt
-                       s := x1.Aux
-                       mem := x1.Args[2]
-                       x1_0 := x1.Args[0]
-                       x1_1 := x1.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x1_0, x1_1 = _i1+1, x1_1, x1_0 {
-                               p := x1_0
-                               idx := x1_1
-                               sh := v_1
-                               if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 16 {
-                                       continue
-                               }
-                               r0 := sh.Args[0]
-                               if r0.Op != OpAMD64ROLWconst || r0.AuxInt != 8 {
-                                       continue
-                               }
-                               x0 := r0.Args[0]
-                               if x0.Op != OpAMD64MOVWloadidx1 {
-                                       continue
-                               }
-                               i0 := x0.AuxInt
-                               if x0.Aux != s {
-                                       continue
-                               }
-                               _ = x0.Args[2]
-                               x0_0 := x0.Args[0]
-                               x0_1 := x0.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, x0_0, x0_1 = _i2+1, x0_1, x0_0 {
-                                       if p != x0_0 || idx != x0_1 || mem != x0.Args[2] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
-                                               continue
-                                       }
-                                       b = mergePoint(b, x0, x1)
-                                       v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
-                                       v.copyOf(v0)
-                                       v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, typ.UInt32)
-                                       v1.AuxInt = i0
-                                       v1.Aux = s
-                                       v1.AddArg3(p, idx, mem)
-                                       v0.AddArg(v1)
-                                       return true
-                               }
-                       }
+                       v1.AuxInt = i0
+                       v1.Aux = s
+                       v1.AddArg2(p0, mem)
+                       v0.AddArg(v1)
+                       return true
                }
                break
        }
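
The rule completing above (BSWAPL of a merged MOVLload) is the 32-bit step of the same pipeline: byte pairs first fuse into ROLWconst'd MOVWloads, which then fuse into one byte-swapped 32-bit load. Sketch:

        package sketch

        // read32be assembles a big-endian 32-bit value; after pairwise
        // merging, the rule above leaves a single MOVLload under BSWAPL.
        func read32be(b []byte) uint32 {
                return uint32(b[3]) | uint32(b[2])<<8 |
                        uint32(b[1])<<16 | uint32(b[0])<<24
        }
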
-       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y))
-       // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p0 mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p1 mem)) y))
+       // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
+       // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p0 mem))) y)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
                        s0 := v_0
@@ -22258,66 +18011,58 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                        }
                        j0 := s0.AuxInt
                        x0 := s0.Args[0]
-                       if x0.Op != OpAMD64MOVBloadidx1 {
+                       if x0.Op != OpAMD64MOVBload {
                                continue
                        }
                        i0 := x0.AuxInt
                        s := x0.Aux
-                       mem := x0.Args[2]
-                       x0_0 := x0.Args[0]
-                       x0_1 := x0.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 {
-                               p := x0_0
-                               idx := x0_1
-                               or := v_1
-                               if or.Op != OpAMD64ORL {
+                       mem := x0.Args[1]
+                       p0 := x0.Args[0]
+                       or := v_1
+                       if or.Op != OpAMD64ORL {
+                               continue
+                       }
+                       _ = or.Args[1]
+                       or_0 := or.Args[0]
+                       or_1 := or.Args[1]
+                       for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
+                               s1 := or_0
+                               if s1.Op != OpAMD64SHLLconst {
                                        continue
                                }
-                               _ = or.Args[1]
-                               or_0 := or.Args[0]
-                               or_1 := or.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, or_0, or_1 = _i2+1, or_1, or_0 {
-                                       s1 := or_0
-                                       if s1.Op != OpAMD64SHLLconst {
-                                               continue
-                                       }
-                                       j1 := s1.AuxInt
-                                       x1 := s1.Args[0]
-                                       if x1.Op != OpAMD64MOVBloadidx1 {
-                                               continue
-                                       }
-                                       i1 := x1.AuxInt
-                                       if x1.Aux != s {
-                                               continue
-                                       }
-                                       _ = x1.Args[2]
-                                       x1_0 := x1.Args[0]
-                                       x1_1 := x1.Args[1]
-                                       for _i3 := 0; _i3 <= 1; _i3, x1_0, x1_1 = _i3+1, x1_1, x1_0 {
-                                               if p != x1_0 || idx != x1_1 || mem != x1.Args[2] {
-                                                       continue
-                                               }
-                                               y := or_1
-                                               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                                       continue
-                                               }
-                                               b = mergePoint(b, x0, x1, y)
-                                               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-                                               v.copyOf(v0)
-                                               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-                                               v1.AuxInt = j1
-                                               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, typ.UInt16)
-                                               v2.AuxInt = 8
-                                               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, typ.UInt16)
-                                               v3.AuxInt = i0
-                                               v3.Aux = s
-                                               v3.AddArg3(p, idx, mem)
-                                               v2.AddArg(v3)
-                                               v1.AddArg(v2)
-                                               v0.AddArg2(v1, y)
-                                               return true
-                                       }
+                               j1 := s1.AuxInt
+                               x1 := s1.Args[0]
+                               if x1.Op != OpAMD64MOVBload {
+                                       continue
+                               }
+                               i1 := x1.AuxInt
+                               if x1.Aux != s {
+                                       continue
                                }
+                               _ = x1.Args[1]
+                               p1 := x1.Args[0]
+                               if mem != x1.Args[1] {
+                                       continue
+                               }
+                               y := or_1
+                               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
+                                       continue
+                               }
+                               b = mergePoint(b, x0, x1, y)
+                               v0 := b.NewValue0(x1.Pos, OpAMD64ORL, v.Type)
+                               v.copyOf(v0)
+                               v1 := b.NewValue0(x1.Pos, OpAMD64SHLLconst, v.Type)
+                               v1.AuxInt = j1
+                               v2 := b.NewValue0(x1.Pos, OpAMD64ROLWconst, typ.UInt16)
+                               v2.AuxInt = 8
+                               v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16)
+                               v3.AuxInt = i0
+                               v3.Aux = s
+                               v3.AddArg2(p0, mem)
+                               v2.AddArg(v3)
+                               v1.AddArg(v2)
+                               v0.AddArg2(v1, y)
+                               return true
                        }
                }
                break
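
The `same(p0, p1, 1)` condition is the new ingredient here: where the old rules demanded one syntactically shared base pointer `p`, the rewritten rules bind the two bases separately as `p0` and `p1` and accept them when they are structurally equal to a bounded depth. A minimal sketch of such a depth-bounded equality helper, on a toy value type rather than the real *ssa.Value (names and field layout are illustrative, not this CL's verbatim code):

    type value struct {
    	op     string
    	aux    interface{}
    	auxInt int64
    	args   []*value
    }

    // same reports whether x and y compute the same value, comparing
    // structurally up to depth levels. The shallow bound keeps the
    // check cheap; it may report a false negative, never a false positive.
    func same(x, y *value, depth int) bool {
    	if x == y {
    		return true // identical nodes match at any depth
    	}
    	if depth <= 0 {
    		return false // budget exhausted: conservatively differ
    	}
    	if x.op != y.op || x.aux != y.aux || x.auxInt != y.auxInt || len(x.args) != len(y.args) {
    		return false
    	}
    	for i := range x.args {
    		if !same(x.args[i], y.args[i], depth-1) {
    			return false
    		}
    	}
    	return true
    }

With depth 1, as in the conditions above, two distinct address values with the same op, symbol, offset, and pointer-identical arguments still compare equal — the kind of case that pointer-only matching on a single `p` misses.
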
@@ -22935,9 +18680,9 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                v.copyOf(x)
                return true
        }
-       // match: (ORQ x0:(MOVBload [i0] {s} p mem) sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem)))
-       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+       // match: (ORQ x0:(MOVBload [i0] {s} p0 mem) sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p1 mem)))
+       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
                        x0 := v_0
@@ -22947,7 +18692,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                        i0 := x0.AuxInt
                        s := x0.Aux
                        mem := x0.Args[1]
-                       p := x0.Args[0]
+                       p0 := x0.Args[0]
                        sh := v_1
                        if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 8 {
                                continue
@@ -22961,7 +18706,8 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                                continue
                        }
                        _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+                       p1 := x1.Args[0]
+                       if mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
                                continue
                        }
                        b = mergePoint(b, x0, x1)
@@ -22969,14 +18715,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                        v.copyOf(v0)
                        v0.AuxInt = i0
                        v0.Aux = s
-                       v0.AddArg2(p, mem)
+                       v0.AddArg2(p0, mem)
                        return true
                }
                break
        }
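
At the source level, the shape this rule recognizes is the ordinary little-endian byte combine. A hypothetical codegen-style test in the spirit of test/codegen/memops.go (function name illustrative):

    func load16LE(b []byte) uint16 {
    	// Two adjacent byte loads joined by a shift-by-8 and OR are
    	// fused into a single 16-bit load from the lower address.
    	return uint16(b[0]) | uint16(b[1])<<8
    }
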
-       // match: (ORQ x0:(MOVWload [i0] {s} p mem) sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem)))
-       // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
+       // match: (ORQ x0:(MOVWload [i0] {s} p0 mem) sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p1 mem)))
+       // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
                        x0 := v_0
@@ -22986,7 +18732,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                        i0 := x0.AuxInt
                        s := x0.Aux
                        mem := x0.Args[1]
-                       p := x0.Args[0]
+                       p0 := x0.Args[0]
                        sh := v_1
                        if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 16 {
                                continue
@@ -23000,7 +18746,8 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                                continue
                        }
                        _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+                       p1 := x1.Args[0]
+                       if mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
                                continue
                        }
                        b = mergePoint(b, x0, x1)
@@ -23008,14 +18755,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                        v.copyOf(v0)
                        v0.AuxInt = i0
                        v0.Aux = s
-                       v0.AddArg2(p, mem)
+                       v0.AddArg2(p0, mem)
                        return true
                }
                break
        }
-       // match: (ORQ x0:(MOVLload [i0] {s} p mem) sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)))
-       // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
+       // match: (ORQ x0:(MOVLload [i0] {s} p0 mem) sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p1 mem)))
+       // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+       // result: @mergePoint(b,x0,x1) (MOVQload [i0] {s} p0 mem)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
                        x0 := v_0
@@ -23025,7 +18772,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                        i0 := x0.AuxInt
                        s := x0.Aux
                        mem := x0.Args[1]
-                       p := x0.Args[0]
+                       p0 := x0.Args[0]
                        sh := v_1
                        if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 32 {
                                continue
@@ -23039,7 +18786,8 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                                continue
                        }
                        _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+                       p1 := x1.Args[0]
+                       if mem != x1.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
                                continue
                        }
                        b = mergePoint(b, x0, x1)
@@ -23047,14 +18795,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                        v.copyOf(v0)
                        v0.AuxInt = i0
                        v0.Aux = s
-                       v0.AddArg2(p, mem)
+                       v0.AddArg2(p0, mem)
                        return true
                }
                break
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) y))
-       // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p0 mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p1 mem)) y))
+       // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
+       // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p0 mem)) y)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
                        s1 := v_0
@@ -23069,7 +18817,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                        i1 := x1.AuxInt
                        s := x1.Aux
                        mem := x1.Args[1]
-                       p := x1.Args[0]
+                       p0 := x1.Args[0]
                        or := v_1
                        if or.Op != OpAMD64ORQ {
                                continue
@@ -23092,11 +18840,12 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                                        continue
                                }
                                _ = x0.Args[1]
-                               if p != x0.Args[0] || mem != x0.Args[1] {
+                               p1 := x0.Args[0]
+                               if mem != x0.Args[1] {
                                        continue
                                }
                                y := or_1
-                               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
+                               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
                                        continue
                                }
                                b = mergePoint(b, x0, x1, y)
@@ -23107,7 +18856,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                                v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16)
                                v2.AuxInt = i0
                                v2.Aux = s
-                               v2.AddArg2(p, mem)
+                               v2.AddArg2(p0, mem)
                                v1.AddArg(v2)
                                v0.AddArg2(v1, y)
                                return true
@@ -23115,9 +18864,9 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                }
                break
        }
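
The `or:(ORQ ... y)` variants cover the interior of a longer combine chain: two adjacent shifted bytes fuse into one shifted word while the remaining terms, bound as `y`, are left for later applications of the same rule family. A full little-endian decode, for illustration:

    func load64LE(b []byte) uint64 {
    	// Pairwise rewrites fold bytes into words, words into longs,
    	// and longs into a single 64-bit load.
    	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
    		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
    }
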
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem)) y))
-       // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p0 mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p1 mem)) y))
+       // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
+       // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p0 mem)) y)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
                        s1 := v_0
@@ -23132,7 +18881,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                        i1 := x1.AuxInt
                        s := x1.Aux
                        mem := x1.Args[1]
-                       p := x1.Args[0]
+                       p0 := x1.Args[0]
                        or := v_1
                        if or.Op != OpAMD64ORQ {
                                continue
@@ -23155,11 +18904,12 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                                        continue
                                }
                                _ = x0.Args[1]
-                               if p != x0.Args[0] || mem != x0.Args[1] {
+                               p1 := x0.Args[0]
+                               if mem != x0.Args[1] {
                                        continue
                                }
                                y := or_1
-                               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
+                               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
                                        continue
                                }
                                b = mergePoint(b, x0, x1, y)
@@ -23170,7 +18920,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                                v2 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32)
                                v2.AuxInt = i0
                                v2.Aux = s
-                               v2.AddArg2(p, mem)
+                               v2.AddArg2(p0, mem)
                                v1.AddArg(v2)
                                v0.AddArg2(v1, y)
                                return true
@@ -23178,297 +18928,9 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                }
                break
        }
-       // match: (ORQ x0:(MOVBloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpAMD64MOVBloadidx1 {
-                               continue
-                       }
-                       i0 := x0.AuxInt
-                       s := x0.Aux
-                       mem := x0.Args[2]
-                       x0_0 := x0.Args[0]
-                       x0_1 := x0.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 {
-                               p := x0_0
-                               idx := x0_1
-                               sh := v_1
-                               if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 8 {
-                                       continue
-                               }
-                               x1 := sh.Args[0]
-                               if x1.Op != OpAMD64MOVBloadidx1 {
-                                       continue
-                               }
-                               i1 := x1.AuxInt
-                               if x1.Aux != s {
-                                       continue
-                               }
-                               _ = x1.Args[2]
-                               x1_0 := x1.Args[0]
-                               x1_1 := x1.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, x1_0, x1_1 = _i2+1, x1_1, x1_0 {
-                                       if p != x1_0 || idx != x1_1 || mem != x1.Args[2] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                                               continue
-                                       }
-                                       b = mergePoint(b, x0, x1)
-                                       v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
-                                       v.copyOf(v0)
-                                       v0.AuxInt = i0
-                                       v0.Aux = s
-                                       v0.AddArg3(p, idx, mem)
-                                       return true
-                               }
-                       }
-               }
-               break
-       }
-       // match: (ORQ x0:(MOVWloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpAMD64MOVWloadidx1 {
-                               continue
-                       }
-                       i0 := x0.AuxInt
-                       s := x0.Aux
-                       mem := x0.Args[2]
-                       x0_0 := x0.Args[0]
-                       x0_1 := x0.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 {
-                               p := x0_0
-                               idx := x0_1
-                               sh := v_1
-                               if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 16 {
-                                       continue
-                               }
-                               x1 := sh.Args[0]
-                               if x1.Op != OpAMD64MOVWloadidx1 {
-                                       continue
-                               }
-                               i1 := x1.AuxInt
-                               if x1.Aux != s {
-                                       continue
-                               }
-                               _ = x1.Args[2]
-                               x1_0 := x1.Args[0]
-                               x1_1 := x1.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, x1_0, x1_1 = _i2+1, x1_1, x1_0 {
-                                       if p != x1_0 || idx != x1_1 || mem != x1.Args[2] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                                               continue
-                                       }
-                                       b = mergePoint(b, x0, x1)
-                                       v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, typ.UInt32)
-                                       v.copyOf(v0)
-                                       v0.AuxInt = i0
-                                       v0.Aux = s
-                                       v0.AddArg3(p, idx, mem)
-                                       return true
-                               }
-                       }
-               }
-               break
-       }
-       // match: (ORQ x0:(MOVLloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpAMD64MOVLloadidx1 {
-                               continue
-                       }
-                       i0 := x0.AuxInt
-                       s := x0.Aux
-                       mem := x0.Args[2]
-                       x0_0 := x0.Args[0]
-                       x0_1 := x0.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 {
-                               p := x0_0
-                               idx := x0_1
-                               sh := v_1
-                               if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 32 {
-                                       continue
-                               }
-                               x1 := sh.Args[0]
-                               if x1.Op != OpAMD64MOVLloadidx1 {
-                                       continue
-                               }
-                               i1 := x1.AuxInt
-                               if x1.Aux != s {
-                                       continue
-                               }
-                               _ = x1.Args[2]
-                               x1_0 := x1.Args[0]
-                               x1_1 := x1.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, x1_0, x1_1 = _i2+1, x1_1, x1_0 {
-                                       if p != x1_0 || idx != x1_1 || mem != x1.Args[2] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                                               continue
-                                       }
-                                       b = mergePoint(b, x0, x1)
-                                       v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, typ.UInt64)
-                                       v.copyOf(v0)
-                                       v0.AuxInt = i0
-                                       v0.Aux = s
-                                       v0.AddArg3(p, idx, mem)
-                                       return true
-                               }
-                       }
-               }
-               break
-       }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y))
-       // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s1 := v_0
-                       if s1.Op != OpAMD64SHLQconst {
-                               continue
-                       }
-                       j1 := s1.AuxInt
-                       x1 := s1.Args[0]
-                       if x1.Op != OpAMD64MOVBloadidx1 {
-                               continue
-                       }
-                       i1 := x1.AuxInt
-                       s := x1.Aux
-                       mem := x1.Args[2]
-                       x1_0 := x1.Args[0]
-                       x1_1 := x1.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x1_0, x1_1 = _i1+1, x1_1, x1_0 {
-                               p := x1_0
-                               idx := x1_1
-                               or := v_1
-                               if or.Op != OpAMD64ORQ {
-                                       continue
-                               }
-                               _ = or.Args[1]
-                               or_0 := or.Args[0]
-                               or_1 := or.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, or_0, or_1 = _i2+1, or_1, or_0 {
-                                       s0 := or_0
-                                       if s0.Op != OpAMD64SHLQconst {
-                                               continue
-                                       }
-                                       j0 := s0.AuxInt
-                                       x0 := s0.Args[0]
-                                       if x0.Op != OpAMD64MOVBloadidx1 {
-                                               continue
-                                       }
-                                       i0 := x0.AuxInt
-                                       if x0.Aux != s {
-                                               continue
-                                       }
-                                       _ = x0.Args[2]
-                                       x0_0 := x0.Args[0]
-                                       x0_1 := x0.Args[1]
-                                       for _i3 := 0; _i3 <= 1; _i3, x0_0, x0_1 = _i3+1, x0_1, x0_0 {
-                                               if p != x0_0 || idx != x0_1 || mem != x0.Args[2] {
-                                                       continue
-                                               }
-                                               y := or_1
-                                               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                                       continue
-                                               }
-                                               b = mergePoint(b, x0, x1, y)
-                                               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-                                               v.copyOf(v0)
-                                               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-                                               v1.AuxInt = j0
-                                               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, typ.UInt16)
-                                               v2.AuxInt = i0
-                                               v2.Aux = s
-                                               v2.AddArg3(p, idx, mem)
-                                               v1.AddArg(v2)
-                                               v0.AddArg2(v1, y)
-                                               return true
-                                       }
-                               }
-                       }
-               }
-               break
-       }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem)) y))
-       // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s1 := v_0
-                       if s1.Op != OpAMD64SHLQconst {
-                               continue
-                       }
-                       j1 := s1.AuxInt
-                       x1 := s1.Args[0]
-                       if x1.Op != OpAMD64MOVWloadidx1 {
-                               continue
-                       }
-                       i1 := x1.AuxInt
-                       s := x1.Aux
-                       mem := x1.Args[2]
-                       x1_0 := x1.Args[0]
-                       x1_1 := x1.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x1_0, x1_1 = _i1+1, x1_1, x1_0 {
-                               p := x1_0
-                               idx := x1_1
-                               or := v_1
-                               if or.Op != OpAMD64ORQ {
-                                       continue
-                               }
-                               _ = or.Args[1]
-                               or_0 := or.Args[0]
-                               or_1 := or.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, or_0, or_1 = _i2+1, or_1, or_0 {
-                                       s0 := or_0
-                                       if s0.Op != OpAMD64SHLQconst {
-                                               continue
-                                       }
-                                       j0 := s0.AuxInt
-                                       x0 := s0.Args[0]
-                                       if x0.Op != OpAMD64MOVWloadidx1 {
-                                               continue
-                                       }
-                                       i0 := x0.AuxInt
-                                       if x0.Aux != s {
-                                               continue
-                                       }
-                                       _ = x0.Args[2]
-                                       x0_0 := x0.Args[0]
-                                       x0_1 := x0.Args[1]
-                                       for _i3 := 0; _i3 <= 1; _i3, x0_0, x0_1 = _i3+1, x0_1, x0_0 {
-                                               if p != x0_0 || idx != x0_1 || mem != x0.Args[2] {
-                                                       continue
-                                               }
-                                               y := or_1
-                                               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                                       continue
-                                               }
-                                               b = mergePoint(b, x0, x1, y)
-                                               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-                                               v.copyOf(v0)
-                                               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-                                               v1.AuxInt = j0
-                                               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, typ.UInt32)
-                                               v2.AuxInt = i0
-                                               v2.Aux = s
-                                               v2.AddArg3(p, idx, mem)
-                                               v1.AddArg(v2)
-                                               v0.AddArg2(v1, y)
-                                               return true
-                                       }
-                               }
-                       }
-               }
-               break
-       }
-       // match: (ORQ x1:(MOVBload [i1] {s} p mem) sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem)))
-       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
+       // match: (ORQ x1:(MOVBload [i1] {s} p0 mem) sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p1 mem)))
+       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p0 mem))
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
                        x1 := v_0
@@ -23478,7 +18940,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                        i1 := x1.AuxInt
                        s := x1.Aux
                        mem := x1.Args[1]
-                       p := x1.Args[0]
+                       p0 := x1.Args[0]
                        sh := v_1
                        if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 8 {
                                continue
@@ -23492,7 +18954,8 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                                continue
                        }
                        _ = x0.Args[1]
-                       if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+                       p1 := x0.Args[0]
+                       if mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
                                continue
                        }
                        b = mergePoint(b, x0, x1)
@@ -23502,15 +18965,15 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                        v1 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16)
                        v1.AuxInt = i0
                        v1.Aux = s
-                       v1.AddArg2(p, mem)
+                       v1.AddArg2(p0, mem)
                        v0.AddArg(v1)
                        return true
                }
                break
        }
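
In this rule the unshifted byte (x1, at i1 == i0+1) lives at the higher address, i.e. a big-endian 16-bit read. A plain 16-bit load fetches the bytes in the wrong order on a little-endian machine, so the result wraps the load in ROLWconst [8], a 16-bit rotate by 8 that acts as a byte swap. Source-level shape (illustrative):

    func load16BE(b []byte) uint16 {
    	// b[0] is the high byte: compiles to one word load plus a
    	// rotate-by-8 instead of two loads, a shift, and an OR.
    	return uint16(b[0])<<8 | uint16(b[1])
    }
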
-       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
-       // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
+       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem))))
+       // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p0 mem))
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
                        r1 := v_0
@@ -23524,7 +18987,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                        i1 := x1.AuxInt
                        s := x1.Aux
                        mem := x1.Args[1]
-                       p := x1.Args[0]
+                       p0 := x1.Args[0]
                        sh := v_1
                        if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 16 {
                                continue
@@ -23542,7 +19005,8 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                                continue
                        }
                        _ = x0.Args[1]
-                       if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
+                       p1 := x0.Args[0]
+                       if mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
                                continue
                        }
                        b = mergePoint(b, x0, x1)
@@ -23551,15 +19015,15 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                        v1 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32)
                        v1.AuxInt = i0
                        v1.Aux = s
-                       v1.AddArg2(p, mem)
+                       v1.AddArg2(p0, mem)
                        v0.AddArg(v1)
                        return true
                }
                break
        }
-       // match: (ORQ r1:(BSWAPL x1:(MOVLload [i1] {s} p mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))))
-       // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
+       // match: (ORQ r1:(BSWAPL x1:(MOVLload [i1] {s} p0 mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p1 mem))))
+       // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p0 mem))
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
                        r1 := v_0
@@ -23573,7 +19037,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                        i1 := x1.AuxInt
                        s := x1.Aux
                        mem := x1.Args[1]
-                       p := x1.Args[0]
+                       p0 := x1.Args[0]
                        sh := v_1
                        if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 32 {
                                continue
@@ -23591,7 +19055,8 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                                continue
                        }
                        _ = x0.Args[1]
-                       if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
+                       p1 := x0.Args[0]
+                       if mem != x0.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
                                continue
                        }
                        b = mergePoint(b, x0, x1)
@@ -23600,15 +19065,15 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                        v1 := b.NewValue0(x0.Pos, OpAMD64MOVQload, typ.UInt64)
                        v1.AuxInt = i0
                        v1.Aux = s
-                       v1.AddArg2(p, mem)
+                       v1.AddArg2(p0, mem)
                        v0.AddArg(v1)
                        return true
                }
                break
        }
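
This is the widest big-endian case: two byte-reversed 32-bit halves (BSWAPL of MOVLload) merge into a single byte-reversed 64-bit load (BSWAPQ of MOVQload). The source idiom is the usual big-endian uint64 decode, roughly what encoding/binary's BigEndian.Uint64 reduces to on amd64 (sketch, not a verbatim test):

    func load64BE(b []byte) uint64 {
    	// Eight big-endian-ordered bytes become one 64-bit load
    	// followed by a single BSWAPQ.
    	return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 |
    		uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56
    }
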
-       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) y))
-       // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p0 mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p1 mem)) y))
+       // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
+       // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p0 mem))) y)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
                        s0 := v_0
@@ -23623,7 +19088,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                        i0 := x0.AuxInt
                        s := x0.Aux
                        mem := x0.Args[1]
-                       p := x0.Args[0]
+                       p0 := x0.Args[0]
                        or := v_1
                        if or.Op != OpAMD64ORQ {
                                continue
@@ -23646,11 +19111,12 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                                        continue
                                }
                                _ = x1.Args[1]
-                               if p != x1.Args[0] || mem != x1.Args[1] {
+                               p1 := x1.Args[0]
+                               if mem != x1.Args[1] {
                                        continue
                                }
                                y := or_1
-                               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
+                               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
                                        continue
                                }
                                b = mergePoint(b, x0, x1, y)
@@ -23663,7 +19129,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                                v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16)
                                v3.AuxInt = i0
                                v3.Aux = s
-                               v3.AddArg2(p, mem)
+                               v3.AddArg2(p0, mem)
                                v2.AddArg(v3)
                                v1.AddArg(v2)
                                v0.AddArg2(v1, y)
@@ -23672,9 +19138,9 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                }
                break
        }
-       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))) y))
-       // cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p mem))) y)
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p0 mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p1 mem))) y))
+       // cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)
+       // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p0 mem))) y)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
                        s0 := v_0
@@ -23693,7 +19159,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                        i0 := x0.AuxInt
                        s := x0.Aux
                        mem := x0.Args[1]
-                       p := x0.Args[0]
+                       p0 := x0.Args[0]
                        or := v_1
                        if or.Op != OpAMD64ORQ {
                                continue
@@ -23720,11 +19186,12 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                                        continue
                                }
                                _ = x1.Args[1]
-                               if p != x1.Args[0] || mem != x1.Args[1] {
+                               p1 := x1.Args[0]
+                               if mem != x1.Args[1] {
                                        continue
                                }
                                y := or_1
-                               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)) {
+                               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)) {
                                        continue
                                }
                                b = mergePoint(b, x0, x1, y)
@@ -23736,7 +19203,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                                v3 := b.NewValue0(x1.Pos, OpAMD64MOVLload, typ.UInt32)
                                v3.AuxInt = i0
                                v3.Aux = s
-                               v3.AddArg2(p, mem)
+                               v3.AddArg2(p0, mem)
                                v2.AddArg(v3)
                                v1.AddArg(v2)
                                v0.AddArg2(v1, y)
@@ -23745,330 +19212,6 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                }
                break
        }
-       // match: (ORQ x1:(MOVBloadidx1 [i1] {s} p idx mem) sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
-       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x1 := v_0
-                       if x1.Op != OpAMD64MOVBloadidx1 {
-                               continue
-                       }
-                       i1 := x1.AuxInt
-                       s := x1.Aux
-                       mem := x1.Args[2]
-                       x1_0 := x1.Args[0]
-                       x1_1 := x1.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x1_0, x1_1 = _i1+1, x1_1, x1_0 {
-                               p := x1_0
-                               idx := x1_1
-                               sh := v_1
-                               if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 8 {
-                                       continue
-                               }
-                               x0 := sh.Args[0]
-                               if x0.Op != OpAMD64MOVBloadidx1 {
-                                       continue
-                               }
-                               i0 := x0.AuxInt
-                               if x0.Aux != s {
-                                       continue
-                               }
-                               _ = x0.Args[2]
-                               x0_0 := x0.Args[0]
-                               x0_1 := x0.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, x0_0, x0_1 = _i2+1, x0_1, x0_0 {
-                                       if p != x0_0 || idx != x0_1 || mem != x0.Args[2] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                                               continue
-                                       }
-                                       b = mergePoint(b, x0, x1)
-                                       v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
-                                       v.copyOf(v0)
-                                       v0.AuxInt = 8
-                                       v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, typ.UInt16)
-                                       v1.AuxInt = i0
-                                       v1.Aux = s
-                                       v1.AddArg3(p, idx, mem)
-                                       v0.AddArg(v1)
-                                       return true
-                               }
-                       }
-               }
-               break
-       }
-       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
-       // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       r1 := v_0
-                       if r1.Op != OpAMD64ROLWconst || r1.AuxInt != 8 {
-                               continue
-                       }
-                       x1 := r1.Args[0]
-                       if x1.Op != OpAMD64MOVWloadidx1 {
-                               continue
-                       }
-                       i1 := x1.AuxInt
-                       s := x1.Aux
-                       mem := x1.Args[2]
-                       x1_0 := x1.Args[0]
-                       x1_1 := x1.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x1_0, x1_1 = _i1+1, x1_1, x1_0 {
-                               p := x1_0
-                               idx := x1_1
-                               sh := v_1
-                               if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 16 {
-                                       continue
-                               }
-                               r0 := sh.Args[0]
-                               if r0.Op != OpAMD64ROLWconst || r0.AuxInt != 8 {
-                                       continue
-                               }
-                               x0 := r0.Args[0]
-                               if x0.Op != OpAMD64MOVWloadidx1 {
-                                       continue
-                               }
-                               i0 := x0.AuxInt
-                               if x0.Aux != s {
-                                       continue
-                               }
-                               _ = x0.Args[2]
-                               x0_0 := x0.Args[0]
-                               x0_1 := x0.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, x0_0, x0_1 = _i2+1, x0_1, x0_0 {
-                                       if p != x0_0 || idx != x0_1 || mem != x0.Args[2] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
-                                               continue
-                                       }
-                                       b = mergePoint(b, x0, x1)
-                                       v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
-                                       v.copyOf(v0)
-                                       v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, typ.UInt32)
-                                       v1.AuxInt = i0
-                                       v1.Aux = s
-                                       v1.AddArg3(p, idx, mem)
-                                       v0.AddArg(v1)
-                                       return true
-                               }
-                       }
-               }
-               break
-       }
-       // match: (ORQ r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))))
-       // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       r1 := v_0
-                       if r1.Op != OpAMD64BSWAPL {
-                               continue
-                       }
-                       x1 := r1.Args[0]
-                       if x1.Op != OpAMD64MOVLloadidx1 {
-                               continue
-                       }
-                       i1 := x1.AuxInt
-                       s := x1.Aux
-                       mem := x1.Args[2]
-                       x1_0 := x1.Args[0]
-                       x1_1 := x1.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x1_0, x1_1 = _i1+1, x1_1, x1_0 {
-                               p := x1_0
-                               idx := x1_1
-                               sh := v_1
-                               if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 32 {
-                                       continue
-                               }
-                               r0 := sh.Args[0]
-                               if r0.Op != OpAMD64BSWAPL {
-                                       continue
-                               }
-                               x0 := r0.Args[0]
-                               if x0.Op != OpAMD64MOVLloadidx1 {
-                                       continue
-                               }
-                               i0 := x0.AuxInt
-                               if x0.Aux != s {
-                                       continue
-                               }
-                               _ = x0.Args[2]
-                               x0_0 := x0.Args[0]
-                               x0_1 := x0.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, x0_0, x0_1 = _i2+1, x0_1, x0_0 {
-                                       if p != x0_0 || idx != x0_1 || mem != x0.Args[2] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
-                                               continue
-                                       }
-                                       b = mergePoint(b, x0, x1)
-                                       v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
-                                       v.copyOf(v0)
-                                       v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, typ.UInt64)
-                                       v1.AuxInt = i0
-                                       v1.Aux = s
-                                       v1.AddArg3(p, idx, mem)
-                                       v0.AddArg(v1)
-                                       return true
-                               }
-                       }
-               }
-               break
-       }
-       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y))
-       // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s0 := v_0
-                       if s0.Op != OpAMD64SHLQconst {
-                               continue
-                       }
-                       j0 := s0.AuxInt
-                       x0 := s0.Args[0]
-                       if x0.Op != OpAMD64MOVBloadidx1 {
-                               continue
-                       }
-                       i0 := x0.AuxInt
-                       s := x0.Aux
-                       mem := x0.Args[2]
-                       x0_0 := x0.Args[0]
-                       x0_1 := x0.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 {
-                               p := x0_0
-                               idx := x0_1
-                               or := v_1
-                               if or.Op != OpAMD64ORQ {
-                                       continue
-                               }
-                               _ = or.Args[1]
-                               or_0 := or.Args[0]
-                               or_1 := or.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, or_0, or_1 = _i2+1, or_1, or_0 {
-                                       s1 := or_0
-                                       if s1.Op != OpAMD64SHLQconst {
-                                               continue
-                                       }
-                                       j1 := s1.AuxInt
-                                       x1 := s1.Args[0]
-                                       if x1.Op != OpAMD64MOVBloadidx1 {
-                                               continue
-                                       }
-                                       i1 := x1.AuxInt
-                                       if x1.Aux != s {
-                                               continue
-                                       }
-                                       _ = x1.Args[2]
-                                       x1_0 := x1.Args[0]
-                                       x1_1 := x1.Args[1]
-                                       for _i3 := 0; _i3 <= 1; _i3, x1_0, x1_1 = _i3+1, x1_1, x1_0 {
-                                               if p != x1_0 || idx != x1_1 || mem != x1.Args[2] {
-                                                       continue
-                                               }
-                                               y := or_1
-                                               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                                       continue
-                                               }
-                                               b = mergePoint(b, x0, x1, y)
-                                               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-                                               v.copyOf(v0)
-                                               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-                                               v1.AuxInt = j1
-                                               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, typ.UInt16)
-                                               v2.AuxInt = 8
-                                               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, typ.UInt16)
-                                               v3.AuxInt = i0
-                                               v3.Aux = s
-                                               v3.AddArg3(p, idx, mem)
-                                               v2.AddArg(v3)
-                                               v1.AddArg(v2)
-                                               v0.AddArg2(v1, y)
-                                               return true
-                                       }
-                               }
-                       }
-               }
-               break
-       }
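This second matcher covered the byte-granularity step of the same merge: inside an ORQ chain, two adjacent byte loads whose shift counts differ by 8 (the smaller one 16-bit aligned) collapse into one byte-swapped 16-bit indexed load shifted into position. A hand-written chain of that shape (a sketch, not from this CL):

	package p

	// A manual big-endian 64-bit read. Each adjacent pair of byte loads,
	// e.g. uint64(b[i])<<56 | uint64(b[i+1])<<48, was fused by the
	// deleted rule into ROLW $8 of a 16-bit indexed load.
	func beUint64manual(b []byte, i int) uint64 {
		return uint64(b[i])<<56 | uint64(b[i+1])<<48 |
			uint64(b[i+2])<<40 | uint64(b[i+3])<<32 |
			uint64(b[i+4])<<24 | uint64(b[i+5])<<16 |
			uint64(b[i+6])<<8 | uint64(b[i+7])
	}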
-       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) y))
-       // cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s0 := v_0
-                       if s0.Op != OpAMD64SHLQconst {
-                               continue
-                       }
-                       j0 := s0.AuxInt
-                       r0 := s0.Args[0]
-                       if r0.Op != OpAMD64ROLWconst || r0.AuxInt != 8 {
-                               continue
-                       }
-                       x0 := r0.Args[0]
-                       if x0.Op != OpAMD64MOVWloadidx1 {
-                               continue
-                       }
-                       i0 := x0.AuxInt
-                       s := x0.Aux
-                       mem := x0.Args[2]
-                       x0_0 := x0.Args[0]
-                       x0_1 := x0.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 {
-                               p := x0_0
-                               idx := x0_1
-                               or := v_1
-                               if or.Op != OpAMD64ORQ {
-                                       continue
-                               }
-                               _ = or.Args[1]
-                               or_0 := or.Args[0]
-                               or_1 := or.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, or_0, or_1 = _i2+1, or_1, or_0 {
-                                       s1 := or_0
-                                       if s1.Op != OpAMD64SHLQconst {
-                                               continue
-                                       }
-                                       j1 := s1.AuxInt
-                                       r1 := s1.Args[0]
-                                       if r1.Op != OpAMD64ROLWconst || r1.AuxInt != 8 {
-                                               continue
-                                       }
-                                       x1 := r1.Args[0]
-                                       if x1.Op != OpAMD64MOVWloadidx1 {
-                                               continue
-                                       }
-                                       i1 := x1.AuxInt
-                                       if x1.Aux != s {
-                                               continue
-                                       }
-                                       _ = x1.Args[2]
-                                       x1_0 := x1.Args[0]
-                                       x1_1 := x1.Args[1]
-                                       for _i3 := 0; _i3 <= 1; _i3, x1_0, x1_1 = _i3+1, x1_1, x1_0 {
-                                               if p != x1_0 || idx != x1_1 || mem != x1.Args[2] {
-                                                       continue
-                                               }
-                                               y := or_1
-                                               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)) {
-                                                       continue
-                                               }
-                                               b = mergePoint(b, x0, x1, y)
-                                               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-                                               v.copyOf(v0)
-                                               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-                                               v1.AuxInt = j1
-                                               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, typ.UInt32)
-                                               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, typ.UInt32)
-                                               v3.AuxInt = i0
-                                               v3.Aux = s
-                                               v3.AddArg3(p, idx, mem)
-                                               v2.AddArg(v3)
-                                               v1.AddArg(v2)
-                                               v0.AddArg2(v1, y)
-                                               return true
-                                       }
-                               }
-                       }
-               }
-               break
-       }
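The third matcher handled the middle step, fusing two byte-swapped 16-bit indexed loads into a byte-swapped 32-bit indexed load. Note that all three deletions are the MOVxloadidx1 variants only; the plain-load versions of these merge rules remain, with indexed addressing reconstructed afterwards. A codegen-style spot check (a sketch; the expected assembly is stated informally):

	package p

	import "encoding/binary"

	// After this CL the merged load should still be a single 8-byte
	// indexed load plus a byte swap, e.g. MOVQ (AX)(BX*1), CX; BSWAPQ CX.
	func beLoad(b []byte, i int) uint64 {
		return binary.BigEndian.Uint64(b[i:])
	}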
        // match: (ORQ x l:(MOVQload [off] {sym} ptr mem))
        // cond: canMergeLoadClobber(v, l, x) && clobber(l)
        // result: (ORQload x [off] {sym} ptr mem)
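By contrast, the match above is kept: it folds a plain (non-indexed) MOVQload into the OR itself. A one-liner that hits it (sketch):

	package p

	// Compiles on amd64 to an ORQ with a memory source operand
	// (the ORQload op), e.g. ORQ (AX), BX.
	func orMem(x uint64, p *uint64) uint64 {
		return x | *p
	}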
index dcf586366652155164c5d3080372b913ae4297f5..9d18153a29387e2200abba0138531187c89c2da6 100644 (file)
@@ -93,3 +93,91 @@ func compMem3(x, y *int) (int, bool) {
        // 386:`CMPL\t\(`
        return r, r < *y
 }
+
+// The following functions test that indexed load/store operations get generated.
+
+func idxInt8(x, y []int8, i int) {
+       var t int8
+       // amd64: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+       t = x[i+1]
+       // amd64: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+       y[i+1] = t
+       // amd64: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+       x[i+1] = 77
+}
+
+func idxInt16(x, y []int16, i int) {
+       var t int16
+       // amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
+       t = x[i+1]
+       // amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+       y[i+1] = t
+       // amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
+       t = x[16*i+1]
+       // amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+       y[16*i+1] = t
+       // amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+       x[i+1] = 77
+       // amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+       x[16*i+1] = 77
+}
+
+func idxInt32(x, y []int32, i int) {
+       var t int32
+       // amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+       t = x[i+1]
+       // amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+       y[i+1] = t
+       // amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+       t = x[2*i+1]
+       // amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+       y[2*i+1] = t
+       // amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
+       t = x[16*i+1]
+       // amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+       y[16*i+1] = t
+       // amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+       x[i+1] = 77
+       // amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+       x[16*i+1] = 77
+}
+
+func idxInt64(x, y []int64, i int) {
+       var t int64
+       // amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+       t = x[i+1]
+       // amd64: `MOVQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+       y[i+1] = t
+       // amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), [A-Z]+[0-9]*`
+       t = x[16*i+1]
+       // amd64: `MOVQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+       y[16*i+1] = t
+       // amd64: `MOVQ\t[$]77, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+       x[i+1] = 77
+       // amd64: `MOVQ\t[$]77, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+       x[16*i+1] = 77
+}
+
+func idxFloat32(x, y []float32, i int) {
+       var t float32
+       // amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+       t = x[i+1]
+       // amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+       y[i+1] = t
+       // amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
+       t = x[16*i+1]
+       // amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+       y[16*i+1] = t
+}
+
+func idxFloat64(x, y []float64, i int) {
+       var t float64
+       // amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+       t = x[i+1]
+       // amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+       y[i+1] = t
+       // amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
+       t = x[16*i+1]
+       // amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+       y[16*i+1] = t
+}
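These functions use the test/codegen asmcheck convention: each "// amd64:" comment carries regular expressions that must match the assembly generated for the statement on the next line. Alternations such as \*[12], \*[14], and \*[18] accept more than one scale, since the 16*i index may be strength-reduced and folded into the addressing mode at different scales. The same pattern extends to byte slices (an illustrative sketch, not part of this CL):

	package codegen

	// Sketch: the analogous indexed forms for []byte.
	func idxByte(x, y []byte, i int) {
		var t byte
		// amd64: `MOVBLZX\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
		t = x[i+1]
		// amd64: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
		y[i+1] = t
	}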