Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: introduce separate memory op combining pass
authorKeith Randall <khr@golang.org>
Tue, 21 Mar 2023 16:25:43 +0000 (09:25 -0700)
committerKeith Randall <khr@golang.org>
Fri, 21 Apr 2023 21:05:46 +0000 (21:05 +0000)
Memory op combining is currently done using arch-specific rewrite rules.
Instead, do them as an arch-independent rewrite pass. This ensures that
all architectures (with unaligned loads & stores) get equal treatment.

This removes a lot of rewrite rules.

The new pass is a bit more comprehensive. It handles things like out-of-order
writes and is careful not to apply partial optimizations that then block
further optimizations.

Change-Id: I780ff3bb052475cd725a923309616882d25b8d9e
Reviewed-on: https://go-review.googlesource.com/c/go/+/478475
Reviewed-by: Keith Randall <khr@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: David Chase <drchase@google.com>
17 files changed:
src/cmd/compile/internal/ssa/_gen/386.rules
src/cmd/compile/internal/ssa/_gen/AMD64.rules
src/cmd/compile/internal/ssa/_gen/ARM64.rules
src/cmd/compile/internal/ssa/_gen/PPC64.rules
src/cmd/compile/internal/ssa/_gen/PPC64Ops.go
src/cmd/compile/internal/ssa/_gen/S390X.rules
src/cmd/compile/internal/ssa/compile.go
src/cmd/compile/internal/ssa/config.go
src/cmd/compile/internal/ssa/memcombine.go [new file with mode: 0644]
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewrite386.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssa/rewriteARM64.go
src/cmd/compile/internal/ssa/rewritePPC64.go
src/cmd/compile/internal/ssa/rewriteS390X.go
src/cmd/compile/internal/ssa/value.go
src/cmd/compile/internal/test/memcombine_test.go [new file with mode: 0644]

index 4dfe05d0e992bca4acd695f1c3b06a477b0a83ae..d92dddd377af337bbc81e714c3164eda6159297e 100644 (file)
@@ -52,6 +52,7 @@
 (OffPtr [off] ptr) => (ADDLconst [int32(off)] ptr)
 
 (Bswap32 ...) => (BSWAPL ...)
+(Bswap16 x) => (ROLWconst [8] x)
 
 (Sqrt ...) => (SQRTSD ...)
 (Sqrt32 ...) => (SQRTSS ...)
 // Convert LEAL1 back to ADDL if we can
 (LEAL1 [0] {nil} x y) => (ADDL x y)
 
-// Combining byte loads into larger (unaligned) loads.
-// There are many ways these combinations could occur.  This is
-// designed to match the way encoding/binary.LittleEndian does it.
-(ORL                  x0:(MOVBload [i0] {s} p mem)
-    s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
-  && i1 == i0+1
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, s0)
-  => @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
-
-(ORL                  x0:(MOVBload [i] {s} p0 mem)
-    s0:(SHLLconst [8] x1:(MOVBload [i] {s} p1 mem)))
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && sequentialAddresses(p0, p1, 1)
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, s0)
-  => @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem)
-
-(ORL o0:(ORL
-                       x0:(MOVWload [i0] {s} p mem)
-    s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem)))
-    s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)))
-  && i2 == i0+2
-  && i3 == i0+3
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && x2.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && o0.Uses == 1
-  && mergePoint(b,x0,x1,x2) != nil
-  && clobber(x0, x1, x2, s0, s1, o0)
-  => @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
-
-(ORL o0:(ORL
-                       x0:(MOVWload [i] {s} p0 mem)
-    s0:(SHLLconst [16] x1:(MOVBload [i] {s} p1 mem)))
-    s1:(SHLLconst [24] x2:(MOVBload [i] {s} p2 mem)))
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && x2.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && o0.Uses == 1
-  && sequentialAddresses(p0, p1, 2)
-  && sequentialAddresses(p1, p2, 1)
-  && mergePoint(b,x0,x1,x2) != nil
-  && clobber(x0, x1, x2, s0, s1, o0)
-  => @mergePoint(b,x0,x1,x2) (MOVLload [i] {s} p0 mem)
-
-// Combine constant stores into larger (unaligned) stores.
-(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
-  && x.Uses == 1
-  && a.Off() + 1 == c.Off()
-  && clobber(x)
-  => (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p mem)
-(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
-  && x.Uses == 1
-  && a.Off() + 1 == c.Off()
-  && clobber(x)
-  => (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p mem)
-
-(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
-  && x.Uses == 1
-  && a.Off() == c.Off()
-  && sequentialAddresses(p0, p1, 1)
-  && clobber(x)
-  => (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
-(MOVBstoreconst [a] {s} p0 x:(MOVBstoreconst [c] {s} p1 mem))
-  && x.Uses == 1
-  && a.Off() == c.Off()
-  && sequentialAddresses(p0, p1, 1)
-  && clobber(x)
-  => (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
-
-(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
-  && x.Uses == 1
-  && a.Off() + 2 == c.Off()
-  && clobber(x)
-  => (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p mem)
-(MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
-  && x.Uses == 1
-  && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
-  && clobber(x)
-  => (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p mem)
-
-(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
-  && x.Uses == 1
-  && a.Off() == c.Off()
-  && sequentialAddresses(p0, p1, 2)
-  && clobber(x)
-  => (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
-(MOVWstoreconst [a] {s} p0 x:(MOVWstoreconst [c] {s} p1 mem))
-  && x.Uses == 1
-  && a.Off() == c.Off()
-  && sequentialAddresses(p0, p1, 2)
-  && clobber(x)
-  => (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
-
-// Combine stores into larger (unaligned) stores.
-(MOVBstore [i] {s} p (SHR(W|L)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVWstore [i-1] {s} p w mem)
-(MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHR(W|L)const [8] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVWstore [i] {s} p w mem)
-(MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVWstore [i-1] {s} p w0 mem)
-
-(MOVBstore [i] {s} p1 (SHR(W|L)const [8] w) x:(MOVBstore [i] {s} p0 w mem))
-  && x.Uses == 1
-  && sequentialAddresses(p0, p1, 1)
-  && clobber(x)
-  => (MOVWstore [i] {s} p0 w mem)
-(MOVBstore [i] {s} p0 w x:(MOVBstore {s} [i] p1 (SHR(W|L)const [8] w) mem))
-  && x.Uses == 1
-  && sequentialAddresses(p0, p1, 1)
-  && clobber(x)
-  => (MOVWstore [i] {s} p0 w mem)
-(MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i] {s} p0 w0:(SHRLconst [j-8] w) mem))
-  && x.Uses == 1
-  && sequentialAddresses(p0, p1, 1)
-  && clobber(x)
-  => (MOVWstore [i] {s} p0 w0 mem)
-
-(MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVLstore [i-2] {s} p w mem)
-(MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVLstore [i-2] {s} p w0 mem)
-
-(MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i] {s} p0 w mem))
-  && x.Uses == 1
-  && sequentialAddresses(p0, p1, 2)
-  && clobber(x)
-  => (MOVLstore [i] {s} p0 w mem)
-(MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i] {s} p0 w0:(SHRLconst [j-16] w) mem))
-  && x.Uses == 1
-  && sequentialAddresses(p0, p1, 2)
-  && clobber(x)
-  => (MOVLstore [i] {s} p0 w0 mem)
-
 // For PIC, break floating-point constant loading into two instructions so we have
 // a register to use for holding the address of the constant pool entry.
 (MOVSSconst [c]) && config.ctxt.Flag_shared => (MOVSSconst2 (MOVSSconst1 [c]))
index 6f9cb3698fc38e36137b8689d26c54c68ca1ee57..602c9723a6b64b45e807e4f411e6926135c92593 100644 (file)
 (BitLen8 <t> x) && buildcfg.GOAMD64 >= 3 => (NEGQ (ADDQconst <t> [-32] (LZCNTL (MOVBQZX <x.Type> x))))
 
 (Bswap(64|32) ...) => (BSWAP(Q|L) ...)
+(Bswap16 x) => (ROLWconst [8] x)
 
 (PopCount(64|32) ...) => (POPCNT(Q|L) ...)
 (PopCount16 x) => (POPCNTL (MOVWQZX <typ.UInt32> x))
 // Convert LEAQ1 back to ADDQ if we can
 (LEAQ1 [0] x y) && v.Aux == nil => (ADDQ x y)
 
-// Combining byte loads into larger (unaligned) loads.
-// There are many ways these combinations could occur.  This is
-// designed to match the way encoding/binary.LittleEndian does it.
-
-// Little-endian loads
-
-(OR(L|Q)                  x0:(MOVBload [i0] {s} p mem)
-    sh:(SHL(L|Q)const [8] x1:(MOVBload [i1] {s} p mem)))
-  && i1 == i0+1
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  => @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
-
-(OR(L|Q)                  x0:(MOVBload [i] {s} p0 mem)
-    sh:(SHL(L|Q)const [8] x1:(MOVBload [i] {s} p1 mem)))
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && sequentialAddresses(p0, p1, 1)
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  => @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem)
-
-(OR(L|Q)                   x0:(MOVWload [i0] {s} p mem)
-    sh:(SHL(L|Q)const [16] x1:(MOVWload [i1] {s} p mem)))
-  && i1 == i0+2
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  => @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
-
-(OR(L|Q)                   x0:(MOVWload [i] {s} p0 mem)
-    sh:(SHL(L|Q)const [16] x1:(MOVWload [i] {s} p1 mem)))
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && sequentialAddresses(p0, p1, 2)
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  => @mergePoint(b,x0,x1) (MOVLload [i] {s} p0 mem)
-
-(ORQ                   x0:(MOVLload [i0] {s} p mem)
-    sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)))
-  && i1 == i0+4
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  => @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
-
-(ORQ                   x0:(MOVLload [i] {s} p0 mem)
-    sh:(SHLQconst [32] x1:(MOVLload [i] {s} p1 mem)))
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && sequentialAddresses(p0, p1, 4)
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  => @mergePoint(b,x0,x1) (MOVQload [i] {s} p0 mem)
-
-(OR(L|Q)
-    s1:(SHL(L|Q)const [j1] x1:(MOVBload [i1] {s} p mem))
-    or:(OR(L|Q)
-        s0:(SHL(L|Q)const [j0] x0:(MOVBload [i0] {s} p mem))
-       y))
-  && i1 == i0+1
-  && j1 == j0+8
-  && j0 % 16 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  => @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
-
-(OR(L|Q)
-    s1:(SHL(L|Q)const [j1] x1:(MOVBload [i] {s} p1 mem))
-    or:(OR(L|Q)
-        s0:(SHL(L|Q)const [j0] x0:(MOVBload [i] {s} p0 mem))
-       y))
-  && j1 == j0+8
-  && j0 % 16 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && sequentialAddresses(p0, p1, 1)
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  => @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j0] (MOVWload [i] {s} p0 mem)) y)
-
-(ORQ
-    s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem))
-    or:(ORQ
-        s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem))
-       y))
-  && i1 == i0+2
-  && j1 == j0+16
-  && j0 % 32 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  => @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
-
-(ORQ
-    s1:(SHLQconst [j1] x1:(MOVWload [i] {s} p1 mem))
-    or:(ORQ
-        s0:(SHLQconst [j0] x0:(MOVWload [i] {s} p0 mem))
-       y))
-  && j1 == j0+16
-  && j0 % 32 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && sequentialAddresses(p0, p1, 2)
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  => @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i] {s} p0 mem)) y)
-
-// Big-endian loads
-
-(OR(L|Q)
-                           x1:(MOVBload [i1] {s} p mem)
-    sh:(SHL(L|Q)const [8]  x0:(MOVBload [i0] {s} p mem)))
-  && i1 == i0+1
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  => @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
-
-(OR(L|Q)
-                           x1:(MOVBload [i] {s} p1 mem)
-    sh:(SHL(L|Q)const [8]  x0:(MOVBload [i] {s} p0 mem)))
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && sequentialAddresses(p0, p1, 1)
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  => @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i] {s} p0 mem))
-
-(OR(L|Q)
-                            r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))
-    sh:(SHL(L|Q)const [16]  r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
-  && i1 == i0+2
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && r0.Uses == 1
-  && r1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, r0, r1, sh)
-  => @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
-
-(OR(L|Q)
-                            r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem))
-    sh:(SHL(L|Q)const [16]  r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem))))
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && r0.Uses == 1
-  && r1.Uses == 1
-  && sh.Uses == 1
-  && sequentialAddresses(p0, p1, 2)
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, r0, r1, sh)
-  => @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i] {s} p0 mem))
-
-(ORQ
-                        r1:(BSWAPL x1:(MOVLload [i1] {s} p mem))
-    sh:(SHLQconst [32]  r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))))
-  && i1 == i0+4
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && r0.Uses == 1
-  && r1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, r0, r1, sh)
-  => @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
-
-(ORQ
-                        r1:(BSWAPL x1:(MOVLload [i] {s} p1 mem))
-    sh:(SHLQconst [32]  r0:(BSWAPL x0:(MOVLload [i] {s} p0 mem))))
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && r0.Uses == 1
-  && r1.Uses == 1
-  && sh.Uses == 1
-  && sequentialAddresses(p0, p1, 4)
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, r0, r1, sh)
-  => @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i] {s} p0 mem))
-
-(OR(L|Q)
-    s0:(SHL(L|Q)const [j0] x0:(MOVBload [i0] {s} p mem))
-    or:(OR(L|Q)
-        s1:(SHL(L|Q)const [j1] x1:(MOVBload [i1] {s} p mem))
-       y))
-  && i1 == i0+1
-  && j1 == j0-8
-  && j1 % 16 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  => @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
-
-(OR(L|Q)
-    s0:(SHL(L|Q)const [j0] x0:(MOVBload [i] {s} p0 mem))
-    or:(OR(L|Q)
-        s1:(SHL(L|Q)const [j1] x1:(MOVBload [i] {s} p1 mem))
-       y))
-  && j1 == j0-8
-  && j1 % 16 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && sequentialAddresses(p0, p1, 1)
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  => @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i] {s} p0 mem))) y)
-
-(ORQ
-    s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))
-    or:(ORQ
-        s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))
-       y))
-  && i1 == i0+2
-  && j1 == j0-16
-  && j1 % 32 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && r0.Uses == 1
-  && r1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, r0, r1, s0, s1, or)
-  => @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p mem))) y)
-
-(ORQ
-    s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem)))
-    or:(ORQ
-        s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem)))
-       y))
-  && j1 == j0-16
-  && j1 % 32 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && r0.Uses == 1
-  && r1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && sequentialAddresses(p0, p1, 2)
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, r0, r1, s0, s1, or)
-  => @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i] {s} p0 mem))) y)
-
-// Combine 2 byte stores + shift into rolw 8 + word store
-(MOVBstore [i] {s} p w
-  x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
-  && x0.Uses == 1
-  && clobber(x0)
-  => (MOVWstore [i-1] {s} p (ROLWconst <typ.UInt16> [8] w) mem)
-(MOVBstore [i] {s} p1 w
-  x0:(MOVBstore [i] {s} p0 (SHRWconst [8] w) mem))
-  && x0.Uses == 1
-  && sequentialAddresses(p0, p1, 1)
-  && clobber(x0)
-  => (MOVWstore [i] {s} p0 (ROLWconst <typ.UInt16> [8] w) mem)
-
-// Combine stores + shifts into bswap and larger (unaligned) stores
-(MOVBstore [i] {s} p w
-  x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w)
-  x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w)
-  x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && x2.Uses == 1
-  && clobber(x0, x1, x2)
-  => (MOVLstore [i-3] {s} p (BSWAPL <typ.UInt32> w) mem)
-(MOVBstore [i] {s} p3 w
-  x2:(MOVBstore [i] {s} p2 (SHRLconst [8] w)
-  x1:(MOVBstore [i] {s} p1 (SHRLconst [16] w)
-  x0:(MOVBstore [i] {s} p0 (SHRLconst [24] w) mem))))
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && x2.Uses == 1
-  && sequentialAddresses(p0, p1, 1)
-  && sequentialAddresses(p1, p2, 1)
-  && sequentialAddresses(p2, p3, 1)
-  && clobber(x0, x1, x2)
-  => (MOVLstore [i] {s} p0 (BSWAPL <typ.UInt32> w) mem)
-
-(MOVBstore [i] {s} p w
-  x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w)
-  x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w)
-  x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w)
-  x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w)
-  x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w)
-  x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w)
-  x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && x4.Uses == 1
-  && x5.Uses == 1
-  && x6.Uses == 1
-  && clobber(x0, x1, x2, x3, x4, x5, x6)
-  => (MOVQstore [i-7] {s} p (BSWAPQ <typ.UInt64> w) mem)
-(MOVBstore [i] {s} p7 w
-  x6:(MOVBstore [i] {s} p6 (SHRQconst [8] w)
-  x5:(MOVBstore [i] {s} p5 (SHRQconst [16] w)
-  x4:(MOVBstore [i] {s} p4 (SHRQconst [24] w)
-  x3:(MOVBstore [i] {s} p3 (SHRQconst [32] w)
-  x2:(MOVBstore [i] {s} p2 (SHRQconst [40] w)
-  x1:(MOVBstore [i] {s} p1 (SHRQconst [48] w)
-  x0:(MOVBstore [i] {s} p0 (SHRQconst [56] w) mem))))))))
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && x4.Uses == 1
-  && x5.Uses == 1
-  && x6.Uses == 1
-  && sequentialAddresses(p0, p1, 1)
-  && sequentialAddresses(p1, p2, 1)
-  && sequentialAddresses(p2, p3, 1)
-  && sequentialAddresses(p3, p4, 1)
-  && sequentialAddresses(p4, p5, 1)
-  && sequentialAddresses(p5, p6, 1)
-  && sequentialAddresses(p6, p7, 1)
-  && clobber(x0, x1, x2, x3, x4, x5, x6)
-  => (MOVQstore [i] {s} p0 (BSWAPQ <typ.UInt64> w) mem)
-
-// Combine constant stores into larger (unaligned) stores.
-(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
-  && x.Uses == 1
-  && sequentialAddresses(p0, p1, int64(a.Off()+1-c.Off()))
-  && clobber(x)
-  => (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
-(MOVBstoreconst [a] {s} p0 x:(MOVBstoreconst [c] {s} p1 mem))
-  && x.Uses == 1
-  && sequentialAddresses(p0, p1, int64(a.Off()+1-c.Off()))
-  && clobber(x)
-  => (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
-(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
-  && x.Uses == 1
-  && sequentialAddresses(p0, p1, int64(a.Off()+2-c.Off()))
-  && clobber(x)
-  => (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
-(MOVWstoreconst [a] {s} p0 x:(MOVWstoreconst [c] {s} p1 mem))
-  && x.Uses == 1
-  && sequentialAddresses(p0, p1, int64(a.Off()+2-c.Off()))
-  && clobber(x)
-  => (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
-(MOVLstoreconst [c] {s} p1 x:(MOVLstoreconst [a] {s} p0 mem))
-  && x.Uses == 1
-  && sequentialAddresses(p0, p1, int64(a.Off()+4-c.Off()))
-  && clobber(x)
-  => (MOVQstore [a.Off()] {s} p0 (MOVQconst [a.Val64()&0xffffffff | c.Val64()<<32]) mem)
-(MOVLstoreconst [a] {s} p0 x:(MOVLstoreconst [c] {s} p1 mem))
-  && x.Uses == 1
-  && sequentialAddresses(p0, p1, int64(a.Off()+4-c.Off()))
-  && clobber(x)
-  => (MOVQstore [a.Off()] {s} p0 (MOVQconst [a.Val64()&0xffffffff | c.Val64()<<32]) mem)
 (MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [a] {s} p0 mem))
   && config.useSSE
   && x.Uses == 1
   && clobber(x)
   => (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem)
 
-// Combine stores into larger (unaligned) stores. Little endian.
-(MOVBstore [i] {s} p (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVWstore [i-1] {s} p w mem)
-(MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHR(W|L|Q)const [8] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVWstore [i] {s} p w mem)
-(MOVBstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p w0:(SHR(L|Q)const [j-8] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVWstore [i-1] {s} p w0 mem)
-(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w) x:(MOVBstore [i] {s} p0 w mem))
-  && x.Uses == 1
-  && sequentialAddresses(p0, p1, 1)
-  && clobber(x)
-  => (MOVWstore [i] {s} p0 w mem)
-(MOVBstore [i] {s} p0 w x:(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w) mem))
-  && x.Uses == 1
-  && sequentialAddresses(p0, p1, 1)
-  && clobber(x)
-  => (MOVWstore [i] {s} p0 w mem)
-(MOVBstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVBstore [i] {s} p0 w0:(SHR(L|Q)const [j-8] w) mem))
-  && x.Uses == 1
-  && sequentialAddresses(p0, p1, 1)
-  && clobber(x)
-  => (MOVWstore [i] {s} p0 w0 mem)
-
-(MOVWstore [i] {s} p (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p w mem))
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVLstore [i-2] {s} p w mem)
-(MOVWstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p w0:(SHR(L|Q)const [j-16] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVLstore [i-2] {s} p w0 mem)
-(MOVWstore [i] {s} p1 (SHR(L|Q)const [16] w) x:(MOVWstore [i] {s} p0 w mem))
-  && x.Uses == 1
-  && sequentialAddresses(p0, p1, 2)
-  && clobber(x)
-  => (MOVLstore [i] {s} p0 w mem)
-(MOVWstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVWstore [i] {s} p0 w0:(SHR(L|Q)const [j-16] w) mem))
-  && x.Uses == 1
-  && sequentialAddresses(p0, p1, 2)
-  && clobber(x)
-  => (MOVLstore [i] {s} p0 w0 mem)
-
-(MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem))
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVQstore [i-4] {s} p w mem)
-(MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVQstore [i-4] {s} p w0 mem)
-(MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i] {s} p0 w mem))
-  && x.Uses == 1
-  && sequentialAddresses(p0, p1, 4)
-  && clobber(x)
-  => (MOVQstore [i] {s} p0 w mem)
-(MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i] {s} p0 w0:(SHRQconst [j-32] w) mem))
-  && x.Uses == 1
-  && sequentialAddresses(p0, p1, 4)
-  && clobber(x)
-  => (MOVQstore [i] {s} p0 w0 mem)
-
-(MOVBstore  [c3] {s}  p3 (SHRQconst [56]  w)
-  x1:(MOVWstore [c2]  {s} p2  (SHRQconst [40] w)
-  x2:(MOVLstore [c1] {s} p1  (SHRQconst [8] w)
-  x3:(MOVBstore [c0] {s} p0  w mem))))
-  && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && sequentialAddresses(p0, p1, int64(1 + c0 - c1))
-  && sequentialAddresses(p0, p2, int64(5 + c0 - c2))
-  && sequentialAddresses(p0, p3, int64(7 + c0 - c3))
-  && clobber(x1, x2, x3)
-  => (MOVQstore [c0] {s} p0 w mem)
-
 (MOVBstore [i] {s} p
   x1:(MOVBload [j] {s2} p2 mem)
     mem2:(MOVBstore [i-1] {s} p
 (MOVWstore [i] {s} p x:(ROLWconst [8] w) mem)   && x.Uses == 1 && buildcfg.GOAMD64 >= 3 => (MOVBEWstore [i] {s} p w mem)
 (MOVBEWstore [i] {s} p x:(ROLWconst [8] w) mem) && x.Uses == 1 => (MOVWstore [i] {s} p w mem)
 
-(ORQ                   x0:(MOVBELload [i0] {s} p mem)
-    sh:(SHLQconst [32] x1:(MOVBELload [i1] {s} p mem)))
-  && i0 == i1+4
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  => @mergePoint(b,x0,x1) (MOVBEQload [i1] {s} p mem)
-
-(ORQ                   x0:(MOVBELload [i] {s} p0 mem)
-    sh:(SHLQconst [32] x1:(MOVBELload [i] {s} p1 mem)))
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && sequentialAddresses(p1, p0, 4)
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  => @mergePoint(b,x0,x1) (MOVBEQload [i] {s} p1 mem)
-
 (SAR(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SARX(Q|L)load [off] {sym} ptr x mem)
 (SHL(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SHLX(Q|L)load [off] {sym} ptr x mem)
 (SHR(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SHRX(Q|L)load [off] {sym} ptr x mem)
index 6dd45f11dce406c053f714d504a54f6c12cee08b..7b2016bff2104f369476ba3feb145b0c7cc15012 100644 (file)
 
 (Bswap64 ...) => (REV ...)
 (Bswap32 ...) => (REVW ...)
+(Bswap16 ...) => (REV16W ...)
 
 (BitRev64 ...) => (RBIT ...)
 (BitRev32 ...) => (RBITW ...)
                        (STP [16] dst (Select0 <typ.UInt64> (LDP [16] src mem)) (Select1 <typ.UInt64> (LDP [16] src mem))
                                (STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem))))
 
+(MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i+8] ptr mem)) && x.Uses == 1 && clobber(x) => (MOVQstorezero {s} [i] ptr mem)
+(MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i-8] ptr mem)) && x.Uses == 1 && clobber(x) => (MOVQstorezero {s} [i-8] ptr mem)
+
 // strip off fractional word move
 (Move [s] dst src mem) && s%16 != 0 && s%16 <= 8 && s > 16 =>
        (Move [8]
 (ORshiftRL [rc] (ANDconst [ac] y) (SLLconst [lc] x)) && lc < rc && ac == ^((1<<uint(64-rc)-1))
        => (BFXIL [armBFAuxInt(rc-lc, 64-rc)] y x)
 
-// do combined loads
-// little endian loads
-// b[0] | b[1]<<8 => load 16-bit
-(ORshiftLL <t> [8]
-       y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))
-       y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
-       && i1 == i0+1
-       && x0.Uses == 1 && x1.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1
-       && mergePoint(b,x0,x1) != nil
-       && clobber(x0, x1, y0, y1)
-       => @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
-(ORshiftLL <t> [8]
-       y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))
-       y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
-       && s == nil
-       && x0.Uses == 1 && x1.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1
-       && mergePoint(b,x0,x1) != nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x0, x1, y0, y1)
-       => @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr0 idx0 mem)
-(ORshiftLL <t> [8]
-       y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))
-       y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
-       && x0.Uses == 1 && x1.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1
-       && mergePoint(b,x0,x1) != nil
-       && clobber(x0, x1, y0, y1)
-       => @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr idx mem)
-
-// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 => load 32-bit
-(ORshiftLL <t> [24] o0:(ORshiftLL [16]
-                   x0:(MOVHUload [i0] {s} p mem)
-       y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i3] {s} p mem)))
-       && i2 == i0+2
-       && i3 == i0+3
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
-       && y1.Uses == 1 && y2.Uses == 1
-       && o0.Uses == 1
-       && mergePoint(b,x0,x1,x2) != nil
-       && clobber(x0, x1, x2, y1, y2, o0)
-       => @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
-(ORshiftLL <t> [24] o0:(ORshiftLL [16]
-                   x0:(MOVHUloadidx ptr0 idx0 mem)
-       y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADD ptr1 idx1) mem)))
-       y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
-       && s == nil
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
-       && y1.Uses == 1 && y2.Uses == 1
-       && o0.Uses == 1
-       && mergePoint(b,x0,x1,x2) != nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && isSamePtr(p1, p)
-       && clobber(x0, x1, x2, y1, y2, o0)
-       => @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 idx0 mem)
-(ORshiftLL <t> [24] o0:(ORshiftLL [16]
-                   x0:(MOVHUloadidx ptr idx mem)
-       y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
-       y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
-       && y1.Uses == 1 && y2.Uses == 1
-       && o0.Uses == 1
-       && mergePoint(b,x0,x1,x2) != nil
-       && clobber(x0, x1, x2, y1, y2, o0)
-       => @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr idx mem)
-(ORshiftLL <t> [24] o0:(ORshiftLL [16]
-                   x0:(MOVHUloadidx2 ptr0 idx0 mem)
-       y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADDshiftLL [1] ptr1 idx1) mem)))
-       y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
-       && s == nil
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
-       && y1.Uses == 1 && y2.Uses == 1
-       && o0.Uses == 1
-       && mergePoint(b,x0,x1,x2) != nil
-       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
-       && isSamePtr(p1, p)
-       && clobber(x0, x1, x2, y1, y2, o0)
-       => @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 (SLLconst <idx0.Type> [1] idx0) mem)
-
-// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 => load 64-bit
-(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
-                   x0:(MOVWUload [i0] {s} p mem)
-       y1:(MOVDnop x1:(MOVBUload [i4] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [i6] {s} p mem)))
-       y4:(MOVDnop x4:(MOVBUload [i7] {s} p mem)))
-       && i4 == i0+4
-       && i5 == i0+5
-       && i6 == i0+6
-       && i7 == i0+7
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
-       && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
-       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
-       && mergePoint(b,x0,x1,x2,x3,x4) != nil
-       && clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)
-       => @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
-(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
-                   x0:(MOVWUloadidx ptr0 idx0 mem)
-       y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADD ptr1 idx1) mem)))
-       y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [6] {s} p mem)))
-       y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
-       && s == nil
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
-       && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
-       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
-       && mergePoint(b,x0,x1,x2,x3,x4) != nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && isSamePtr(p1, p)
-       && clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)
-       => @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 idx0 mem)
-(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
-                   x0:(MOVWUloadidx4 ptr0 idx0 mem)
-       y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADDshiftLL [2] ptr1 idx1) mem)))
-       y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [6] {s} p mem)))
-       y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
-       && s == nil
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
-       && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
-       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
-       && mergePoint(b,x0,x1,x2,x3,x4) != nil
-       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
-       && isSamePtr(p1, p)
-       && clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)
-       => @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 (SLLconst <idx0.Type> [2] idx0) mem)
-(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
-                   x0:(MOVWUloadidx ptr idx mem)
-       y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
-       y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
-       y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
-       y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
-       && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
-       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
-       && mergePoint(b,x0,x1,x2,x3,x4) != nil
-       && clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)
-       => @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr idx mem)
-
-// b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] => load 32-bit
-(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
-       y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem)))
-       y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)))
-       && i1 == i0+1
-       && i2 == i0+2
-       && i3 == i0+3
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
-       && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
-       && mergePoint(b,x0,x1,x2,x3) != nil
-       && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
-       => @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
-(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
-       y0:(MOVDnop x0:(MOVBUload [3] {s} p mem)))
-       y1:(MOVDnop x1:(MOVBUload [2] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
-       y3:(MOVDnop x3:(MOVBUloadidx ptr0 idx0 mem)))
-       && s == nil
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
-       && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
-       && mergePoint(b,x0,x1,x2,x3) != nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && isSamePtr(p1, p)
-       && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
-       => @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr0 idx0 mem)
-(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
-       y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
-       y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
-       y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
-       y3:(MOVDnop x3:(MOVBUloadidx ptr idx mem)))
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
-       && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
-       && mergePoint(b,x0,x1,x2,x3) != nil
-       && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
-       => @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr idx mem)
-
-// b[7]<<56 | b[6]<<48 | b[5]<<40 | b[4]<<32 | b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] => load 64-bit
-(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
-       y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem)))
-       y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem)))
-       y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem)))
-       y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem)))
-       y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem)))
-       y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)))
-       && i1 == i0+1
-       && i2 == i0+2
-       && i3 == i0+3
-       && i4 == i0+4
-       && i5 == i0+5
-       && i6 == i0+6
-       && i7 == i0+7
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
-       && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
-       && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
-       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
-       && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
-       && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
-       && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
-       => @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
-(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
-       y0:(MOVDnop x0:(MOVBUload [7] {s} p mem)))
-       y1:(MOVDnop x1:(MOVBUload [6] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [4] {s} p mem)))
-       y4:(MOVDnop x4:(MOVBUload [3] {s} p mem)))
-       y5:(MOVDnop x5:(MOVBUload [2] {s} p mem)))
-       y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
-       y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem)))
-       && s == nil
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
-       && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
-       && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
-       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
-       && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
-       && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && isSamePtr(p1, p)
-       && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
-       => @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr0 idx0 mem)
-(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
-       y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
-       y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
-       y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
-       y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
-       y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
-       y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
-       y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
-       y7:(MOVDnop x7:(MOVBUloadidx ptr idx mem)))
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
-       && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
-       && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
-       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
-       && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
-       && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
-       && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
-       => @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr idx mem)
-
-// big endian loads
-// b[1] | b[0]<<8 => load 16-bit, reverse
-(ORshiftLL <t> [8]
-       y0:(MOVDnop x0:(MOVBUload [i1] {s} p mem))
-       y1:(MOVDnop x1:(MOVBUload [i0] {s} p mem)))
-       && i1 == i0+1
-       && x0.Uses == 1 && x1.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1
-       && mergePoint(b,x0,x1) != nil
-       && clobber(x0, x1, y0, y1)
-       => @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i0] {s} p mem))
-(ORshiftLL <t> [8]
-       y0:(MOVDnop x0:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))
-       y1:(MOVDnop x1:(MOVBUloadidx ptr0 idx0 mem)))
-       && s == nil
-       && x0.Uses == 1 && x1.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1
-       && mergePoint(b,x0,x1) != nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x0, x1, y0, y1)
-       => @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr0 idx0 mem))
-(ORshiftLL <t> [8]
-       y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [1] idx) mem))
-       y1:(MOVDnop x1:(MOVBUloadidx ptr idx mem)))
-       && x0.Uses == 1 && x1.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1
-       && mergePoint(b,x0,x1) != nil
-       && clobber(x0, x1, y0, y1)
-       => @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr idx mem))
-
-// b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 => load 32-bit, reverse
-(ORshiftLL <t> [24] o0:(ORshiftLL [16]
-       y0:(REV16W  x0:(MOVHUload [i2] {s} p mem))
-       y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i0] {s} p mem)))
-       && i1 == i0+1
-       && i2 == i0+2
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
-       && o0.Uses == 1
-       && mergePoint(b,x0,x1,x2) != nil
-       && clobber(x0, x1, x2, y0, y1, y2, o0)
-       => @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem))
-(ORshiftLL <t> [24] o0:(ORshiftLL [16]
-       y0:(REV16W  x0:(MOVHUload [2] {s} p mem))
-       y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
-       y2:(MOVDnop x2:(MOVBUloadidx ptr0 idx0 mem)))
-       && s == nil
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
-       && o0.Uses == 1
-       && mergePoint(b,x0,x1,x2) != nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && isSamePtr(p1, p)
-       && clobber(x0, x1, x2, y0, y1, y2, o0)
-       => @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
-(ORshiftLL <t> [24] o0:(ORshiftLL [16]
-       y0:(REV16W  x0:(MOVHUloadidx ptr (ADDconst [2] idx) mem))
-       y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
-       y2:(MOVDnop x2:(MOVBUloadidx ptr idx mem)))
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
-       && o0.Uses == 1
-       && mergePoint(b,x0,x1,x2) != nil
-       && clobber(x0, x1, x2, y0, y1, y2, o0)
-       => @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
-
-// b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 => load 64-bit, reverse
-(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
-       y0:(REVW    x0:(MOVWUload [i4] {s} p mem))
-       y1:(MOVDnop x1:(MOVBUload [i3] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [i1] {s} p mem)))
-       y4:(MOVDnop x4:(MOVBUload [i0] {s} p mem)))
-       && i1 == i0+1
-       && i2 == i0+2
-       && i3 == i0+3
-       && i4 == i0+4
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
-       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
-       && mergePoint(b,x0,x1,x2,x3,x4) != nil
-       && clobber(x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, o0, o1, o2)
-       => @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem))
-(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
-       y0:(REVW    x0:(MOVWUload [4] {s} p mem))
-       y1:(MOVDnop x1:(MOVBUload [3] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
-       y4:(MOVDnop x4:(MOVBUloadidx ptr0 idx0 mem)))
-       && s == nil
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
-       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
-       && mergePoint(b,x0,x1,x2,x3,x4) != nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && isSamePtr(p1, p)
-       && clobber(x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, o0, o1, o2)
-       => @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
-(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
-       y0:(REVW    x0:(MOVWUloadidx ptr (ADDconst [4] idx) mem))
-       y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
-       y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
-       y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
-       y4:(MOVDnop x4:(MOVBUloadidx ptr idx mem)))
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
-       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
-       && mergePoint(b,x0,x1,x2,x3,x4) != nil
-       && clobber(x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, o0, o1, o2)
-       => @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr idx mem))
-
-// b[0]<<24 | b[1]<<16 | b[2]<<8 | b[3] => load 32-bit, reverse
-(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
-       y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)))
-       y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
-       && i1 == i0+1
-       && i2 == i0+2
-       && i3 == i0+3
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
-       && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
-       && mergePoint(b,x0,x1,x2,x3) != nil
-       && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
-       => @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem))
-(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
-       y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)))
-       y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
-       y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
-       && s == nil
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
-       && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
-       && mergePoint(b,x0,x1,x2,x3) != nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && isSamePtr(p1, p)
-       && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
-       => @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
-(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
-       y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)))
-       y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
-       y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
-       y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
-       && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
-       && mergePoint(b,x0,x1,x2,x3) != nil
-       && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
-       => @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
-
-// b[0]<<56 | b[1]<<48 | b[2]<<40 | b[3]<<32 | b[4]<<24 | b[5]<<16 | b[6]<<8 | b[7] => load 64-bit, reverse
-(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
-       y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)))
-       y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
-       y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
-       y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem)))
-       y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem)))
-       y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem)))
-       y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)))
-       && i1 == i0+1
-       && i2 == i0+2
-       && i3 == i0+3
-       && i4 == i0+4
-       && i5 == i0+5
-       && i6 == i0+6
-       && i7 == i0+7
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
-       && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
-       && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
-       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
-       && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
-       && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
-       && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
-       => @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem))
-(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
-       y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)))
-       y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
-       y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
-       y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
-       y4:(MOVDnop x4:(MOVBUload [4] {s} p mem)))
-       y5:(MOVDnop x5:(MOVBUload [5] {s} p mem)))
-       y6:(MOVDnop x6:(MOVBUload [6] {s} p mem)))
-       y7:(MOVDnop x7:(MOVBUload [7] {s} p mem)))
-       && s == nil
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
-       && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
-       && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
-       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
-       && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
-       && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && isSamePtr(p1, p)
-       && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
-       => @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
-(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
-       y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)))
-       y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
-       y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
-       y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
-       y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
-       y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
-       y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
-       y7:(MOVDnop x7:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
-       && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
-       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
-       && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
-       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
-       && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
-       && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
-       && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
-       => @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr idx mem))
-
-// Combine zero stores into larger (unaligned) stores.
-(MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
-       && x.Uses == 1
-       && areAdjacentOffsets(int64(i),int64(j),1)
-       && isSamePtr(ptr0, ptr1)
-       && clobber(x)
-       => (MOVHstorezero [int32(min(int64(i),int64(j)))] {s} ptr0 mem)
-(MOVBstorezero [1] {s} (ADD ptr0 idx0) x:(MOVBstorezeroidx ptr1 idx1 mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x)
-       => (MOVHstorezeroidx ptr1 idx1 mem)
-(MOVBstorezeroidx ptr (ADDconst [1] idx) x:(MOVBstorezeroidx ptr idx mem))
-       && x.Uses == 1
-       && clobber(x)
-       => (MOVHstorezeroidx ptr idx mem)
-(MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
-       && x.Uses == 1
-       && areAdjacentOffsets(int64(i),int64(j),2)
-       && isSamePtr(ptr0, ptr1)
-       && clobber(x)
-       => (MOVWstorezero [int32(min(int64(i),int64(j)))] {s} ptr0 mem)
-(MOVHstorezero [2] {s} (ADD ptr0 idx0) x:(MOVHstorezeroidx ptr1 idx1 mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x)
-       => (MOVWstorezeroidx ptr1 idx1 mem)
-(MOVHstorezeroidx ptr (ADDconst [2] idx) x:(MOVHstorezeroidx ptr idx mem))
-       && x.Uses == 1
-       && clobber(x)
-       => (MOVWstorezeroidx ptr idx mem)
-(MOVHstorezero [2] {s} (ADDshiftLL [1] ptr0 idx0) x:(MOVHstorezeroidx2 ptr1 idx1 mem))
-       && x.Uses == 1
-       && s == nil
-       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
-       && clobber(x)
-       => (MOVWstorezeroidx ptr1 (SLLconst <idx1.Type> [1] idx1) mem)
-(MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
-       && x.Uses == 1
-       && areAdjacentOffsets(int64(i),int64(j),4)
-       && isSamePtr(ptr0, ptr1)
-       && clobber(x)
-       => (MOVDstorezero [int32(min(int64(i),int64(j)))] {s} ptr0 mem)
-(MOVWstorezero [4] {s} (ADD ptr0 idx0) x:(MOVWstorezeroidx ptr1 idx1 mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x)
-       => (MOVDstorezeroidx ptr1 idx1 mem)
-(MOVWstorezeroidx ptr (ADDconst [4] idx) x:(MOVWstorezeroidx ptr idx mem))
-       && x.Uses == 1
-       && clobber(x)
-       => (MOVDstorezeroidx ptr idx mem)
-(MOVWstorezero [4] {s} (ADDshiftLL [2] ptr0 idx0) x:(MOVWstorezeroidx4 ptr1 idx1 mem))
-       && x.Uses == 1
-       && s == nil
-       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
-       && clobber(x)
-       => (MOVDstorezeroidx ptr1 (SLLconst <idx1.Type> [2] idx1) mem)
-(MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
-       && x.Uses == 1
-       && areAdjacentOffsets(int64(i),int64(j),8)
-       && isSamePtr(ptr0, ptr1)
-       && clobber(x)
-       => (MOVQstorezero [int32(min(int64(i),int64(j)))] {s} ptr0 mem)
-(MOVDstorezero [8] {s} p0:(ADD ptr0 idx0) x:(MOVDstorezeroidx ptr1 idx1 mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x)
-       => (MOVQstorezero [0] {s} p0 mem)
-(MOVDstorezero [8] {s} p0:(ADDshiftLL [3] ptr0 idx0) x:(MOVDstorezeroidx8 ptr1 idx1 mem))
-       && x.Uses == 1
-       && s == nil
-       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
-       && clobber(x)
-       => (MOVQstorezero [0] {s} p0 mem)
-
-// Combine stores into larger (unaligned) stores.
-(MOVBstore [i] {s} ptr0 (SRLconst [8] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
-       && x.Uses == 1
-       && isSamePtr(ptr0, ptr1)
-       && clobber(x)
-       => (MOVHstore [i-1] {s} ptr0 w mem)
-(MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] w) x:(MOVBstoreidx ptr1 idx1 w mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x)
-       => (MOVHstoreidx ptr1 idx1 w mem)
-(MOVBstoreidx ptr (ADDconst [1] idx) (SRLconst [8] w) x:(MOVBstoreidx ptr idx w mem))
-       && x.Uses == 1
-       && clobber(x)
-       => (MOVHstoreidx ptr idx w mem)
-(MOVBstore [i] {s} ptr0 (UBFX [armBFAuxInt(8, 8)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
-       && x.Uses == 1
-       && isSamePtr(ptr0, ptr1)
-       && clobber(x)
-       => (MOVHstore [i-1] {s} ptr0 w mem)
-(MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(8, 8)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x)
-       => (MOVHstoreidx ptr1 idx1 w mem)
-(MOVBstore [i] {s} ptr0 (UBFX [armBFAuxInt(8, 24)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
-       && x.Uses == 1
-       && isSamePtr(ptr0, ptr1)
-       && clobber(x)
-       => (MOVHstore [i-1] {s} ptr0 w mem)
-(MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(8, 24)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x)
-       => (MOVHstoreidx ptr1 idx1 w mem)
-(MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
-       && x.Uses == 1
-       && isSamePtr(ptr0, ptr1)
-       && clobber(x)
-       => (MOVHstore [i-1] {s} ptr0 w mem)
-(MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x)
-       => (MOVHstoreidx ptr1 idx1 w mem)
-(MOVBstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] w) mem))
-       && x.Uses == 1
-       && isSamePtr(ptr0, ptr1)
-       && clobber(x)
-       => (MOVHstore [i-1] {s} ptr0 w0 mem)
-(MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] w) mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x)
-       => (MOVHstoreidx ptr1 idx1 w0 mem)
-(MOVBstore [i] {s} ptr0 (UBFX [bfc] w) x:(MOVBstore [i-1] {s} ptr1 w0:(UBFX [bfc2] w) mem))
-       && x.Uses == 1
-       && isSamePtr(ptr0, ptr1)
-       && bfc.getARM64BFwidth() == 32 - bfc.getARM64BFlsb()
-       && bfc2.getARM64BFwidth() == 32 - bfc2.getARM64BFlsb()
-       && bfc2.getARM64BFlsb() == bfc.getARM64BFlsb() - 8
-       && clobber(x)
-       => (MOVHstore [i-1] {s} ptr0 w0 mem)
-(MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [bfc] w) x:(MOVBstoreidx ptr1 idx1 w0:(UBFX [bfc2] w) mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && bfc.getARM64BFwidth() == 32 - bfc.getARM64BFlsb()
-       && bfc2.getARM64BFwidth() == 32 - bfc2.getARM64BFlsb()
-       && bfc2.getARM64BFlsb() == bfc.getARM64BFlsb() - 8
-       && clobber(x)
-       => (MOVHstoreidx ptr1 idx1 w0 mem)
-(MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
-       && x.Uses == 1
-       && isSamePtr(ptr0, ptr1)
-       && clobber(x)
-       => (MOVHstore [i-1] {s} ptr0 w0 mem)
-(MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x)
-       => (MOVHstoreidx ptr1 idx1 w0 mem)
-(MOVHstore [i] {s} ptr0 (SRLconst [16] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
-       && x.Uses == 1
-       && isSamePtr(ptr0, ptr1)
-       && clobber(x)
-       => (MOVWstore [i-2] {s} ptr0 w mem)
-(MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx ptr1 idx1 w mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x)
-       => (MOVWstoreidx ptr1 idx1 w mem)
-(MOVHstoreidx ptr (ADDconst [2] idx) (SRLconst [16] w) x:(MOVHstoreidx ptr idx w mem))
-       && x.Uses == 1
-       && clobber(x)
-       => (MOVWstoreidx ptr idx w mem)
-(MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
-       && x.Uses == 1
-       && s == nil
-       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
-       && clobber(x)
-       => (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
-(MOVHstore [i] {s} ptr0 (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
-       && x.Uses == 1
-       && isSamePtr(ptr0, ptr1)
-       && clobber(x)
-       => (MOVWstore [i-2] {s} ptr0 w mem)
-(MOVHstore [2] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstoreidx ptr1 idx1 w mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x)
-       => (MOVWstoreidx ptr1 idx1 w mem)
-(MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
-       && x.Uses == 1
-       && s == nil
-       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
-       && clobber(x)
-       => (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
-(MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem))
-       && x.Uses == 1
-       && isSamePtr(ptr0, ptr1)
-       && clobber(x)
-       => (MOVWstore [i-2] {s} ptr0 w mem)
-(MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx ptr1 idx1 w mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x)
-       => (MOVWstoreidx ptr1 idx1 w mem)
-(MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx2 ptr1 idx1 w mem))
-       && x.Uses == 1
-       && s == nil
-       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
-       && clobber(x)
-       => (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
-(MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem))
-       && x.Uses == 1
-       && isSamePtr(ptr0, ptr1)
-       && clobber(x)
-       => (MOVWstore [i-2] {s} ptr0 w0 mem)
-(MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx ptr1 idx1 w0:(SRLconst [j-16] w) mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x)
-       => (MOVWstoreidx ptr1 idx1 w0 mem)
-(MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx2 ptr1 idx1 w0:(SRLconst [j-16] w) mem))
-       && x.Uses == 1
-       && s == nil
-       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
-       && clobber(x)
-       => (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w0 mem)
-(MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
-       && x.Uses == 1
-       && isSamePtr(ptr0, ptr1)
-       && clobber(x)
-       => (MOVDstore [i-4] {s} ptr0 w mem)
-(MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx ptr1 idx1 w mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x)
-       => (MOVDstoreidx ptr1 idx1 w mem)
-(MOVWstoreidx ptr (ADDconst [4] idx) (SRLconst [32] w) x:(MOVWstoreidx ptr idx w mem))
-       && x.Uses == 1
-       && clobber(x)
-       => (MOVDstoreidx ptr idx w mem)
-(MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx4 ptr1 idx1 w mem))
-       && x.Uses == 1
-       && s == nil
-       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
-       && clobber(x)
-       => (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w mem)
-(MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
-       && x.Uses == 1
-       && isSamePtr(ptr0, ptr1)
-       && clobber(x)
-       => (MOVDstore [i-4] {s} ptr0 w0 mem)
-(MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx ptr1 idx1 w0:(SRLconst [j-32] w) mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x)
-       => (MOVDstoreidx ptr1 idx1 w0 mem)
-(MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx4 ptr1 idx1 w0:(SRLconst [j-32] w) mem))
-       && x.Uses == 1
-       && s == nil
-       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
-       && clobber(x)
-       => (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w0 mem)
-(MOVBstore [i] {s} ptr w
-       x0:(MOVBstore [i-1] {s} ptr (SRLconst [8]  w)
-       x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
-       x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w)
-       x3:(MOVBstore [i-4] {s} ptr (SRLconst [32] w)
-       x4:(MOVBstore [i-5] {s} ptr (SRLconst [40] w)
-       x5:(MOVBstore [i-6] {s} ptr (SRLconst [48] w)
-       x6:(MOVBstore [i-7] {s} ptr (SRLconst [56] w) mem))))))))
-       && x0.Uses == 1
-       && x1.Uses == 1
-       && x2.Uses == 1
-       && x3.Uses == 1
-       && x4.Uses == 1
-       && x5.Uses == 1
-       && x6.Uses == 1
-       && clobber(x0, x1, x2, x3, x4, x5, x6)
-       => (MOVDstore [i-7] {s} ptr (REV <typ.UInt64> w) mem)
-(MOVBstore [7] {s} p w
-       x0:(MOVBstore [6] {s} p (SRLconst [8]  w)
-       x1:(MOVBstore [5] {s} p (SRLconst [16] w)
-       x2:(MOVBstore [4] {s} p (SRLconst [24] w)
-       x3:(MOVBstore [3] {s} p (SRLconst [32] w)
-       x4:(MOVBstore [2] {s} p (SRLconst [40] w)
-       x5:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [48] w)
-       x6:(MOVBstoreidx ptr0 idx0 (SRLconst [56] w) mem))))))))
-       && x0.Uses == 1
-       && x1.Uses == 1
-       && x2.Uses == 1
-       && x3.Uses == 1
-       && x4.Uses == 1
-       && x5.Uses == 1
-       && x6.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && isSamePtr(p1, p)
-       && clobber(x0, x1, x2, x3, x4, x5, x6)
-       => (MOVDstoreidx ptr0 idx0 (REV <typ.UInt64> w) mem)
-(MOVBstore [i] {s} ptr w
-       x0:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8,  24)] w)
-       x1:(MOVBstore [i-2] {s} ptr (UBFX [armBFAuxInt(16, 16)] w)
-       x2:(MOVBstore [i-3] {s} ptr (UBFX [armBFAuxInt(24,  8)] w) mem))))
-       && x0.Uses == 1
-       && x1.Uses == 1
-       && x2.Uses == 1
-       && clobber(x0, x1, x2)
-       => (MOVWstore [i-3] {s} ptr (REVW <typ.UInt32> w) mem)
-(MOVBstore [3] {s} p w
-       x0:(MOVBstore [2] {s} p (UBFX [armBFAuxInt(8, 24)] w)
-       x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (UBFX [armBFAuxInt(16, 16)] w)
-       x2:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(24, 8)] w) mem))))
-       && x0.Uses == 1
-       && x1.Uses == 1
-       && x2.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && isSamePtr(p1, p)
-       && clobber(x0, x1, x2)
-       => (MOVWstoreidx ptr0 idx0 (REVW <typ.UInt32> w) mem)
-(MOVBstoreidx ptr (ADDconst [3] idx) w
-       x0:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(8,  24)] w)
-       x1:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(16, 16)] w)
-       x2:(MOVBstoreidx ptr idx (UBFX [armBFAuxInt(24, 8)] w) mem))))
-       && x0.Uses == 1
-       && x1.Uses == 1
-       && x2.Uses == 1
-       && clobber(x0, x1, x2)
-       => (MOVWstoreidx ptr idx (REVW <typ.UInt32> w) mem)
-(MOVBstoreidx ptr idx w
-       x0:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(8,  24)] w)
-       x1:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(16, 16)] w)
-       x2:(MOVBstoreidx ptr (ADDconst [3] idx) (UBFX [armBFAuxInt(24,  8)] w) mem))))
-       && x0.Uses == 1
-       && x1.Uses == 1
-       && x2.Uses == 1
-       && clobber(x0, x1, x2)
-       => (MOVWstoreidx ptr idx w mem)
-(MOVBstore [i] {s} ptr w
-       x0:(MOVBstore [i-1] {s} ptr (SRLconst [8]  (MOVDreg w))
-       x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w))
-       x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVDreg w)) mem))))
-       && x0.Uses == 1
-       && x1.Uses == 1
-       && x2.Uses == 1
-       && clobber(x0, x1, x2)
-       => (MOVWstore [i-3] {s} ptr (REVW <typ.UInt32> w) mem)
-(MOVBstore [3] {s} p w
-       x0:(MOVBstore [2] {s} p (SRLconst [8] (MOVDreg w))
-       x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] (MOVDreg w))
-       x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] (MOVDreg w)) mem))))
-       && x0.Uses == 1
-       && x1.Uses == 1
-       && x2.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && isSamePtr(p1, p)
-       && clobber(x0, x1, x2)
-       => (MOVWstoreidx ptr0 idx0 (REVW <typ.UInt32> w) mem)
-(MOVBstore [i] {s} ptr w
-       x0:(MOVBstore [i-1] {s} ptr (SRLconst [8]  w)
-       x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
-       x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) mem))))
-       && x0.Uses == 1
-       && x1.Uses == 1
-       && x2.Uses == 1
-       && clobber(x0, x1, x2)
-       => (MOVWstore [i-3] {s} ptr (REVW <typ.UInt32> w) mem)
-(MOVBstore [3] {s} p w
-       x0:(MOVBstore [2] {s} p (SRLconst [8] w)
-       x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] w)
-       x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] w) mem))))
-       && x0.Uses == 1
-       && x1.Uses == 1
-       && x2.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && isSamePtr(p1, p)
-       && clobber(x0, x1, x2)
-       => (MOVWstoreidx ptr0 idx0 (REVW <typ.UInt32> w) mem)
-(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem))
-       && x.Uses == 1
-       && clobber(x)
-       => (MOVHstore [i-1] {s} ptr (REV16W <typ.UInt16> w) mem)
-(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] w) mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x)
-       => (MOVHstoreidx ptr0 idx0 (REV16W <typ.UInt16> w) mem)
-(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 8)] w) mem))
-       && x.Uses == 1
-       && clobber(x)
-       => (MOVHstore [i-1] {s} ptr (REV16W <typ.UInt16> w) mem)
-(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(8, 8)] w) mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x)
-       => (MOVHstoreidx ptr0 idx0 (REV16W <typ.UInt16> w) mem)
-(MOVBstoreidx ptr (ADDconst [1] idx) w x:(MOVBstoreidx ptr idx (UBFX [armBFAuxInt(8, 8)] w) mem))
-       && x.Uses == 1
-       && clobber(x)
-       => (MOVHstoreidx ptr idx (REV16W <typ.UInt16> w) mem)
-(MOVBstoreidx ptr idx w x:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(8, 8)] w) mem))
-       && x.Uses == 1
-       && clobber(x)
-       => (MOVHstoreidx ptr idx w mem)
-(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
-       && x.Uses == 1
-       && clobber(x)
-       => (MOVHstore [i-1] {s} ptr (REV16W <typ.UInt16> w) mem)
-(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x)
-       => (MOVHstoreidx ptr0 idx0 (REV16W <typ.UInt16> w) mem)
-(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 24)] w) mem))
-       && x.Uses == 1
-       && clobber(x)
-       => (MOVHstore [i-1] {s} ptr (REV16W <typ.UInt16> w) mem)
-(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(8, 24)] w) mem))
-       && x.Uses == 1
-       && s == nil
-       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
-       && clobber(x)
-       => (MOVHstoreidx ptr0 idx0 (REV16W <typ.UInt16> w) mem)
-
 // FP simplification
 (FNEGS  (FMULS  x y)) => (FNMULS x y)
 (FNEGD  (FMULD  x y)) => (FNMULD x y)
index bdb630a28f05332ec738aea173648e3be30e763d..e51338a064a55b17127a7b1642778e473c75a53a 100644 (file)
 (MOVWstoreidx ptr idx (MOV(W|WZ)reg x) mem) => (MOVWstoreidx ptr idx x mem)
 (MOVBstoreidx ptr idx (SRWconst (MOV(H|HZ)reg x) [c]) mem) && c <= 8 => (MOVBstoreidx ptr idx (SRWconst <typ.UInt32> x [c]) mem)
 (MOVBstoreidx ptr idx (SRWconst (MOV(W|WZ)reg x) [c]) mem) && c <= 24 => (MOVBstoreidx ptr idx (SRWconst <typ.UInt32> x [c]) mem)
-(MOVHBRstore {sym} ptr (MOV(H|HZ|W|WZ)reg x) mem) => (MOVHBRstore {sym} ptr x mem)
-(MOVWBRstore {sym} ptr (MOV(W|WZ)reg x) mem) => (MOVWBRstore {sym} ptr x mem)
+(MOVHBRstore ptr (MOV(H|HZ|W|WZ)reg x) mem) => (MOVHBRstore ptr x mem)
+(MOVWBRstore ptr (MOV(W|WZ)reg x) mem) => (MOVWBRstore ptr x mem)
 
 // Lose W-widening ops fed to compare-W
 (CMP(W|WU) x (MOV(W|WZ)reg y)) => (CMP(W|WU) x y)
 (F(ADD|SUB) (FMUL x y) z) && x.Block.Func.useFMA(v) => (FM(ADD|SUB) x y z)
 (F(ADDS|SUBS) (FMULS x y) z) && x.Block.Func.useFMA(v) => (FM(ADDS|SUBS) x y z)
 
-// The following statements are found in encoding/binary functions UintXX (load) and PutUintXX (store)
-// and convert the statements in these functions from multiple single byte loads or stores to
-// the single largest possible load or store.
-// Some are marked big or little endian based on the order in which the bytes are loaded or stored,
-// not on the ordering of the machine. These are intended for little endian machines.
-// To implement for big endian machines, most rules would have to be duplicated but the
-// resulting rule would be reversed, i. e., MOVHZload on little endian would be MOVHBRload on big endian
-// and vice versa.
-// b[0] | b[1]<<8 => load 16-bit Little endian
-(OR <t> x0:(MOVBZload [i0] {s} p mem)
-       o1:(SL(W|D)const x1:(MOVBZload [i1] {s} p mem) [8]))
-       && !config.BigEndian
-       && i1 == i0+1
-       && x0.Uses ==1 && x1.Uses == 1
-       && o1.Uses == 1
-       && mergePoint(b, x0, x1) != nil
-       && clobber(x0, x1, o1)
-        => @mergePoint(b,x0,x1) (MOVHZload <t> {s} [i0] p mem)
-
-// b[0]<<8 | b[1] => load 16-bit Big endian on Little endian arch.
-// Use byte-reverse indexed load for 2 bytes.
-(OR <t> x0:(MOVBZload [i1] {s} p mem)
-       o1:(SL(W|D)const x1:(MOVBZload [i0] {s} p mem) [8]))
-       && !config.BigEndian
-       && i1 == i0+1
-       && x0.Uses ==1 && x1.Uses == 1
-       && o1.Uses == 1
-       && mergePoint(b, x0, x1) != nil
-       && clobber(x0, x1, o1)
-         => @mergePoint(b,x0,x1) (MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)
-
-// b[0]<<n+8 | b[1]<<n => load 16-bit Big endian (where n%8== 0)
-// Use byte-reverse indexed load for 2 bytes,
-// then shift left to the correct position. Used to match subrules
-// from longer rules.
-(OR <t> s0:(SL(W|D)const x0:(MOVBZload [i1] {s} p mem) [n1])
-       s1:(SL(W|D)const x1:(MOVBZload [i0] {s} p mem) [n2]))
-       && !config.BigEndian
-       && i1 == i0+1
-       && n1%8 == 0
-       && n2 == n1+8
-       && x0.Uses == 1 && x1.Uses == 1
-       && s0.Uses == 1 && s1.Uses == 1
-       && mergePoint(b, x0, x1) != nil
-       && clobber(x0, x1, s0, s1)
-         => @mergePoint(b,x0,x1) (SLDconst <t> (MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [n1])
-
-// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 => load 32-bit Little endian
-// Use byte-reverse indexed load for 4 bytes.
-(OR <t> s1:(SL(W|D)const x2:(MOVBZload [i3] {s} p mem) [24])
-       o0:(OR <t> s0:(SL(W|D)const x1:(MOVBZload [i2] {s} p mem) [16])
-       x0:(MOVHZload [i0] {s} p mem)))
-       && !config.BigEndian
-       && i2 == i0+2
-       && i3 == i0+3
-       && x0.Uses ==1 && x1.Uses == 1 && x2.Uses == 1
-       && o0.Uses == 1
-       && s0.Uses == 1 && s1.Uses == 1
-       && mergePoint(b, x0, x1, x2) != nil
-       && clobber(x0, x1, x2, s0, s1, o0)
-        => @mergePoint(b,x0,x1,x2) (MOVWZload <t> {s} [i0] p mem)
-
-// b[0]<<24 | b[1]<<16 | b[2]<<8 | b[3] => load 32-bit Big endian order on Little endian arch
-// Use byte-reverse indexed load for 4 bytes with computed address.
-// Could be used to match subrules of a longer rule.
-(OR <t> s1:(SL(W|D)const x2:(MOVBZload [i0] {s} p mem) [24])
-       o0:(OR <t> s0:(SL(W|D)const x1:(MOVBZload [i1] {s} p mem) [16])
-       x0:(MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i2] {s} p) mem)))
-       && !config.BigEndian
-       && i1 == i0+1
-       && i2 == i0+2
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
-       && o0.Uses == 1
-       && s0.Uses == 1 && s1.Uses == 1
-       && mergePoint(b, x0, x1, x2) != nil
-       && clobber(x0, x1, x2, s0, s1, o0)
-         => @mergePoint(b,x0,x1,x2) (MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)
-
-// b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 => load 32-bit Big endian order on Little endian arch
-// Use byte-reverse indexed load for 4 bytes with computed address.
-// Could be used to match subrules of a longer rule.
-(OR <t> x0:(MOVBZload [i3] {s} p mem)
-       o0:(OR <t> s0:(SL(W|D)const x1:(MOVBZload [i2] {s} p mem) [8])
-       s1:(SL(W|D)const x2:(MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [16])))
-       && !config.BigEndian
-       && i2 == i0+2
-       && i3 == i0+3
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
-       && o0.Uses == 1
-       && s0.Uses == 1 && s1.Uses == 1
-       && mergePoint(b, x0, x1, x2) != nil
-       && clobber(x0, x1, x2, s0, s1, o0)
-         => @mergePoint(b,x0,x1,x2) (MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)
-
-// b[0]<<56 | b[1]<<48 | b[2]<<40 | b[3]<<32 => load 32-bit Big endian order on Little endian arch
-// Use byte-reverse indexed load to for 4 bytes with computed address.
-// Used to match longer rules.
-(OR <t> s2:(SLDconst x2:(MOVBZload [i3] {s} p mem) [32])
-       o0:(OR <t> s1:(SLDconst x1:(MOVBZload [i2] {s} p mem) [40])
-       s0:(SLDconst x0:(MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [48])))
-       && !config.BigEndian
-       && i2 == i0+2
-       && i3 == i0+3
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
-       && o0.Uses == 1
-       && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1
-       && mergePoint(b, x0, x1, x2) != nil
-       && clobber(x0, x1, x2, s0, s1, s2, o0)
-         => @mergePoint(b,x0,x1,x2) (SLDconst <t> (MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [32])
-
-// b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 => load 32-bit Big endian order on Little endian arch
-// Use byte-reverse indexed load for 4 bytes with constant address.
-// Used to match longer rules.
-(OR <t> s2:(SLDconst x2:(MOVBZload [i0] {s} p mem) [56])
-        o0:(OR <t> s1:(SLDconst x1:(MOVBZload [i1] {s} p mem) [48])
-        s0:(SLDconst x0:(MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i2] {s} p) mem) [32])))
-        && !config.BigEndian
-        && i1 == i0+1
-        && i2 == i0+2
-        && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
-        && o0.Uses == 1
-        && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1
-        && mergePoint(b, x0, x1, x2) != nil
-        && clobber(x0, x1, x2, s0, s1, s2, o0)
-          => @mergePoint(b,x0,x1,x2) (SLDconst <t> (MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [32])
-
-// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4] <<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 => load 64-bit Little endian
-// Rules with commutative ops and many operands will result in extremely large functions in rewritePPC64,
-// so matching shorter previously defined subrules is important.
-// Offset must be multiple of 4 for MOVD
-(OR <t> s6:(SLDconst x7:(MOVBZload [i7] {s} p mem) [56])
-       o5:(OR <t> s5:(SLDconst x6:(MOVBZload [i6] {s} p mem) [48])
-       o4:(OR <t> s4:(SLDconst x5:(MOVBZload [i5] {s} p mem) [40])
-       o3:(OR <t> s3:(SLDconst x4:(MOVBZload [i4] {s} p mem) [32])
-       x0:(MOVWZload {s} [i0] p mem)))))
-       && !config.BigEndian
-       && i4 == i0+4
-       && i5 == i0+5
-       && i6 == i0+6
-       && i7 == i0+7
-       && x0.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses ==1 && x7.Uses == 1
-       && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1
-       && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1
-       && mergePoint(b, x0, x4, x5, x6, x7) != nil
-       && clobber(x0, x4, x5, x6, x7, s3, s4, s5, s6, o3, o4, o5)
-         => @mergePoint(b,x0,x4,x5,x6,x7) (MOVDload <t> {s} [i0] p mem)
-
-// b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 load 64-bit Big endian ordered bytes on Little endian arch
-// Use byte-reverse indexed load of 8 bytes.
-// Rules with commutative ops and many operands can result in extremely large functions in rewritePPC64,
-// so matching shorter previously defined subrules is important.
-(OR <t> s0:(SLDconst x0:(MOVBZload [i0] {s} p mem) [56])
-       o0:(OR <t> s1:(SLDconst x1:(MOVBZload [i1] {s} p mem) [48])
-       o1:(OR <t> s2:(SLDconst x2:(MOVBZload [i2] {s} p mem) [40])
-       o2:(OR <t> s3:(SLDconst x3:(MOVBZload [i3] {s} p mem) [32])
-       x4:(MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i4] p) mem)))))
-       && !config.BigEndian
-       && i1 == i0+1
-       && i2 == i0+2
-       && i3 == i0+3
-       && i4 == i0+4
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
-       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
-       && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1
-       && mergePoint(b, x0, x1, x2, x3, x4) != nil
-       && clobber(x0, x1, x2, x3, x4, o0, o1, o2, s0, s1, s2, s3)
-         => @mergePoint(b,x0,x1,x2,x3,x4) (MOVDBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)
-
-// b[0]<<56 | b[1]<<48 | b[2]<<40 | b[3]<<32 | b[4]<<24 | b[5]<<16 | b[6]<<8 | b[7] => load 64-bit Big endian ordered bytes on Little endian arch
-// Use byte-reverse indexed load of 8 bytes.
-// Rules with commutative ops and many operands can result in extremely large functions in rewritePPC64,
-// so matching shorter previously defined subrules is important.
-(OR <t> x7:(MOVBZload [i7] {s} p mem)
-       o5:(OR <t> s6:(SLDconst x6:(MOVBZload [i6] {s} p mem) [8])
-       o4:(OR <t> s5:(SLDconst x5:(MOVBZload [i5] {s} p mem) [16])
-       o3:(OR <t> s4:(SLDconst x4:(MOVBZload [i4] {s} p mem) [24])
-       s0:(SL(W|D)const x3:(MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [32])))))
-       && !config.BigEndian
-       && i4 == i0+4
-       && i5 == i0+5
-       && i6 == i0+6
-       && i7 == i0+7
-       && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
-       && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1
-       && s0.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1
-       && mergePoint(b, x3, x4, x5, x6, x7) != nil
-       && clobber(x3, x4, x5, x6, x7, o3, o4, o5, s0, s4, s5, s6)
-       => @mergePoint(b,x3,x4,x5,x6,x7) (MOVDBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)
-
-// 2 byte store Little endian as in:
-//      b[0] = byte(v >> 16)
-//      b[1] = byte(v >> 24)
-// Added for use in matching longer rules.
-(MOVBstore [i1] {s} p (SR(W|D)const w [24])
-        x0:(MOVBstore [i0] {s} p (SR(W|D)const w [16]) mem))
-        && !config.BigEndian
-        && x0.Uses == 1
-        && i1 == i0+1
-        && clobber(x0)
-          => (MOVHstore [i0] {s} p (SRWconst <typ.UInt16> w [16]) mem)
-
-// 2 byte store Little endian as in:
-//      b[0] = byte(v)
-//      b[1] = byte(v >> 8)
-(MOVBstore [i1] {s} p (SR(W|D)const w [8])
-       x0:(MOVBstore [i0] {s} p w mem))
-       && !config.BigEndian
-       && x0.Uses == 1
-       && i1 == i0+1
-       && clobber(x0)
-         => (MOVHstore [i0] {s} p w mem)
-
-// 4 byte store Little endian as in:
-//     b[0:1] = uint16(v)
-//     b[2:3] = uint16(v >> 16)
-(MOVHstore [i1] {s} p (SR(W|D)const w [16])
-       x0:(MOVHstore [i0] {s} p w mem))
-       && !config.BigEndian
-       && x0.Uses == 1
-       && i1 == i0+2
-       && clobber(x0)
-         => (MOVWstore [i0] {s} p w mem)
-
-// 4 byte store Big endian as in:
-//     b[0] = byte(v >> 24)
-//     b[1] = byte(v >> 16)
-//     b[2] = byte(v >> 8)
-//     b[3] = byte(v)
-// Use byte-reverse indexed 4 byte store.
-(MOVBstore [i3] {s} p w
-       x0:(MOVBstore [i2] {s} p (SRWconst w [8])
-       x1:(MOVBstore [i1] {s} p (SRWconst w [16])
-       x2:(MOVBstore [i0] {s} p (SRWconst w [24]) mem))))
-       && !config.BigEndian
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
-       && i1 == i0+1 && i2 == i0+2 && i3 == i0+3
-       && clobber(x0, x1, x2)
-         => (MOVWBRstore (MOVDaddr <typ.Uintptr> [i0] {s} p) w mem)
-
-// The 2 byte store appears after the 4 byte store so that the
-// match for the 2 byte store is not done first.
-// If the 4 byte store is based on the 2 byte store then there are
-// variations on the MOVDaddr subrule that would require additional
-// rules to be written.
-
-// 2 byte store Big endian as in:
-//      b[0] = byte(v >> 8)
-//      b[1] = byte(v)
-(MOVBstore [i1] {s} p w x0:(MOVBstore [i0] {s} p (SRWconst w [8]) mem))
-       && !config.BigEndian
-       && x0.Uses == 1
-       && i1 == i0+1
-       && clobber(x0)
-         => (MOVHBRstore (MOVDaddr <typ.Uintptr> [i0] {s} p) w mem)
-
-// 8 byte store Little endian as in:
-//     b[0] = byte(v)
-//     b[1] = byte(v >> 8)
-//     b[2] = byte(v >> 16)
-//     b[3] = byte(v >> 24)
-//     b[4] = byte(v >> 32)
-//     b[5] = byte(v >> 40)
-//     b[6] = byte(v >> 48)
-//     b[7] = byte(v >> 56)
-// Built on previously defined rules
-// Offset must be multiple of 4 for MOVDstore
-(MOVBstore [i7] {s} p (SRDconst w [56])
-       x0:(MOVBstore [i6] {s} p (SRDconst w [48])
-       x1:(MOVBstore [i5] {s} p (SRDconst w [40])
-       x2:(MOVBstore [i4] {s} p (SRDconst w [32])
-       x3:(MOVWstore [i0] {s} p w mem)))))
-       && !config.BigEndian
-       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
-       && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7
-       && clobber(x0, x1, x2, x3)
-         => (MOVDstore [i0] {s} p w mem)
-
-// 8 byte store Big endian as in:
-//      b[0] = byte(v >> 56)
-//      b[1] = byte(v >> 48)
-//      b[2] = byte(v >> 40)
-//      b[3] = byte(v >> 32)
-//      b[4] = byte(v >> 24)
-//      b[5] = byte(v >> 16)
-//      b[6] = byte(v >> 8)
-//      b[7] = byte(v)
-// Use byte-reverse indexed 8 byte store.
-(MOVBstore [i7] {s} p w
-        x0:(MOVBstore [i6] {s} p (SRDconst w [8])
-        x1:(MOVBstore [i5] {s} p (SRDconst w [16])
-        x2:(MOVBstore [i4] {s} p (SRDconst w [24])
-        x3:(MOVBstore [i3] {s} p (SRDconst w [32])
-        x4:(MOVBstore [i2] {s} p (SRDconst w [40])
-        x5:(MOVBstore [i1] {s} p (SRDconst w [48])
-        x6:(MOVBstore [i0] {s} p (SRDconst w [56]) mem))))))))
-        && !config.BigEndian
-        && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1
-        && i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7
-        && clobber(x0, x1, x2, x3, x4, x5, x6)
-          => (MOVDBRstore (MOVDaddr <typ.Uintptr> [i0] {s} p) w mem)
-
 // Arch-specific inlining for small or disjoint runtime.memmove
 (SelectN [0] call:(CALLstatic {sym} s1:(MOVDstore _ (MOVDconst [sz]) s2:(MOVDstore  _ src s3:(MOVDstore {t} _ dst mem)))))
         && sz >= 0
 
 // Use byte reverse instructions on Power10
 (Bswap(16|32|64) x) && buildcfg.GOPPC64>=10 => (BR(H|W|D) x)
+
+// Fold bit reversal into loads.
+(BR(W|H) x:(MOV(W|H)Zload [off] {sym} ptr mem)) && x.Uses == 1 => @x.Block (MOV(W|H)BRload (MOVDaddr <ptr.Type> [off] {sym} ptr) mem)
+(BR(W|H) x:(MOV(W|H)Zloadidx ptr idx      mem)) && x.Uses == 1 => @x.Block (MOV(W|H)Zreg (MOV(W|H)BRloadidx ptr idx mem))
+(BRD x:(MOVDload [off] {sym} ptr mem)) && x.Uses == 1 => @x.Block (MOVDBRload (MOVDaddr <ptr.Type> [off] {sym} ptr) mem)
+(BRD x:(MOVDloadidx ptr idx      mem)) && x.Uses == 1 => @x.Block (MOVDBRloadidx ptr idx mem)
+
+// Fold bit reversal into stores.
+(MOV(D|W|H)store [off] {sym} ptr r:(BR(D|W|H) val) mem) && r.Uses == 1 => (MOV(D|W|H)BRstore (MOVDaddr <ptr.Type> [off] {sym} ptr) val mem)
+(MOV(D|W|H)storeidx ptr idx      r:(BR(D|W|H) val) mem) && r.Uses == 1 => (MOV(D|W|H)BRstoreidx ptr idx val mem)
+
+// GOPPC64<10 rules.
+// These Bswap operations should only be introduced by the memcombine pass in places where they can be folded into loads or stores.
+(Bswap(32|16) x:(MOV(W|H)Zload [off] {sym} ptr mem)) => @x.Block (MOV(W|H)BRload (MOVDaddr <ptr.Type> [off] {sym} ptr) mem)
+(Bswap(32|16) x:(MOV(W|H)Zloadidx ptr idx      mem)) => @x.Block (MOV(W|H)Zreg (MOV(W|H)BRloadidx ptr idx mem))
+(Bswap64 x:(MOVDload [off] {sym} ptr mem)) => @x.Block (MOVDBRload (MOVDaddr <ptr.Type> [off] {sym} ptr) mem)
+(Bswap64 x:(MOVDloadidx ptr idx      mem)) => @x.Block (MOVDBRloadidx ptr idx mem)
+(MOV(D|W|H)store [off] {sym} ptr (Bswap(64|32|16) val) mem) => (MOV(D|W|H)BRstore (MOVDaddr <ptr.Type> [off] {sym} ptr) val mem)
+(MOV(D|W|H)storeidx ptr idx      (Bswap(64|32|16) val) mem) => (MOV(D|W|H)BRstoreidx ptr idx val mem)
index 3e644478c46d24dd7ce780a8271a0ec02b416a9e..4be362373ca48369c936360852058a03936c9ddf 100644 (file)
@@ -331,9 +331,9 @@ func init() {
                // Load bytes in reverse endian order of the arch from arg0 into a 64 bit register, all zero extend.
                // The generated instructions are indexed loads with no offset field in the instruction so the aux fields are not used.
                // In these cases the index register field is set to 0 and the full address is in the base register.
-               {name: "MOVDBRload", argLength: 2, reg: gpload, asm: "MOVDBR", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"}, // load 8 bytes reverse order
-               {name: "MOVWBRload", argLength: 2, reg: gpload, asm: "MOVWBR", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes zero extend reverse order
-               {name: "MOVHBRload", argLength: 2, reg: gpload, asm: "MOVHBR", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes zero extend reverse order
+               {name: "MOVDBRload", argLength: 2, reg: gpload, asm: "MOVDBR", typ: "UInt64", faultOnNilArg0: true}, // load 8 bytes reverse order
+               {name: "MOVWBRload", argLength: 2, reg: gpload, asm: "MOVWBR", typ: "UInt32", faultOnNilArg0: true}, // load 4 bytes zero extend reverse order
+               {name: "MOVHBRload", argLength: 2, reg: gpload, asm: "MOVHBR", typ: "UInt16", faultOnNilArg0: true}, // load 2 bytes zero extend reverse order
 
                // In these cases an index register is used in addition to a base register
                // Loads from memory location arg[0] + arg[1].
@@ -355,9 +355,9 @@ func init() {
 
                // Store bytes in the reverse endian order of the arch into arg0.
                // These are indexed stores with no offset field in the instruction so the auxint fields are not used.
-               {name: "MOVDBRstore", argLength: 3, reg: gpstore, asm: "MOVDBR", aux: "Sym", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes reverse order
-               {name: "MOVWBRstore", argLength: 3, reg: gpstore, asm: "MOVWBR", aux: "Sym", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes reverse order
-               {name: "MOVHBRstore", argLength: 3, reg: gpstore, asm: "MOVHBR", aux: "Sym", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes reverse order
+               {name: "MOVDBRstore", argLength: 3, reg: gpstore, asm: "MOVDBR", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes reverse order
+               {name: "MOVWBRstore", argLength: 3, reg: gpstore, asm: "MOVWBR", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes reverse order
+               {name: "MOVHBRstore", argLength: 3, reg: gpstore, asm: "MOVHBR", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes reverse order
 
                // Floating point loads from arg0+aux+auxint
                {name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load double float
index 8c8c6ae25103cfe05c5d00d2556e139586304672..a9d62c79ce923b6044daa794fda5534156b065c7 100644 (file)
   && clobber(g)
   => ((ADD|SUB|MULL|AND|OR|XOR)Wload <t> [off] {sym} x ptr mem)
 
-// Combine constant stores into larger (unaligned) stores.
-// Avoid SB because constant stores to relative offsets are
-// emulated by the assembler and also can't handle unaligned offsets.
-(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
-  && p.Op != OpSB
-  && x.Uses == 1
-  && a.Off() + 1 == c.Off()
-  && clobber(x)
-  => (MOVHstoreconst [makeValAndOff(c.Val()&0xff | a.Val()<<8, a.Off())] {s} p mem)
-(MOVHstoreconst [c] {s} p x:(MOVHstoreconst [a] {s} p mem))
-  && p.Op != OpSB
-  && x.Uses == 1
-  && a.Off() + 2 == c.Off()
-  && clobber(x)
-  => (MOVWstore [a.Off()] {s} p (MOVDconst [int64(c.Val()&0xffff | a.Val()<<16)]) mem)
-(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
-  && p.Op != OpSB
-  && x.Uses == 1
-  && a.Off() + 4 == c.Off()
-  && clobber(x)
-  => (MOVDstore [a.Off()] {s} p (MOVDconst [c.Val64()&0xffffffff | a.Val64()<<32]) mem)
-
-// Combine stores into larger (unaligned) stores.
-// It doesn't work on global data (based on SB) because stores with relative addressing
-// require that the memory operand be aligned.
-(MOVBstore [i] {s} p w x:(MOVBstore [i-1] {s} p (SRDconst [8] w) mem))
-  && p.Op != OpSB
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVHstore [i-1] {s} p w mem)
-(MOVBstore [i] {s} p w0:(SRDconst [j] w) x:(MOVBstore [i-1] {s} p (SRDconst [j+8] w) mem))
-  && p.Op != OpSB
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVHstore [i-1] {s} p w0 mem)
-(MOVBstore [i] {s} p w x:(MOVBstore [i-1] {s} p (SRWconst [8] w) mem))
-  && p.Op != OpSB
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVHstore [i-1] {s} p w mem)
-(MOVBstore [i] {s} p w0:(SRWconst [j] w) x:(MOVBstore [i-1] {s} p (SRWconst [j+8] w) mem))
-  && p.Op != OpSB
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVHstore [i-1] {s} p w0 mem)
-(MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRDconst [16] w) mem))
-  && p.Op != OpSB
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVWstore [i-2] {s} p w mem)
-(MOVHstore [i] {s} p w0:(SRDconst [j] w) x:(MOVHstore [i-2] {s} p (SRDconst [j+16] w) mem))
-  && p.Op != OpSB
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVWstore [i-2] {s} p w0 mem)
-(MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRWconst [16] w) mem))
-  && p.Op != OpSB
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVWstore [i-2] {s} p w mem)
-(MOVHstore [i] {s} p w0:(SRWconst [j] w) x:(MOVHstore [i-2] {s} p (SRWconst [j+16] w) mem))
-  && p.Op != OpSB
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVWstore [i-2] {s} p w0 mem)
-(MOVWstore [i] {s} p (SRDconst [32] w) x:(MOVWstore [i-4] {s} p w mem))
-  && p.Op != OpSB
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVDstore [i-4] {s} p w mem)
-(MOVWstore [i] {s} p w0:(SRDconst [j] w) x:(MOVWstore [i-4] {s} p (SRDconst [j+32] w) mem))
-  && p.Op != OpSB
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVDstore [i-4] {s} p w0 mem)
-
-// Combine stores into larger (unaligned) stores with the bytes reversed (little endian).
-// Store-with-bytes-reversed instructions do not support relative memory addresses,
-// so these stores can't operate on global data (SB).
-(MOVBstore [i] {s} p (SRDconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
-  && p.Op != OpSB
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVHBRstore [i-1] {s} p w mem)
-(MOVBstore [i] {s} p (SRDconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SRDconst [j-8] w) mem))
-  && p.Op != OpSB
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVHBRstore [i-1] {s} p w0 mem)
-(MOVBstore [i] {s} p (SRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
-  && p.Op != OpSB
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVHBRstore [i-1] {s} p w mem)
-(MOVBstore [i] {s} p (SRWconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SRWconst [j-8] w) mem))
-  && p.Op != OpSB
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVHBRstore [i-1] {s} p w0 mem)
-(MOVHBRstore [i] {s} p (SRDconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVWBRstore [i-2] {s} p w mem)
-(MOVHBRstore [i] {s} p (SRDconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRDconst [j-16] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVWBRstore [i-2] {s} p w0 mem)
-(MOVHBRstore [i] {s} p (SRWconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVWBRstore [i-2] {s} p w mem)
-(MOVHBRstore [i] {s} p (SRWconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRWconst [j-16] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVWBRstore [i-2] {s} p w0 mem)
-(MOVWBRstore [i] {s} p (SRDconst [32] w) x:(MOVWBRstore [i-4] {s} p w mem))
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVDBRstore [i-4] {s} p w mem)
-(MOVWBRstore [i] {s} p (SRDconst [j] w) x:(MOVWBRstore [i-4] {s} p w0:(SRDconst [j-32] w) mem))
-  && x.Uses == 1
-  && clobber(x)
-  => (MOVDBRstore [i-4] {s} p w0 mem)
-
-(MOVBstore [7] {s} p1 (SRDconst w)
-  x1:(MOVHBRstore [5] {s} p1 (SRDconst w)
-  x2:(MOVWBRstore [1] {s} p1 (SRDconst w)
-  x3:(MOVBstore [0] {s} p1 w mem))))
-  && x1.Uses == 1
-  && x2.Uses == 1
-  && x3.Uses == 1
-  && clobber(x1, x2, x3)
-  => (MOVDBRstore {s} p1 w mem)
-
-// Combining byte loads into larger (unaligned) loads.
-
-// Big-endian loads
-
-(ORW                 x1:(MOVBZload [i1] {s} p mem)
-    sh:(SLWconst [8] x0:(MOVBZload [i0] {s} p mem)))
-  && i1 == i0+1
-  && p.Op != OpSB
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  => @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
-
-(OR                  x1:(MOVBZload [i1] {s} p mem)
-    sh:(SLDconst [8] x0:(MOVBZload [i0] {s} p mem)))
-  && i1 == i0+1
-  && p.Op != OpSB
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  => @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
-
-(ORW                  x1:(MOVHZload [i1] {s} p mem)
-    sh:(SLWconst [16] x0:(MOVHZload [i0] {s} p mem)))
-  && i1 == i0+2
-  && p.Op != OpSB
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  => @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
-
-(OR                   x1:(MOVHZload [i1] {s} p mem)
-    sh:(SLDconst [16] x0:(MOVHZload [i0] {s} p mem)))
-  && i1 == i0+2
-  && p.Op != OpSB
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  => @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
-
-(OR                   x1:(MOVWZload [i1] {s} p mem)
-    sh:(SLDconst [32] x0:(MOVWZload [i0] {s} p mem)))
-  && i1 == i0+4
-  && p.Op != OpSB
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  => @mergePoint(b,x0,x1) (MOVDload [i0] {s} p mem)
-
-(ORW
-    s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))
-    or:(ORW
-        s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))
-       y))
-  && i1 == i0+1
-  && j1 == j0-8
-  && j1 % 16 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  => @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
-
-(OR
-    s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))
-    or:(OR
-        s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))
-       y))
-  && i1 == i0+1
-  && j1 == j0-8
-  && j1 % 16 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  => @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
-
-(OR
-    s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem))
-    or:(OR
-        s1:(SLDconst [j1] x1:(MOVHZload [i1] {s} p mem))
-       y))
-  && i1 == i0+2
-  && j1 == j0-16
-  && j1 % 32 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  => @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)
-
-// Little-endian loads
-
-(ORW                 x0:(MOVBZload [i0] {s} p mem)
-    sh:(SLWconst [8] x1:(MOVBZload [i1] {s} p mem)))
-  && p.Op != OpSB
-  && i1 == i0+1
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  => @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
-
-(OR                  x0:(MOVBZload [i0] {s} p mem)
-    sh:(SLDconst [8] x1:(MOVBZload [i1] {s} p mem)))
-  && p.Op != OpSB
-  && i1 == i0+1
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  => @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
-
-(ORW                  r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))
-    sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
-  && i1 == i0+2
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && r0.Uses == 1
-  && r1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, r0, r1, sh)
-  => @mergePoint(b,x0,x1) (MOVWBRload [i0] {s} p mem)
-
-(OR                   r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))
-    sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
-  && i1 == i0+2
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && r0.Uses == 1
-  && r1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, r0, r1, sh)
-  => @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRload [i0] {s} p mem))
-
-(OR                   r0:(MOVWZreg x0:(MOVWBRload [i0] {s} p mem))
-    sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRload [i1] {s} p mem))))
-  && i1 == i0+4
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && r0.Uses == 1
-  && r1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, r0, r1, sh)
-  => @mergePoint(b,x0,x1) (MOVDBRload [i0] {s} p mem)
-
-(ORW
-    s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))
-    or:(ORW
-        s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))
-       y))
-  && p.Op != OpSB
-  && i1 == i0+1
-  && j1 == j0+8
-  && j0 % 16 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  => @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
-
-(OR
-    s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))
-    or:(OR
-        s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))
-       y))
-  && p.Op != OpSB
-  && i1 == i0+1
-  && j1 == j0+8
-  && j0 % 16 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  => @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
-
-(OR
-    s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem)))
-    or:(OR
-        s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)))
-       y))
-  && i1 == i0+2
-  && j1 == j0+16
-  && j0 % 32 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && r0.Uses == 1
-  && r1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, r0, r1, s0, s1, or)
-  => @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)
-
 // Combine stores into store multiples.
 // 32-bit
 (MOVWstore [i] {s} p w1 x:(MOVWstore [i-4] {s} p w0 mem))
 
 // Convert 32-bit store multiples into 64-bit stores.
 (STM2 [i] {s} p (SRDconst [32] x) x mem) => (MOVDstore [i] {s} p x mem)
+
+// Fold bit reversal into loads.
+(MOVWBR x:(MOVWZload    [off] {sym} ptr     mem)) && x.Uses == 1 => @x.Block (MOVWZreg (MOVWBRload    [off] {sym} ptr     mem)) // need zero extension?
+(MOVWBR x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 => @x.Block (MOVWZreg (MOVWBRloadidx [off] {sym} ptr idx mem)) // need zero extension?
+(MOVDBR x:(MOVDload     [off] {sym} ptr     mem)) && x.Uses == 1 => @x.Block (MOVDBRload    [off] {sym} ptr     mem)
+(MOVDBR x:(MOVDloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 => @x.Block (MOVDBRloadidx [off] {sym} ptr idx mem)
+
+// Fold bit reversal into stores.
+(MOV(D|W)store    [off] {sym} ptr     r:(MOV(D|W)BR x) mem) && r.Uses == 1 => (MOV(D|W)BRstore    [off] {sym} ptr     x mem)
+(MOV(D|W)storeidx [off] {sym} ptr idx r:(MOV(D|W)BR x) mem) && r.Uses == 1 => (MOV(D|W)BRstoreidx [off] {sym} ptr idx x mem)
+
+// Special bswap16 rules
+(Bswap16 x:(MOVHZload    [off] {sym} ptr     mem)) => @x.Block (MOVHZreg (MOVHBRload    [off] {sym} ptr     mem))
+(Bswap16 x:(MOVHZloadidx [off] {sym} ptr idx mem)) => @x.Block (MOVHZreg (MOVHBRloadidx [off] {sym} ptr idx mem))
+(MOVHstore    [off] {sym} ptr     (Bswap16 val) mem) => (MOVHBRstore    [off] {sym} ptr     val mem)
+(MOVHstoreidx [off] {sym} ptr idx (Bswap16 val) mem) => (MOVHBRstoreidx [off] {sym} ptr idx val mem)
index 54de1746b7d3693df078b8f42441708b80bf0251..8618cf34cdf2d645195dc5de7a030d7f933ca1dd 100644 (file)
@@ -482,6 +482,7 @@ var passes = [...]pass{
        {name: "branchelim", fn: branchelim},
        {name: "late fuse", fn: fuseLate},
        {name: "dse", fn: dse},
+       {name: "memcombine", fn: memcombine},
        {name: "writebarrier", fn: writebarrier, required: true}, // expand write barrier ops
        {name: "insert resched checks", fn: insertLoopReschedChecks,
                disabled: !buildcfg.Experiment.PreemptibleLoops}, // insert resched checks in loops.
@@ -580,6 +581,10 @@ var passOrder = [...]constraint{
        {"regalloc", "stackframe"},
        // trim needs regalloc to be done first.
        {"regalloc", "trim"},
+       // memcombine works better if fuse happens first, to help merge stores.
+       {"late fuse", "memcombine"},
+       // memcombine is an arch-independent pass.
+       {"memcombine", "lower"},
 }
 
 func init() {
index 6a3990500b7935cd3f69cc8f31073691384f299f..0740d9b7b01213d4d2082c11fa92c5cf62723257 100644 (file)
@@ -6,6 +6,7 @@ package ssa
 
 import (
        "cmd/compile/internal/abi"
+       "cmd/compile/internal/base"
        "cmd/compile/internal/ir"
        "cmd/compile/internal/types"
        "cmd/internal/obj"
@@ -50,6 +51,10 @@ type Config struct {
        Race           bool        // race detector enabled
        BigEndian      bool        //
        UseFMA         bool        // Use hardware FMA operation
+       unalignedOK    bool        // Unaligned loads/stores are ok
+       haveBswap64    bool        // architecture implements Bswap64
+       haveBswap32    bool        // architecture implements Bswap32
+       haveBswap16    bool        // architecture implements Bswap16
 }
 
 type (
@@ -192,6 +197,10 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo
                c.FPReg = framepointerRegAMD64
                c.LinkReg = linkRegAMD64
                c.hasGReg = true
+               c.unalignedOK = true
+               c.haveBswap64 = true
+               c.haveBswap32 = true
+               c.haveBswap16 = true
        case "386":
                c.PtrSize = 4
                c.RegSize = 4
@@ -204,6 +213,9 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo
                c.FPReg = framepointerReg386
                c.LinkReg = linkReg386
                c.hasGReg = false
+               c.unalignedOK = true
+               c.haveBswap32 = true
+               c.haveBswap16 = true
        case "arm":
                c.PtrSize = 4
                c.RegSize = 4
@@ -230,6 +242,10 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo
                c.FPReg = framepointerRegARM64
                c.LinkReg = linkRegARM64
                c.hasGReg = true
+               c.unalignedOK = true
+               c.haveBswap64 = true
+               c.haveBswap32 = true
+               c.haveBswap16 = true
        case "ppc64":
                c.BigEndian = true
                fallthrough
@@ -249,6 +265,14 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo
                c.FPReg = framepointerRegPPC64
                c.LinkReg = linkRegPPC64
                c.hasGReg = true
+               c.unalignedOK = true
+               // Note: ppc64 has register bswap ops only when GOPPC64>=10.
+               // But it has bswap+load and bswap+store ops for all ppc64 variants.
+               // That is the sense we're using them here - they are only used
+               // in contexts where they can be merged with a load or store.
+               c.haveBswap64 = true
+               c.haveBswap32 = true
+               c.haveBswap16 = true
        case "mips64":
                c.BigEndian = true
                fallthrough
@@ -288,6 +312,10 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo
                c.hasGReg = true
                c.noDuffDevice = true
                c.BigEndian = true
+               c.unalignedOK = true
+               c.haveBswap64 = true
+               c.haveBswap32 = true
+               c.haveBswap16 = true // only for loads&stores, see ppc64 comment
        case "mips":
                c.BigEndian = true
                fallthrough
@@ -387,3 +415,17 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo
 }
 
 func (c *Config) Ctxt() *obj.Link { return c.ctxt }
+
+// haveByteSwap reports whether the target provides a byte-swap
+// operation for values of the given size in bytes (2, 4, or 8).
+// On some targets (see the ppc64 note above) this may mean only a
+// byte-reversing load/store, usable when merged with a memory op.
+// Any other size is a fatal error.
+func (c *Config) haveByteSwap(size int64) bool {
+       switch size {
+       case 8:
+               return c.haveBswap64
+       case 4:
+               return c.haveBswap32
+       case 2:
+               return c.haveBswap16
+       default:
+               base.Fatalf("bad size %d\n", size)
+               return false
+       }
+}
diff --git a/src/cmd/compile/internal/ssa/memcombine.go b/src/cmd/compile/internal/ssa/memcombine.go
new file mode 100644 (file)
index 0000000..fc0b665
--- /dev/null
@@ -0,0 +1,737 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import (
+       "cmd/compile/internal/base"
+       "cmd/compile/internal/types"
+       "cmd/internal/src"
+       "sort"
+)
+
+// memcombine combines smaller loads and stores into larger ones.
+// We ensure this generates good code for encoding/binary operations.
+// It may help other cases also.
+// It runs as an arch-independent pass before lowering (see passOrder),
+// so the combined operations are still expressed with generic ops.
+func memcombine(f *Func) {
+       // This optimization requires that the architecture has
+       // unaligned loads and unaligned stores.
+       if !f.Config.unalignedOK {
+               return
+       }
+
+       // Combine loads first, then stores.
+       memcombineLoads(f)
+       memcombineStores(f)
+}
+
+// memcombineLoads finds trees of OR operations whose leaves are small
+// zero-extended loads and, when those loads are adjacent in memory,
+// rewrites each tree into a single wider load (see combineLoads).
+func memcombineLoads(f *Func) {
+       // Find "OR trees" to start with.
+       mark := f.newSparseSet(f.NumValues())
+       defer f.retSparseSet(mark)
+       var order []*Value
+
+       // Mark all values that are the argument of an OR.
+       // An OR that is itself marked is interior to some tree;
+       // unmarked ORs are tree roots.
+       for _, b := range f.Blocks {
+               for _, v := range b.Values {
+                       if v.Op == OpOr16 || v.Op == OpOr32 || v.Op == OpOr64 {
+                               mark.add(v.Args[0].ID)
+                               mark.add(v.Args[1].ID)
+                       }
+               }
+       }
+       for _, b := range f.Blocks {
+               for _, v := range b.Values {
+                       if v.Op != OpOr16 && v.Op != OpOr32 && v.Op != OpOr64 {
+                               continue
+                       }
+                       if mark.contains(v.ID) {
+                               // marked - means it is not the root of an OR tree
+                               continue
+                       }
+                       // Add the OR tree rooted at v to the order.
+                       // We use BFS here, but any walk that puts roots before leaves would work.
+                       i := len(order)
+                       order = append(order, v)
+                       for ; i < len(order); i++ {
+                               x := order[i]
+                               for j := 0; j < 2; j++ {
+                                       a := x.Args[j]
+                                       if a.Op == OpOr16 || a.Op == OpOr32 || a.Op == OpOr64 {
+                                               order = append(order, a)
+                                       }
+                               }
+                       }
+               }
+               // Try to combine each tree node, widest combination first.
+               // NOTE(review): order is never truncated between iterations
+               // of the block loop (cf. memcombineStores, which does
+               // order = order[:0] per block), so nodes gathered in earlier
+               // blocks are re-examined for every later block. Presumably
+               // harmless (rewritten roots change Op) but O(n^2); confirm
+               // whether a per-block reset was intended.
+               for _, v := range order {
+                       // max is the widest combine, in bytes, that could
+                       // produce a value of v's width.
+                       max := f.Config.RegSize
+                       switch v.Op {
+                       case OpOr64:
+                       case OpOr32:
+                               max = 4
+                       case OpOr16:
+                               max = 2
+                       default:
+                               continue
+                       }
+                       for n := max; n > 1; n /= 2 {
+                               if combineLoads(v, n) {
+                                       break
+                               }
+                       }
+               }
+       }
+}
+
+// A BaseAddress represents the address ptr+idx, where
+// ptr is a pointer type and idx is an integer type.
+// idx may be nil, in which case it is treated as 0.
+type BaseAddress struct {
+       ptr *Value // pointer-typed base of the address
+       idx *Value // optional integer index; nil means an index of 0
+}
+
+// splitPtr returns the base address of ptr and any
+// constant offset from that base.
+// BaseAddress{ptr,nil},0 is always a valid result, but splitPtr
+// tries to peel away as many constants into off as possible.
+func splitPtr(ptr *Value) (BaseAddress, int64) {
+       var idx *Value
+       var off int64
+       for {
+               if ptr.Op == OpOffPtr {
+                       // Constant offset: accumulate and keep descending.
+                       off += ptr.AuxInt
+                       ptr = ptr.Args[0]
+               } else if ptr.Op == OpAddPtr {
+                       if idx != nil {
+                               // We have two or more indexing values.
+                               // Pick the first one we found.
+                               return BaseAddress{ptr: ptr, idx: idx}, off
+                       }
+                       idx = ptr.Args[1]
+                       // If the index is itself (x + const), fold the
+                       // constant into off and keep x as the index.
+                       if idx.Op == OpAdd32 || idx.Op == OpAdd64 {
+                               if idx.Args[0].Op == OpConst32 || idx.Args[0].Op == OpConst64 {
+                                       off += idx.Args[0].AuxInt
+                                       idx = idx.Args[1]
+                               } else if idx.Args[1].Op == OpConst32 || idx.Args[1].Op == OpConst64 {
+                                       off += idx.Args[1].AuxInt
+                                       idx = idx.Args[0]
+                               }
+                       }
+                       ptr = ptr.Args[0]
+               } else {
+                       // Not an address computation we understand; stop here.
+                       return BaseAddress{ptr: ptr, idx: idx}, off
+               }
+       }
+}
+
+// combineLoads tries to rewrite the OR tree rooted at root, which must
+// OR together n shifted, zero-extended loads of equal size from
+// adjacent addresses, into a single load of n times that size
+// (plus a byte swap and/or shift where required).
+// It reports whether the rewrite was performed.
+func combineLoads(root *Value, n int64) bool {
+       orOp := root.Op
+       var shiftOp Op
+       switch orOp {
+       case OpOr64:
+               shiftOp = OpLsh64x64
+       case OpOr32:
+               shiftOp = OpLsh32x64
+       case OpOr16:
+               shiftOp = OpLsh16x64
+       default:
+               return false
+       }
+
+       // Find n values that are ORed together with the above op.
+       a := make([]*Value, 0, 8)
+       v := root
+       for int64(len(a)) < n {
+               if v.Args[0].Op == orOp {
+                       a = append(a, v.Args[1])
+                       v = v.Args[0]
+               } else if v.Args[1].Op == orOp {
+                       a = append(a, v.Args[0])
+                       v = v.Args[1]
+               } else if int64(len(a)) == n-2 {
+                       a = append(a, v.Args[0])
+                       a = append(a, v.Args[1])
+                       v = nil
+               } else {
+                       return false
+               }
+       }
+       tail := v // Value to OR in beyond the ones we're working with (or nil if none).
+
+       // Check the first entry to see what ops we're looking for.
+       // All the entries should be of the form shift(extend(load)), maybe with no shift.
+       v = a[0]
+       if v.Op == shiftOp {
+               v = v.Args[0]
+       }
+       var extOp Op
+       if orOp == OpOr64 && (v.Op == OpZeroExt8to64 || v.Op == OpZeroExt16to64 || v.Op == OpZeroExt32to64) ||
+               orOp == OpOr32 && (v.Op == OpZeroExt8to32 || v.Op == OpZeroExt16to32) ||
+               orOp == OpOr16 && v.Op == OpZeroExt8to16 {
+               extOp = v.Op
+               v = v.Args[0]
+       } else {
+               return false
+       }
+       if v.Op != OpLoad {
+               return false
+       }
+       base, _ := splitPtr(v.Args[0])
+       mem := v.Args[1]
+       size := v.Type.Size()
+
+       if root.Block.Func.Config.arch == "S390X" {
+               // s390x can't handle unaligned accesses to global variables.
+               if base.ptr.Op == OpAddr {
+                       return false
+               }
+       }
+
+       // Check all the entries, extract useful info.
+       type LoadRecord struct {
+               load   *Value
+               offset int64 // offset of load address from base
+               shift  int64
+       }
+       r := make([]LoadRecord, n, 8)
+       for i := int64(0); i < n; i++ {
+               v := a[i]
+               if v.Uses != 1 {
+                       return false
+               }
+               shift := int64(0)
+               if v.Op == shiftOp {
+                       if v.Args[1].Op != OpConst64 {
+                               return false
+                       }
+                       shift = v.Args[1].AuxInt
+                       v = v.Args[0]
+                       if v.Uses != 1 {
+                               return false
+                       }
+               }
+               if v.Op != extOp {
+                       return false
+               }
+               load := v.Args[0]
+               if load.Op != OpLoad {
+                       return false
+               }
+               if load.Uses != 1 {
+                       return false
+               }
+               if load.Args[1] != mem {
+                       return false
+               }
+               p, off := splitPtr(load.Args[0])
+               if p != base {
+                       return false
+               }
+               r[i] = LoadRecord{load: load, offset: off, shift: shift}
+       }
+
+       // Sort in memory address order.
+       sort.Slice(r, func(i, j int) bool {
+               return r[i].offset < r[j].offset
+       })
+
+       // Check that we have contiguous offsets.
+       for i := int64(0); i < n; i++ {
+               if r[i].offset != r[0].offset+i*size {
+                       return false
+               }
+       }
+
+       // Check for reads in little-endian or big-endian order.
+       shift0 := r[0].shift
+       isLittleEndian := true
+       for i := int64(0); i < n; i++ {
+               if r[i].shift != shift0+i*size*8 {
+                       isLittleEndian = false
+                       break
+               }
+       }
+       isBigEndian := true
+       for i := int64(0); i < n; i++ {
+               if r[i].shift != shift0-i*size*8 {
+                       isBigEndian = false
+                       break
+               }
+       }
+       if !isLittleEndian && !isBigEndian {
+               return false
+       }
+
+       // Find a place to put the new load.
+       // This is tricky, because it has to be at a point where
+       // its memory argument is live. We can't just put it in root.Block.
+       // We use the block of the latest load.
+       loads := make([]*Value, n, 8)
+       for i := int64(0); i < n; i++ {
+               loads[i] = r[i].load
+       }
+       loadBlock := mergePoint(root.Block, loads...)
+       if loadBlock == nil {
+               return false
+       }
+       // Find a source position to use.
+       pos := src.NoXPos
+       for _, load := range loads {
+               if load.Block == loadBlock {
+                       pos = load.Pos
+                       break
+               }
+       }
+       if pos == src.NoXPos {
+               return false
+       }
+
+       // Check to see if we need a byte swap after loading.
+       // Swapping is only done when the pieces are single bytes and the
+       // architecture has a byte-swap op of the combined width.
+       needSwap := isLittleEndian && root.Block.Func.Config.BigEndian ||
+               isBigEndian && !root.Block.Func.Config.BigEndian
+       if needSwap && (size != 1 || !root.Block.Func.Config.haveByteSwap(n)) {
+               return false
+       }
+
+       // This is the commit point.
+
+       // First, issue load at lowest address.
+       v = loadBlock.NewValue2(pos, OpLoad, sizeType(n*size), r[0].load.Args[0], mem)
+
+       // Byte swap if needed.
+       if needSwap {
+               v = byteSwap(loadBlock, pos, v)
+       }
+
+       // Extend if needed.
+       if n*size < root.Type.Size() {
+               v = zeroExtend(loadBlock, pos, v, n*size, root.Type.Size())
+       }
+
+       // Shift if needed.
+       if isLittleEndian && shift0 != 0 {
+               v = leftShift(loadBlock, pos, v, shift0)
+       }
+       if isBigEndian && shift0-(n-1)*size*8 != 0 {
+               // In big-endian order the piece at the lowest address lands
+               // in the most significant size*8 bits of the wide value,
+               // i.e. at shift (n-1)*size*8, but the original tree wants it
+               // at shift0. (The shift-pattern check above established
+               // r[i].shift == shift0-i*size*8, so the residual shift must
+               // include the *size factor as well.)
+               v = leftShift(loadBlock, pos, v, shift0-(n-1)*size*8)
+       }
+
+       // Install. If there's a tail, make the root (OR v tail).
+       // If not, do (Copy v).
+       if tail != nil {
+               root.SetArg(0, v)
+               root.SetArg(1, tail)
+       } else {
+               root.reset(OpCopy)
+               root.AddArg(v)
+       }
+
+       // Clobber the loads, just to prevent additional work being done on
+       // subtrees (which are now unreachable).
+       for i := int64(0); i < n; i++ {
+               clobber(r[i].load)
+       }
+       return true
+}
+
+// memcombineStores looks, within each block, for chains of narrow
+// stores to adjacent addresses and tries to combine runs of them into
+// wider stores (see combineStores).
+func memcombineStores(f *Func) {
+       mark := f.newSparseSet(f.NumValues())
+       defer f.retSparseSet(mark)
+       var order []*Value
+
+       for _, b := range f.Blocks {
+               // Mark all stores which are not last in a store sequence.
+               // (Marking v's memory argument marks the store, if any,
+               // that v directly follows.)
+               mark.clear()
+               for _, v := range b.Values {
+                       if v.Op == OpStore {
+                               mark.add(v.MemoryArg().ID)
+                       }
+               }
+
+               // pick an order for visiting stores such that
+               // later stores come earlier in the ordering.
+               order = order[:0]
+               for _, v := range b.Values {
+                       if v.Op != OpStore {
+                               continue
+                       }
+                       if mark.contains(v.ID) {
+                               continue // not last in a chain of stores
+                       }
+                       // Walk the memory chain backwards from the last
+                       // store, appending each store in this block.
+                       for {
+                               order = append(order, v)
+                               v = v.Args[2]
+                               if v.Block != b || v.Op != OpStore {
+                                       break
+                               }
+                       }
+               }
+
+               // Look for combining opportunities at each store in queue order.
+               for _, v := range order {
+                       if v.Op != OpStore { // already rewritten
+                               continue
+                       }
+
+                       size := v.Aux.(*types.Type).Size()
+                       if size >= f.Config.RegSize || size == 0 {
+                               continue
+                       }
+
+                       // Try the widest combine first, then halve.
+                       for n := f.Config.RegSize / size; n > 1; n /= 2 {
+                               if combineStores(v, n) {
+                                       // NOTE(review): this continue is a no-op
+                                       // (the loop advances anyway); combineLoads
+                                       // breaks after a successful combine —
+                                       // confirm whether break was intended here.
+                                       continue
+                               }
+                       }
+               }
+       }
+}
+
+// combineStores tries to combine the n narrow stores ending in root
+// (root plus the n-1 stores preceding it in the memory chain) into a
+// single store n times as wide.
+// Returns true if successful.
+func combineStores(root *Value, n int64) bool {
+       // Helper functions.
+       type StoreRecord struct {
+               store  *Value
+               offset int64 // offset of the store's address from the common base pointer
+       }
+       // getShiftBase returns the common value that the data arguments of
+       // the first two stores are truncations (possibly of unsigned right
+       // shifts) of, or nil if there is no such common base.
+       getShiftBase := func(a []StoreRecord) *Value {
+               x := a[0].store.Args[1]
+               y := a[1].store.Args[1]
+               switch x.Op {
+               case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
+                       x = x.Args[0]
+               default:
+                       return nil
+               }
+               switch y.Op {
+               case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
+                       y = y.Args[0]
+               default:
+                       return nil
+               }
+               var x2 *Value
+               switch x.Op {
+               case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
+                       x2 = x.Args[0]
+               default:
+               }
+               var y2 *Value
+               switch y.Op {
+               case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
+                       y2 = y.Args[0]
+               default:
+               }
+               if y2 == x {
+                       // a shift of x and x itself.
+                       return x
+               }
+               if x2 == y {
+                       // a shift of y and y itself.
+                       return y
+               }
+               if x2 == y2 {
+                       // 2 shifts both of the same argument.
+                       return x2
+               }
+               return nil
+       }
+       // isShiftBase reports whether store v's data argument is
+       // Trunc(base) or Trunc(base >> k).
+       isShiftBase := func(v, base *Value) bool {
+               val := v.Args[1]
+               switch val.Op {
+               case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
+                       val = val.Args[0]
+               default:
+                       return false
+               }
+               if val == base {
+                       return true
+               }
+               switch val.Op {
+               case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
+                       val = val.Args[0]
+               default:
+                       return false
+               }
+               return val == base
+       }
+       // shift returns the right-shift amount applied to base before it
+       // was truncated and stored by v: 0 if unshifted, -1 if v's data
+       // argument is not Trunc(base) or Trunc(base >> Const64).
+       shift := func(v, base *Value) int64 {
+               val := v.Args[1]
+               switch val.Op {
+               case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
+                       val = val.Args[0]
+               default:
+                       return -1
+               }
+               if val == base {
+                       return 0
+               }
+               switch val.Op {
+               case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
+                       val = val.Args[1]
+               default:
+                       return -1
+               }
+               if val.Op != OpConst64 {
+                       return -1
+               }
+               return val.AuxInt
+       }
+
+       // Element size of the individual stores.
+       size := root.Aux.(*types.Type).Size()
+       if size*n > root.Block.Func.Config.RegSize {
+               // Combined width must fit in a register.
+               return false
+       }
+
+       // Gather n stores to look at. Check easy conditions we require.
+       a := make([]StoreRecord, 0, 8)
+       rbase, roff := splitPtr(root.Args[0])
+       if root.Block.Func.Config.arch == "S390X" {
+               // s390x can't handle unaligned accesses to global variables.
+               if rbase.ptr.Op == OpAddr {
+                       return false
+               }
+       }
+       a = append(a, StoreRecord{root, roff})
+       // Walk back through the memory chain collecting the n-1 preceding
+       // stores; they must be same-block, same-size, single-use stores
+       // off the same base pointer.
+       for i, x := int64(1), root.Args[2]; i < n; i, x = i+1, x.Args[2] {
+               if x.Op != OpStore {
+                       return false
+               }
+               if x.Block != root.Block {
+                       return false
+               }
+               if x.Uses != 1 { // Note: root can have more than one use.
+                       return false
+               }
+               if x.Aux.(*types.Type).Size() != size {
+                       return false
+               }
+               base, off := splitPtr(x.Args[0])
+               if base != rbase {
+                       return false
+               }
+               a = append(a, StoreRecord{x, off})
+       }
+       // Before we sort, grab the memory arg the result should have.
+       mem := a[n-1].store.Args[2]
+
+       // Sort stores in increasing address order.
+       sort.Slice(a, func(i, j int) bool {
+               return a[i].offset < a[j].offset
+       })
+
+       // Check that everything is written to sequential locations.
+       for i := int64(0); i < n; i++ {
+               if a[i].offset != a[0].offset+i*size {
+                       return false
+               }
+       }
+
+       // Memory location we're going to write at (the lowest one).
+       ptr := a[0].store.Args[0]
+
+       // Check for constant stores
+       isConst := true
+       for i := int64(0); i < n; i++ {
+               switch a[i].store.Args[1].Op {
+               case OpConst32, OpConst16, OpConst8:
+               default:
+                       isConst = false
+                       // NOTE(review): this break exits the switch, not the
+                       // loop; remaining iterations still run. Harmless
+                       // (isConst stays false) but misleading.
+                       break
+               }
+       }
+       if isConst {
+               // Modify root to do all the stores.
+               // Assemble the combined constant, placing each element's
+               // bits according to the target's endianness.
+               var c int64
+               mask := int64(1)<<(8*size) - 1
+               for i := int64(0); i < n; i++ {
+                       s := 8 * size * int64(i)
+                       if root.Block.Func.Config.BigEndian {
+                               s = 8*size*(n-1) - s
+                       }
+                       c |= (a[i].store.Args[1].AuxInt & mask) << s
+               }
+               var cv *Value
+               switch size * n {
+               case 2:
+                       cv = root.Block.Func.ConstInt16(types.Types[types.TUINT16], int16(c))
+               case 4:
+                       cv = root.Block.Func.ConstInt32(types.Types[types.TUINT32], int32(c))
+               case 8:
+                       cv = root.Block.Func.ConstInt64(types.Types[types.TUINT64], c)
+               }
+
+               // Move all the stores to the root.
+               for i := int64(0); i < n; i++ {
+                       v := a[i].store
+                       if v == root {
+                               v.Aux = cv.Type // widen store type
+                               v.SetArg(0, ptr)
+                               v.SetArg(1, cv)
+                               v.SetArg(2, mem)
+                       } else {
+                               clobber(v)
+                               v.Type = types.Types[types.TBOOL] // erase memory type
+                       }
+               }
+               return true
+       }
+
+       // Check that all the shift/trunc are of the same base value.
+       shiftBase := getShiftBase(a)
+       if shiftBase == nil {
+               return false
+       }
+       for i := int64(0); i < n; i++ {
+               if !isShiftBase(a[i].store, shiftBase) {
+                       return false
+               }
+       }
+
+       // Check for writes in little-endian or big-endian order.
+       // Little-endian order: shift amounts grow by 8 per address step;
+       // big-endian order: they shrink by 8 per address step.
+       isLittleEndian := true
+       shift0 := shift(a[0].store, shiftBase)
+       for i := int64(1); i < n; i++ {
+               if shift(a[i].store, shiftBase) != shift0+i*8 {
+                       isLittleEndian = false
+                       break
+               }
+       }
+       isBigEndian := true
+       for i := int64(1); i < n; i++ {
+               if shift(a[i].store, shiftBase) != shift0-i*8 {
+                       isBigEndian = false
+                       break
+               }
+       }
+       if !isLittleEndian && !isBigEndian {
+               return false
+       }
+
+       // Check to see if we need byte swap before storing.
+       // A swap is attempted only for byte-sized elements, and only when
+       // the target has the required byte-swap operation.
+       needSwap := isLittleEndian && root.Block.Func.Config.BigEndian ||
+               isBigEndian && !root.Block.Func.Config.BigEndian
+       if needSwap && (size != 1 || !root.Block.Func.Config.haveByteSwap(n)) {
+               return false
+       }
+
+       // This is the commit point.
+
+       // Modify root to do all the stores.
+       // Build the value to store: shift the base down so the lowest
+       // stored byte sits in the low bits, truncate to the combined
+       // width, and byte-swap if the store order is opposite the
+       // machine's byte order.
+       sv := shiftBase
+       if isLittleEndian && shift0 != 0 {
+               sv = rightShift(root.Block, root.Pos, sv, shift0)
+       }
+       if isBigEndian && shift0-(n-1)*8 != 0 {
+               sv = rightShift(root.Block, root.Pos, sv, shift0-(n-1)*8)
+       }
+       if sv.Type.Size() > size*n {
+               sv = truncate(root.Block, root.Pos, sv, sv.Type.Size(), size*n)
+       }
+       if needSwap {
+               sv = byteSwap(root.Block, root.Pos, sv)
+       }
+
+       // Move all the stores to the root.
+       for i := int64(0); i < n; i++ {
+               v := a[i].store
+               if v == root {
+                       v.Aux = sv.Type // widen store type
+                       v.SetArg(0, ptr)
+                       v.SetArg(1, sv)
+                       v.SetArg(2, mem)
+               } else {
+                       clobber(v)
+                       v.Type = types.Types[types.TBOOL] // erase memory type
+               }
+       }
+       return true
+}
+
+// sizeType returns the unsigned integer type of the given size in
+// bytes. Only sizes 8, 4, and 2 are valid; anything else is a fatal
+// compiler error.
+func sizeType(size int64) *types.Type {
+       switch size {
+       case 8:
+               return types.Types[types.TUINT64]
+       case 4:
+               return types.Types[types.TUINT32]
+       case 2:
+               return types.Types[types.TUINT16]
+       default:
+               base.Fatalf("bad size %d\n", size)
+               return nil // not reached; satisfies the compiler
+       }
+}
+
+// truncate narrows v from `from` bytes to `to` bytes. The from*10+to
+// key selects among the supported 8->2, 8->4, and 4->2 truncations;
+// any other pair is a fatal compiler error.
+func truncate(b *Block, pos src.XPos, v *Value, from, to int64) *Value {
+       switch from*10 + to {
+       case 82:
+               return b.NewValue1(pos, OpTrunc64to16, types.Types[types.TUINT16], v)
+       case 84:
+               return b.NewValue1(pos, OpTrunc64to32, types.Types[types.TUINT32], v)
+       case 42:
+               return b.NewValue1(pos, OpTrunc32to16, types.Types[types.TUINT16], v)
+       default:
+               base.Fatalf("bad sizes %d %d\n", from, to)
+               return nil // not reached; satisfies the compiler
+       }
+}
+// zeroExtend widens v from `from` bytes to `to` bytes with zero
+// extension. The from*10+to key selects among the supported 2->4,
+// 2->8, and 4->8 widenings; any other pair is a fatal compiler error.
+func zeroExtend(b *Block, pos src.XPos, v *Value, from, to int64) *Value {
+       switch from*10 + to {
+       case 24:
+               return b.NewValue1(pos, OpZeroExt16to32, types.Types[types.TUINT32], v)
+       case 28:
+               return b.NewValue1(pos, OpZeroExt16to64, types.Types[types.TUINT64], v)
+       case 48:
+               return b.NewValue1(pos, OpZeroExt32to64, types.Types[types.TUINT64], v)
+       default:
+               base.Fatalf("bad sizes %d %d\n", from, to)
+               return nil // not reached; satisfies the compiler
+       }
+}
+
+// leftShift returns v shifted left by `shift` bits, choosing the shift
+// op by v's size (8, 4, or 2 bytes). The shift amount is materialized
+// as a 64-bit constant; other sizes are a fatal compiler error.
+func leftShift(b *Block, pos src.XPos, v *Value, shift int64) *Value {
+       s := b.Func.ConstInt64(types.Types[types.TUINT64], shift)
+       size := v.Type.Size()
+       switch size {
+       case 8:
+               return b.NewValue2(pos, OpLsh64x64, v.Type, v, s)
+       case 4:
+               return b.NewValue2(pos, OpLsh32x64, v.Type, v, s)
+       case 2:
+               return b.NewValue2(pos, OpLsh16x64, v.Type, v, s)
+       default:
+               base.Fatalf("bad size %d\n", size)
+               return nil // not reached; satisfies the compiler
+       }
+}
+// rightShift returns v shifted right (unsigned) by `shift` bits,
+// choosing the shift op by v's size (8, 4, or 2 bytes). The shift
+// amount is materialized as a 64-bit constant; other sizes are a
+// fatal compiler error.
+func rightShift(b *Block, pos src.XPos, v *Value, shift int64) *Value {
+       s := b.Func.ConstInt64(types.Types[types.TUINT64], shift)
+       size := v.Type.Size()
+       switch size {
+       case 8:
+               return b.NewValue2(pos, OpRsh64Ux64, v.Type, v, s)
+       case 4:
+               return b.NewValue2(pos, OpRsh32Ux64, v.Type, v, s)
+       case 2:
+               return b.NewValue2(pos, OpRsh16Ux64, v.Type, v, s)
+       default:
+               base.Fatalf("bad size %d\n", size)
+               return nil // not reached; satisfies the compiler
+       }
+}
+// byteSwap returns v with its bytes reversed, using the generic Bswap
+// op matching v's size (8, 4, or 2 bytes); other sizes are a fatal
+// error on the value.
+func byteSwap(b *Block, pos src.XPos, v *Value) *Value {
+       switch v.Type.Size() {
+       case 8:
+               return b.NewValue1(pos, OpBswap64, v.Type, v)
+       case 4:
+               return b.NewValue1(pos, OpBswap32, v.Type, v)
+       case 2:
+               return b.NewValue1(pos, OpBswap16, v.Type, v)
+
+       default:
+               v.Fatalf("bad size %d\n", v.Type.Size())
+               return nil // not reached; satisfies the compiler
+       }
+}
index 16052abbde95b4ccb08470e928e14ea34d1e3b6f..5e04805ba7be16cdc687743377592a871ea62c24 100644 (file)
@@ -29470,10 +29470,8 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:           "MOVDBRload",
-               auxType:        auxSymOff,
                argLen:         2,
                faultOnNilArg0: true,
-               symEffect:      SymRead,
                asm:            ppc64.AMOVDBR,
                reg: regInfo{
                        inputs: []inputInfo{
@@ -29486,10 +29484,8 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:           "MOVWBRload",
-               auxType:        auxSymOff,
                argLen:         2,
                faultOnNilArg0: true,
-               symEffect:      SymRead,
                asm:            ppc64.AMOVWBR,
                reg: regInfo{
                        inputs: []inputInfo{
@@ -29502,10 +29498,8 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:           "MOVHBRload",
-               auxType:        auxSymOff,
                argLen:         2,
                faultOnNilArg0: true,
-               symEffect:      SymRead,
                asm:            ppc64.AMOVHBR,
                reg: regInfo{
                        inputs: []inputInfo{
@@ -29684,10 +29678,8 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:           "MOVDBRstore",
-               auxType:        auxSym,
                argLen:         3,
                faultOnNilArg0: true,
-               symEffect:      SymWrite,
                asm:            ppc64.AMOVDBR,
                reg: regInfo{
                        inputs: []inputInfo{
@@ -29698,10 +29690,8 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:           "MOVWBRstore",
-               auxType:        auxSym,
                argLen:         3,
                faultOnNilArg0: true,
-               symEffect:      SymWrite,
                asm:            ppc64.AMOVWBR,
                reg: regInfo{
                        inputs: []inputInfo{
@@ -29712,10 +29702,8 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:           "MOVHBRstore",
-               auxType:        auxSym,
                argLen:         3,
                faultOnNilArg0: true,
-               symEffect:      SymWrite,
                asm:            ppc64.AMOVHBR,
                reg: regInfo{
                        inputs: []inputInfo{
index 550e7d5e4b9c5deae55b71298f7b80c13486e600..b0512676c9e8c5e7f434ba57a2707a3ad53ddf09 100644 (file)
@@ -278,6 +278,8 @@ func rewriteValue386(v *Value) bool {
        case OpAvg32u:
                v.Op = Op386AVGLU
                return true
+       case OpBswap16:
+               return rewriteValue386_OpBswap16(v)
        case OpBswap32:
                v.Op = Op386BSWAPL
                return true
@@ -3715,266 +3717,6 @@ func rewriteValue386_Op386MOVBstore(v *Value) bool {
                v.AddArg3(base, val, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-1] {s} p w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != Op386SHRWconst || auxIntToInt16(v_1.AuxInt) != 8 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != Op386MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVWstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} p (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-1] {s} p w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != Op386SHRLconst || auxIntToInt32(v_1.AuxInt) != 8 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != Op386MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVWstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHRWconst [8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i] {s} p w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               w := v_1
-               x := v_2
-               if x.Op != Op386MOVBstore || auxIntToInt32(x.AuxInt) != i+1 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               x_1 := x.Args[1]
-               if x_1.Op != Op386SHRWconst || auxIntToInt16(x_1.AuxInt) != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVWstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHRLconst [8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i] {s} p w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               w := v_1
-               x := v_2
-               if x.Op != Op386MOVBstore || auxIntToInt32(x.AuxInt) != i+1 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               x_1 := x.Args[1]
-               if x_1.Op != Op386SHRLconst || auxIntToInt32(x_1.AuxInt) != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVWstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-1] {s} p w0 mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != Op386SHRLconst {
-                       break
-               }
-               j := auxIntToInt32(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != Op386MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               w0 := x.Args[1]
-               if w0.Op != Op386SHRLconst || auxIntToInt32(w0.AuxInt) != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVWstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w0, mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} p1 (SHRWconst [8] w) x:(MOVBstore [i] {s} p0 w mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
-       // result: (MOVWstore [i] {s} p0 w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               if v_1.Op != Op386SHRWconst || auxIntToInt16(v_1.AuxInt) != 8 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != Op386MOVBstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               p0 := x.Args[0]
-               if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVWstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w, mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} p1 (SHRLconst [8] w) x:(MOVBstore [i] {s} p0 w mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
-       // result: (MOVWstore [i] {s} p0 w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               if v_1.Op != Op386SHRLconst || auxIntToInt32(v_1.AuxInt) != 8 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != Op386MOVBstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               p0 := x.Args[0]
-               if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVWstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w, mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} p0 w x:(MOVBstore {s} [i] p1 (SHRWconst [8] w) mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
-       // result: (MOVWstore [i] {s} p0 w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p0 := v_0
-               w := v_1
-               x := v_2
-               if x.Op != Op386MOVBstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               p1 := x.Args[0]
-               x_1 := x.Args[1]
-               if x_1.Op != Op386SHRWconst || auxIntToInt16(x_1.AuxInt) != 8 || w != x_1.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVWstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w, mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} p0 w x:(MOVBstore {s} [i] p1 (SHRLconst [8] w) mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
-       // result: (MOVWstore [i] {s} p0 w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p0 := v_0
-               w := v_1
-               x := v_2
-               if x.Op != Op386MOVBstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               p1 := x.Args[0]
-               x_1 := x.Args[1]
-               if x_1.Op != Op386SHRLconst || auxIntToInt32(x_1.AuxInt) != 8 || w != x_1.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVWstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w, mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i] {s} p0 w0:(SHRLconst [j-8] w) mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
-       // result: (MOVWstore [i] {s} p0 w0 mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               if v_1.Op != Op386SHRLconst {
-                       break
-               }
-               j := auxIntToInt32(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != Op386MOVBstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               p0 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != Op386SHRLconst || auxIntToInt32(w0.AuxInt) != j-8 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVWstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w0, mem)
-               return true
-       }
        return false
 }
 func rewriteValue386_Op386MOVBstoreconst(v *Value) bool {
@@ -4025,108 +3767,6 @@ func rewriteValue386_Op386MOVBstoreconst(v *Value) bool {
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
-       // cond: x.Uses == 1 && a.Off() + 1 == c.Off() && clobber(x)
-       // result: (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p mem)
-       for {
-               c := auxIntToValAndOff(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               x := v_1
-               if x.Op != Op386MOVBstoreconst {
-                       break
-               }
-               a := auxIntToValAndOff(x.AuxInt)
-               if auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[1]
-               if p != x.Args[0] || !(x.Uses == 1 && a.Off()+1 == c.Off() && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVWstoreconst)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xff|c.Val()<<8, a.Off()))
-               v.Aux = symToAux(s)
-               v.AddArg2(p, mem)
-               return true
-       }
-       // match: (MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
-       // cond: x.Uses == 1 && a.Off() + 1 == c.Off() && clobber(x)
-       // result: (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p mem)
-       for {
-               a := auxIntToValAndOff(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               x := v_1
-               if x.Op != Op386MOVBstoreconst {
-                       break
-               }
-               c := auxIntToValAndOff(x.AuxInt)
-               if auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[1]
-               if p != x.Args[0] || !(x.Uses == 1 && a.Off()+1 == c.Off() && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVWstoreconst)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xff|c.Val()<<8, a.Off()))
-               v.Aux = symToAux(s)
-               v.AddArg2(p, mem)
-               return true
-       }
-       // match: (MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
-       // cond: x.Uses == 1 && a.Off() == c.Off() && sequentialAddresses(p0, p1, 1) && clobber(x)
-       // result: (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
-       for {
-               c := auxIntToValAndOff(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               x := v_1
-               if x.Op != Op386MOVBstoreconst {
-                       break
-               }
-               a := auxIntToValAndOff(x.AuxInt)
-               if auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[1]
-               p0 := x.Args[0]
-               if !(x.Uses == 1 && a.Off() == c.Off() && sequentialAddresses(p0, p1, 1) && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVWstoreconst)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xff|c.Val()<<8, a.Off()))
-               v.Aux = symToAux(s)
-               v.AddArg2(p0, mem)
-               return true
-       }
-       // match: (MOVBstoreconst [a] {s} p0 x:(MOVBstoreconst [c] {s} p1 mem))
-       // cond: x.Uses == 1 && a.Off() == c.Off() && sequentialAddresses(p0, p1, 1) && clobber(x)
-       // result: (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
-       for {
-               a := auxIntToValAndOff(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p0 := v_0
-               x := v_1
-               if x.Op != Op386MOVBstoreconst {
-                       break
-               }
-               c := auxIntToValAndOff(x.AuxInt)
-               if auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[1]
-               p1 := x.Args[0]
-               if !(x.Uses == 1 && a.Off() == c.Off() && sequentialAddresses(p0, p1, 1) && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVWstoreconst)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xff|c.Val()<<8, a.Off()))
-               v.Aux = symToAux(s)
-               v.AddArg2(p0, mem)
-               return true
-       }
        return false
 }
 func rewriteValue386_Op386MOVLload(v *Value) bool {
@@ -5258,115 +4898,6 @@ func rewriteValue386_Op386MOVWstore(v *Value) bool {
                v.AddArg3(base, val, mem)
                return true
        }
-       // match: (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstore [i-2] {s} p w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != Op386SHRLconst || auxIntToInt32(v_1.AuxInt) != 16 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != Op386MOVWstore || auxIntToInt32(x.AuxInt) != i-2 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVLstore)
-               v.AuxInt = int32ToAuxInt(i - 2)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       // match: (MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstore [i-2] {s} p w0 mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != Op386SHRLconst {
-                       break
-               }
-               j := auxIntToInt32(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != Op386MOVWstore || auxIntToInt32(x.AuxInt) != i-2 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               w0 := x.Args[1]
-               if w0.Op != Op386SHRLconst || auxIntToInt32(w0.AuxInt) != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVLstore)
-               v.AuxInt = int32ToAuxInt(i - 2)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w0, mem)
-               return true
-       }
-       // match: (MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i] {s} p0 w mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)
-       // result: (MOVLstore [i] {s} p0 w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               if v_1.Op != Op386SHRLconst || auxIntToInt32(v_1.AuxInt) != 16 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != Op386MOVWstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               p0 := x.Args[0]
-               if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVLstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w, mem)
-               return true
-       }
-       // match: (MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i] {s} p0 w0:(SHRLconst [j-16] w) mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)
-       // result: (MOVLstore [i] {s} p0 w0 mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               if v_1.Op != Op386SHRLconst {
-                       break
-               }
-               j := auxIntToInt32(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != Op386MOVWstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               p0 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != Op386SHRLconst || auxIntToInt32(w0.AuxInt) != j-16 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVLstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w0, mem)
-               return true
-       }
        return false
 }
 func rewriteValue386_Op386MOVWstoreconst(v *Value) bool {
@@ -5417,108 +4948,6 @@ func rewriteValue386_Op386MOVWstoreconst(v *Value) bool {
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
-       // cond: x.Uses == 1 && a.Off() + 2 == c.Off() && clobber(x)
-       // result: (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p mem)
-       for {
-               c := auxIntToValAndOff(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               x := v_1
-               if x.Op != Op386MOVWstoreconst {
-                       break
-               }
-               a := auxIntToValAndOff(x.AuxInt)
-               if auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[1]
-               if p != x.Args[0] || !(x.Uses == 1 && a.Off()+2 == c.Off() && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVLstoreconst)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xffff|c.Val()<<16, a.Off()))
-               v.Aux = symToAux(s)
-               v.AddArg2(p, mem)
-               return true
-       }
-       // match: (MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
-       // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
-       // result: (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p mem)
-       for {
-               a := auxIntToValAndOff(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               x := v_1
-               if x.Op != Op386MOVWstoreconst {
-                       break
-               }
-               c := auxIntToValAndOff(x.AuxInt)
-               if auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[1]
-               if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVLstoreconst)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xffff|c.Val()<<16, a.Off()))
-               v.Aux = symToAux(s)
-               v.AddArg2(p, mem)
-               return true
-       }
-       // match: (MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
-       // cond: x.Uses == 1 && a.Off() == c.Off() && sequentialAddresses(p0, p1, 2) && clobber(x)
-       // result: (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
-       for {
-               c := auxIntToValAndOff(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               x := v_1
-               if x.Op != Op386MOVWstoreconst {
-                       break
-               }
-               a := auxIntToValAndOff(x.AuxInt)
-               if auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[1]
-               p0 := x.Args[0]
-               if !(x.Uses == 1 && a.Off() == c.Off() && sequentialAddresses(p0, p1, 2) && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVLstoreconst)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xffff|c.Val()<<16, a.Off()))
-               v.Aux = symToAux(s)
-               v.AddArg2(p0, mem)
-               return true
-       }
-       // match: (MOVWstoreconst [a] {s} p0 x:(MOVWstoreconst [c] {s} p1 mem))
-       // cond: x.Uses == 1 && a.Off() == c.Off() && sequentialAddresses(p0, p1, 2) && clobber(x)
-       // result: (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
-       for {
-               a := auxIntToValAndOff(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p0 := v_0
-               x := v_1
-               if x.Op != Op386MOVWstoreconst {
-                       break
-               }
-               c := auxIntToValAndOff(x.AuxInt)
-               if auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[1]
-               p1 := x.Args[0]
-               if !(x.Uses == 1 && a.Off() == c.Off() && sequentialAddresses(p0, p1, 2) && clobber(x)) {
-                       break
-               }
-               v.reset(Op386MOVLstoreconst)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xffff|c.Val()<<16, a.Off()))
-               v.Aux = symToAux(s)
-               v.AddArg2(p0, mem)
-               return true
-       }
        return false
 }
 func rewriteValue386_Op386MULL(v *Value) bool {
@@ -6237,8 +5666,6 @@ func rewriteValue386_Op386NOTL(v *Value) bool {
 func rewriteValue386_Op386ORL(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
        // match: (ORL x (MOVLconst [c]))
        // result: (ORLconst [c] x)
        for {
@@ -6290,203 +5717,6 @@ func rewriteValue386_Op386ORL(v *Value) bool {
                v.copyOf(x)
                return true
        }
-       // match: (ORL x0:(MOVBload [i0] {s} p mem) s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
-       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, s0)
-       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != Op386MOVBload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       s0 := v_1
-                       if s0.Op != Op386SHLLconst || auxIntToInt32(s0.AuxInt) != 8 {
-                               continue
-                       }
-                       x1 := s0.Args[0]
-                       if x1.Op != Op386MOVBload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, s0)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, Op386MOVWload, typ.UInt16)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i0)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p, mem)
-                       return true
-               }
-               break
-       }
-       // match: (ORL x0:(MOVBload [i] {s} p0 mem) s0:(SHLLconst [8] x1:(MOVBload [i] {s} p1 mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, s0)
-       // result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != Op386MOVBload {
-                               continue
-                       }
-                       i := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p0 := x0.Args[0]
-                       s0 := v_1
-                       if s0.Op != Op386SHLLconst || auxIntToInt32(s0.AuxInt) != 8 {
-                               continue
-                       }
-                       x1 := s0.Args[0]
-                       if x1.Op != Op386MOVBload || auxIntToInt32(x1.AuxInt) != i || auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       p1 := x1.Args[0]
-                       if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, s0)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, Op386MOVWload, typ.UInt16)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p0, mem)
-                       return true
-               }
-               break
-       }
-       // match: (ORL o0:(ORL x0:(MOVWload [i0] {s} p mem) s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem))) s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)))
-       // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, s0, s1, o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       o0 := v_0
-                       if o0.Op != Op386ORL {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o0_0 := o0.Args[0]
-                       o0_1 := o0.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, o0_0, o0_1 = _i1+1, o0_1, o0_0 {
-                               x0 := o0_0
-                               if x0.Op != Op386MOVWload {
-                                       continue
-                               }
-                               i0 := auxIntToInt32(x0.AuxInt)
-                               s := auxToSym(x0.Aux)
-                               mem := x0.Args[1]
-                               p := x0.Args[0]
-                               s0 := o0_1
-                               if s0.Op != Op386SHLLconst || auxIntToInt32(s0.AuxInt) != 16 {
-                                       continue
-                               }
-                               x1 := s0.Args[0]
-                               if x1.Op != Op386MOVBload {
-                                       continue
-                               }
-                               i2 := auxIntToInt32(x1.AuxInt)
-                               if auxToSym(x1.Aux) != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               if p != x1.Args[0] || mem != x1.Args[1] {
-                                       continue
-                               }
-                               s1 := v_1
-                               if s1.Op != Op386SHLLconst || auxIntToInt32(s1.AuxInt) != 24 {
-                                       continue
-                               }
-                               x2 := s1.Args[0]
-                               if x2.Op != Op386MOVBload {
-                                       continue
-                               }
-                               i3 := auxIntToInt32(x2.AuxInt)
-                               if auxToSym(x2.Aux) != s {
-                                       continue
-                               }
-                               _ = x2.Args[1]
-                               if p != x2.Args[0] || mem != x2.Args[1] || !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, x2)
-                               v0 := b.NewValue0(x2.Pos, Op386MOVLload, typ.UInt32)
-                               v.copyOf(v0)
-                               v0.AuxInt = int32ToAuxInt(i0)
-                               v0.Aux = symToAux(s)
-                               v0.AddArg2(p, mem)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (ORL o0:(ORL x0:(MOVWload [i] {s} p0 mem) s0:(SHLLconst [16] x1:(MOVBload [i] {s} p1 mem))) s1:(SHLLconst [24] x2:(MOVBload [i] {s} p2 mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && sequentialAddresses(p0, p1, 2) && sequentialAddresses(p1, p2, 1) && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, s0, s1, o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVLload [i] {s} p0 mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       o0 := v_0
-                       if o0.Op != Op386ORL {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o0_0 := o0.Args[0]
-                       o0_1 := o0.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, o0_0, o0_1 = _i1+1, o0_1, o0_0 {
-                               x0 := o0_0
-                               if x0.Op != Op386MOVWload {
-                                       continue
-                               }
-                               i := auxIntToInt32(x0.AuxInt)
-                               s := auxToSym(x0.Aux)
-                               mem := x0.Args[1]
-                               p0 := x0.Args[0]
-                               s0 := o0_1
-                               if s0.Op != Op386SHLLconst || auxIntToInt32(s0.AuxInt) != 16 {
-                                       continue
-                               }
-                               x1 := s0.Args[0]
-                               if x1.Op != Op386MOVBload || auxIntToInt32(x1.AuxInt) != i || auxToSym(x1.Aux) != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               p1 := x1.Args[0]
-                               if mem != x1.Args[1] {
-                                       continue
-                               }
-                               s1 := v_1
-                               if s1.Op != Op386SHLLconst || auxIntToInt32(s1.AuxInt) != 24 {
-                                       continue
-                               }
-                               x2 := s1.Args[0]
-                               if x2.Op != Op386MOVBload || auxIntToInt32(x2.AuxInt) != i || auxToSym(x2.Aux) != s {
-                                       continue
-                               }
-                               _ = x2.Args[1]
-                               p2 := x2.Args[0]
-                               if mem != x2.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && sequentialAddresses(p0, p1, 2) && sequentialAddresses(p1, p2, 1) && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, x2)
-                               v0 := b.NewValue0(x2.Pos, Op386MOVLload, typ.UInt32)
-                               v.copyOf(v0)
-                               v0.AuxInt = int32ToAuxInt(i)
-                               v0.Aux = symToAux(s)
-                               v0.AddArg2(p0, mem)
-                               return true
-                       }
-               }
-               break
-       }
        return false
 }
 func rewriteValue386_Op386ORLconst(v *Value) bool {
@@ -8483,6 +7713,18 @@ func rewriteValue386_OpAddr(v *Value) bool {
                return true
        }
 }
+func rewriteValue386_OpBswap16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Bswap16 x)
+       // result: (ROLWconst [8] x)
+       for {
+               x := v_0
+               v.reset(Op386ROLWconst)
+               v.AuxInt = int16ToAuxInt(8)
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValue386_OpConst16(v *Value) bool {
        // match: (Const16 [c])
        // result: (MOVLconst [int32(c)])
index 2cc80408a3b73f3c97e99b3a37c03b58820ec40b..117ab46975ef89629a3db837bea3780691b032da 100644 (file)
@@ -620,6 +620,8 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpBitLen64(v)
        case OpBitLen8:
                return rewriteValueAMD64_OpBitLen8(v)
+       case OpBswap16:
+               return rewriteValueAMD64_OpBswap16(v)
        case OpBswap32:
                v.Op = OpAMD64BSWAPL
                return true
@@ -10482,872 +10484,1124 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
                v.AddArg3(base, val, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p w x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
-       // cond: x0.Uses == 1 && clobber(x0)
-       // result: (MOVWstore [i-1] {s} p (ROLWconst <typ.UInt16> [8] w) mem)
+       // match: (MOVBstore [i] {s} p x1:(MOVBload [j] {s2} p2 mem) mem2:(MOVBstore [i-1] {s} p x2:(MOVBload [j-1] {s2} p2 mem) mem))
+       // cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1, x2, mem2)
+       // result: (MOVWstore [i-1] {s} p (MOVWload [j-1] {s2} p2 mem) mem)
        for {
                i := auxIntToInt32(v.AuxInt)
                s := auxToSym(v.Aux)
                p := v_0
-               w := v_1
-               x0 := v_2
-               if x0.Op != OpAMD64MOVBstore || auxIntToInt32(x0.AuxInt) != i-1 || auxToSym(x0.Aux) != s {
+               x1 := v_1
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               j := auxIntToInt32(x1.AuxInt)
+               s2 := auxToSym(x1.Aux)
+               mem := x1.Args[1]
+               p2 := x1.Args[0]
+               mem2 := v_2
+               if mem2.Op != OpAMD64MOVBstore || auxIntToInt32(mem2.AuxInt) != i-1 || auxToSym(mem2.Aux) != s {
+                       break
+               }
+               _ = mem2.Args[2]
+               if p != mem2.Args[0] {
                        break
                }
-               mem := x0.Args[2]
-               if p != x0.Args[0] {
+               x2 := mem2.Args[1]
+               if x2.Op != OpAMD64MOVBload || auxIntToInt32(x2.AuxInt) != j-1 || auxToSym(x2.Aux) != s2 {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpAMD64SHRWconst || auxIntToInt8(x0_1.AuxInt) != 8 || w != x0_1.Args[0] || !(x0.Uses == 1 && clobber(x0)) {
+               _ = x2.Args[1]
+               if p2 != x2.Args[0] || mem != x2.Args[1] || mem != mem2.Args[2] || !(x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1, x2, mem2)) {
                        break
                }
                v.reset(OpAMD64MOVWstore)
                v.AuxInt = int32ToAuxInt(i - 1)
                v.Aux = symToAux(s)
-               v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, typ.UInt16)
-               v0.AuxInt = int8ToAuxInt(8)
-               v0.AddArg(w)
+               v0 := b.NewValue0(x2.Pos, OpAMD64MOVWload, typ.UInt16)
+               v0.AuxInt = int32ToAuxInt(j - 1)
+               v0.Aux = symToAux(s2)
+               v0.AddArg2(p2, mem)
                v.AddArg3(p, v0, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p1 w x0:(MOVBstore [i] {s} p0 (SHRWconst [8] w) mem))
-       // cond: x0.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x0)
-       // result: (MOVWstore [i] {s} p0 (ROLWconst <typ.UInt16> [8] w) mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVBstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd32(off)
+       // result: (MOVBstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
+               sc := auxIntToValAndOff(v.AuxInt)
                s := auxToSym(v.Aux)
-               p1 := v_0
-               w := v_1
-               x0 := v_2
-               if x0.Op != OpAMD64MOVBstore || auxIntToInt32(x0.AuxInt) != i || auxToSym(x0.Aux) != s {
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               mem := x0.Args[2]
-               p0 := x0.Args[0]
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpAMD64SHRWconst || auxIntToInt8(x0_1.AuxInt) != 8 || w != x0_1.Args[0] || !(x0.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x0)) {
+               off := auxIntToInt32(v_0.AuxInt)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(ValAndOff(sc).canAdd32(off)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i)
+               v.reset(OpAMD64MOVBstoreconst)
+               v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off))
                v.Aux = symToAux(s)
-               v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, typ.UInt16)
-               v0.AuxInt = int8ToAuxInt(8)
-               v0.AddArg(w)
-               v.AddArg3(p0, v0, mem)
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p w x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w) x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w) x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)
-       // result: (MOVLstore [i-3] {s} p (BSWAPL <typ.UInt32> w) mem)
+       // match: (MOVBstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)
+       // result: (MOVBstoreconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               w := v_1
-               x2 := v_2
-               if x2.Op != OpAMD64MOVBstore || auxIntToInt32(x2.AuxInt) != i-1 || auxToSym(x2.Aux) != s {
-                       break
-               }
-               _ = x2.Args[2]
-               if p != x2.Args[0] {
+               sc := auxIntToValAndOff(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpAMD64SHRLconst || auxIntToInt8(x2_1.AuxInt) != 8 || w != x2_1.Args[0] {
+               off := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)) {
                        break
                }
-               x1 := x2.Args[2]
-               if x1.Op != OpAMD64MOVBstore || auxIntToInt32(x1.AuxInt) != i-2 || auxToSym(x1.Aux) != s {
+               v.reset(OpAMD64MOVBstoreconst)
+               v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off))
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVLQSX(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MOVLQSX x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v_0
+               if x.Op != OpAMD64MOVLload {
                        break
                }
-               _ = x1.Args[2]
-               if p != x1.Args[0] {
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpAMD64SHRLconst || auxIntToInt8(x1_1.AuxInt) != 16 || w != x1_1.Args[0] {
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64MOVLQSXload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVLQSX x:(MOVQload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v_0
+               if x.Op != OpAMD64MOVQload {
                        break
                }
-               x0 := x1.Args[2]
-               if x0.Op != OpAMD64MOVBstore || auxIntToInt32(x0.AuxInt) != i-3 || auxToSym(x0.Aux) != s {
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               mem := x0.Args[2]
-               if p != x0.Args[0] {
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64MOVLQSXload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVLQSX (ANDLconst [c] x))
+       // cond: uint32(c) & 0x80000000 == 0
+       // result: (ANDLconst [c & 0x7fffffff] x)
+       for {
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpAMD64SHRLconst || auxIntToInt8(x0_1.AuxInt) != 24 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)) {
+               c := auxIntToInt32(v_0.AuxInt)
+               x := v_0.Args[0]
+               if !(uint32(c)&0x80000000 == 0) {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = int32ToAuxInt(i - 3)
-               v.Aux = symToAux(s)
-               v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, typ.UInt32)
-               v0.AddArg(w)
-               v.AddArg3(p, v0, mem)
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = int32ToAuxInt(c & 0x7fffffff)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBstore [i] {s} p3 w x2:(MOVBstore [i] {s} p2 (SHRLconst [8] w) x1:(MOVBstore [i] {s} p1 (SHRLconst [16] w) x0:(MOVBstore [i] {s} p0 (SHRLconst [24] w) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && sequentialAddresses(p0, p1, 1) && sequentialAddresses(p1, p2, 1) && sequentialAddresses(p2, p3, 1) && clobber(x0, x1, x2)
-       // result: (MOVLstore [i] {s} p0 (BSWAPL <typ.UInt32> w) mem)
+       // match: (MOVLQSX (MOVLQSX x))
+       // result: (MOVLQSX x)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p3 := v_0
-               w := v_1
-               x2 := v_2
-               if x2.Op != OpAMD64MOVBstore || auxIntToInt32(x2.AuxInt) != i || auxToSym(x2.Aux) != s {
-                       break
-               }
-               _ = x2.Args[2]
-               p2 := x2.Args[0]
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpAMD64SHRLconst || auxIntToInt8(x2_1.AuxInt) != 8 || w != x2_1.Args[0] {
+               if v_0.Op != OpAMD64MOVLQSX {
                        break
                }
-               x1 := x2.Args[2]
-               if x1.Op != OpAMD64MOVBstore || auxIntToInt32(x1.AuxInt) != i || auxToSym(x1.Aux) != s {
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVLQSX)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVLQSX (MOVWQSX x))
+       // result: (MOVWQSX x)
+       for {
+               if v_0.Op != OpAMD64MOVWQSX {
                        break
                }
-               _ = x1.Args[2]
-               p1 := x1.Args[0]
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpAMD64SHRLconst || auxIntToInt8(x1_1.AuxInt) != 16 || w != x1_1.Args[0] {
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVWQSX)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVLQSX (MOVBQSX x))
+       // result: (MOVBQSX x)
+       for {
+               if v_0.Op != OpAMD64MOVBQSX {
                        break
                }
-               x0 := x1.Args[2]
-               if x0.Op != OpAMD64MOVBstore || auxIntToInt32(x0.AuxInt) != i || auxToSym(x0.Aux) != s {
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVBQSX)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVLQSXload(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVLQSXload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVLQSX x)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVLstore {
                        break
                }
-               mem := x0.Args[2]
-               p0 := x0.Args[0]
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpAMD64SHRLconst || auxIntToInt8(x0_1.AuxInt) != 24 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && sequentialAddresses(p0, p1, 1) && sequentialAddresses(p1, p2, 1) && sequentialAddresses(p2, p3, 1) && clobber(x0, x1, x2)) {
+               off2 := auxIntToInt32(v_1.AuxInt)
+               sym2 := auxToSym(v_1.Aux)
+               x := v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, typ.UInt32)
-               v0.AddArg(w)
-               v.AddArg3(p0, v0, mem)
+               v.reset(OpAMD64MOVLQSX)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBstore [i] {s} p w x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w) x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w) x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w) x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w) x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w) x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w) x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0, x1, x2, x3, x4, x5, x6)
-       // result: (MOVQstore [i-7] {s} p (BSWAPQ <typ.UInt64> w) mem)
+       // match: (MOVLQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
+       // result: (MOVLQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               w := v_1
-               x6 := v_2
-               if x6.Op != OpAMD64MOVBstore || auxIntToInt32(x6.AuxInt) != i-1 || auxToSym(x6.Aux) != s {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               _ = x6.Args[2]
-               if p != x6.Args[0] {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               base := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
                        break
                }
-               x6_1 := x6.Args[1]
-               if x6_1.Op != OpAMD64SHRQconst || auxIntToInt8(x6_1.AuxInt) != 8 || w != x6_1.Args[0] {
+               v.reset(OpAMD64MOVLQSXload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg2(base, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVLQZX(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MOVLQZX x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v_0
+               if x.Op != OpAMD64MOVLload {
                        break
                }
-               x5 := x6.Args[2]
-               if x5.Op != OpAMD64MOVBstore || auxIntToInt32(x5.AuxInt) != i-2 || auxToSym(x5.Aux) != s {
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               _ = x5.Args[2]
-               if p != x5.Args[0] {
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64MOVLload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVLQZX x:(MOVQload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v_0
+               if x.Op != OpAMD64MOVQload {
                        break
                }
-               x5_1 := x5.Args[1]
-               if x5_1.Op != OpAMD64SHRQconst || auxIntToInt8(x5_1.AuxInt) != 16 || w != x5_1.Args[0] {
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               x4 := x5.Args[2]
-               if x4.Op != OpAMD64MOVBstore || auxIntToInt32(x4.AuxInt) != i-3 || auxToSym(x4.Aux) != s {
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64MOVLload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVLQZX x)
+       // cond: zeroUpper32Bits(x,3)
+       // result: x
+       for {
+               x := v_0
+               if !(zeroUpper32Bits(x, 3)) {
                        break
                }
-               _ = x4.Args[2]
-               if p != x4.Args[0] {
+               v.copyOf(x)
+               return true
+       }
+       // match: (MOVLQZX (ANDLconst [c] x))
+       // result: (ANDLconst [c] x)
+       for {
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               x4_1 := x4.Args[1]
-               if x4_1.Op != OpAMD64SHRQconst || auxIntToInt8(x4_1.AuxInt) != 24 || w != x4_1.Args[0] {
-                       break
-               }
-               x3 := x4.Args[2]
-               if x3.Op != OpAMD64MOVBstore || auxIntToInt32(x3.AuxInt) != i-4 || auxToSym(x3.Aux) != s {
-                       break
-               }
-               _ = x3.Args[2]
-               if p != x3.Args[0] {
-                       break
-               }
-               x3_1 := x3.Args[1]
-               if x3_1.Op != OpAMD64SHRQconst || auxIntToInt8(x3_1.AuxInt) != 32 || w != x3_1.Args[0] {
-                       break
-               }
-               x2 := x3.Args[2]
-               if x2.Op != OpAMD64MOVBstore || auxIntToInt32(x2.AuxInt) != i-5 || auxToSym(x2.Aux) != s {
-                       break
-               }
-               _ = x2.Args[2]
-               if p != x2.Args[0] {
-                       break
-               }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpAMD64SHRQconst || auxIntToInt8(x2_1.AuxInt) != 40 || w != x2_1.Args[0] {
-                       break
-               }
-               x1 := x2.Args[2]
-               if x1.Op != OpAMD64MOVBstore || auxIntToInt32(x1.AuxInt) != i-6 || auxToSym(x1.Aux) != s {
-                       break
-               }
-               _ = x1.Args[2]
-               if p != x1.Args[0] {
-                       break
-               }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpAMD64SHRQconst || auxIntToInt8(x1_1.AuxInt) != 48 || w != x1_1.Args[0] {
-                       break
-               }
-               x0 := x1.Args[2]
-               if x0.Op != OpAMD64MOVBstore || auxIntToInt32(x0.AuxInt) != i-7 || auxToSym(x0.Aux) != s {
-                       break
-               }
-               mem := x0.Args[2]
-               if p != x0.Args[0] {
-                       break
-               }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpAMD64SHRQconst || auxIntToInt8(x0_1.AuxInt) != 56 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0, x1, x2, x3, x4, x5, x6)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = int32ToAuxInt(i - 7)
-               v.Aux = symToAux(s)
-               v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPQ, typ.UInt64)
-               v0.AddArg(w)
-               v.AddArg3(p, v0, mem)
+               c := auxIntToInt32(v_0.AuxInt)
+               x := v_0.Args[0]
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = int32ToAuxInt(c)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBstore [i] {s} p7 w x6:(MOVBstore [i] {s} p6 (SHRQconst [8] w) x5:(MOVBstore [i] {s} p5 (SHRQconst [16] w) x4:(MOVBstore [i] {s} p4 (SHRQconst [24] w) x3:(MOVBstore [i] {s} p3 (SHRQconst [32] w) x2:(MOVBstore [i] {s} p2 (SHRQconst [40] w) x1:(MOVBstore [i] {s} p1 (SHRQconst [48] w) x0:(MOVBstore [i] {s} p0 (SHRQconst [56] w) mem))))))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && sequentialAddresses(p0, p1, 1) && sequentialAddresses(p1, p2, 1) && sequentialAddresses(p2, p3, 1) && sequentialAddresses(p3, p4, 1) && sequentialAddresses(p4, p5, 1) && sequentialAddresses(p5, p6, 1) && sequentialAddresses(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6)
-       // result: (MOVQstore [i] {s} p0 (BSWAPQ <typ.UInt64> w) mem)
+       // match: (MOVLQZX (MOVLQZX x))
+       // result: (MOVLQZX x)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p7 := v_0
-               w := v_1
-               x6 := v_2
-               if x6.Op != OpAMD64MOVBstore || auxIntToInt32(x6.AuxInt) != i || auxToSym(x6.Aux) != s {
-                       break
-               }
-               _ = x6.Args[2]
-               p6 := x6.Args[0]
-               x6_1 := x6.Args[1]
-               if x6_1.Op != OpAMD64SHRQconst || auxIntToInt8(x6_1.AuxInt) != 8 || w != x6_1.Args[0] {
-                       break
-               }
-               x5 := x6.Args[2]
-               if x5.Op != OpAMD64MOVBstore || auxIntToInt32(x5.AuxInt) != i || auxToSym(x5.Aux) != s {
-                       break
-               }
-               _ = x5.Args[2]
-               p5 := x5.Args[0]
-               x5_1 := x5.Args[1]
-               if x5_1.Op != OpAMD64SHRQconst || auxIntToInt8(x5_1.AuxInt) != 16 || w != x5_1.Args[0] {
+               if v_0.Op != OpAMD64MOVLQZX {
                        break
                }
-               x4 := x5.Args[2]
-               if x4.Op != OpAMD64MOVBstore || auxIntToInt32(x4.AuxInt) != i || auxToSym(x4.Aux) != s {
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVLQZX)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVLQZX (MOVWQZX x))
+       // result: (MOVWQZX x)
+       for {
+               if v_0.Op != OpAMD64MOVWQZX {
                        break
                }
-               _ = x4.Args[2]
-               p4 := x4.Args[0]
-               x4_1 := x4.Args[1]
-               if x4_1.Op != OpAMD64SHRQconst || auxIntToInt8(x4_1.AuxInt) != 24 || w != x4_1.Args[0] {
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVWQZX)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVLQZX (MOVBQZX x))
+       // result: (MOVBQZX x)
+       for {
+               if v_0.Op != OpAMD64MOVBQZX {
                        break
                }
-               x3 := x4.Args[2]
-               if x3.Op != OpAMD64MOVBstore || auxIntToInt32(x3.AuxInt) != i || auxToSym(x3.Aux) != s {
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVBQZX)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVLatomicload(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVLatomicload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(int64(off1)+int64(off2))
+       // result: (MOVLatomicload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               _ = x3.Args[2]
-               p3 := x3.Args[0]
-               x3_1 := x3.Args[1]
-               if x3_1.Op != OpAMD64SHRQconst || auxIntToInt8(x3_1.AuxInt) != 32 || w != x3_1.Args[0] {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(int64(off1) + int64(off2))) {
                        break
                }
-               x2 := x3.Args[2]
-               if x2.Op != OpAMD64MOVBstore || auxIntToInt32(x2.AuxInt) != i || auxToSym(x2.Aux) != s {
+               v.reset(OpAMD64MOVLatomicload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(sym)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVLatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem)
+       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
+       // result: (MOVLatomicload [off1+off2] {mergeSym(sym1, sym2)} ptr mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               _ = x2.Args[2]
-               p2 := x2.Args[0]
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpAMD64SHRQconst || auxIntToInt8(x2_1.AuxInt) != 40 || w != x2_1.Args[0] {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
                        break
                }
-               x1 := x2.Args[2]
-               if x1.Op != OpAMD64MOVBstore || auxIntToInt32(x1.AuxInt) != i || auxToSym(x1.Aux) != s {
+               v.reset(OpAMD64MOVLatomicload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVLf2i(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MOVLf2i <t> (Arg <u> [off] {sym}))
+       // cond: t.Size() == u.Size()
+       // result: @b.Func.Entry (Arg <t> [off] {sym})
+       for {
+               t := v.Type
+               if v_0.Op != OpArg {
                        break
                }
-               _ = x1.Args[2]
-               p1 := x1.Args[0]
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpAMD64SHRQconst || auxIntToInt8(x1_1.AuxInt) != 48 || w != x1_1.Args[0] {
+               u := v_0.Type
+               off := auxIntToInt32(v_0.AuxInt)
+               sym := auxToSym(v_0.Aux)
+               if !(t.Size() == u.Size()) {
                        break
                }
-               x0 := x1.Args[2]
-               if x0.Op != OpAMD64MOVBstore || auxIntToInt32(x0.AuxInt) != i || auxToSym(x0.Aux) != s {
+               b = b.Func.Entry
+               v0 := b.NewValue0(v.Pos, OpArg, t)
+               v.copyOf(v0)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVLi2f(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MOVLi2f <t> (Arg <u> [off] {sym}))
+       // cond: t.Size() == u.Size()
+       // result: @b.Func.Entry (Arg <t> [off] {sym})
+       for {
+               t := v.Type
+               if v_0.Op != OpArg {
                        break
                }
-               mem := x0.Args[2]
-               p0 := x0.Args[0]
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpAMD64SHRQconst || auxIntToInt8(x0_1.AuxInt) != 56 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && sequentialAddresses(p0, p1, 1) && sequentialAddresses(p1, p2, 1) && sequentialAddresses(p2, p3, 1) && sequentialAddresses(p3, p4, 1) && sequentialAddresses(p4, p5, 1) && sequentialAddresses(p5, p6, 1) && sequentialAddresses(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6)) {
+               u := v_0.Type
+               off := auxIntToInt32(v_0.AuxInt)
+               sym := auxToSym(v_0.Aux)
+               if !(t.Size() == u.Size()) {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPQ, typ.UInt64)
-               v0.AddArg(w)
-               v.AddArg3(p0, v0, mem)
+               b = b.Func.Entry
+               v0 := b.NewValue0(v.Pos, OpArg, t)
+               v.copyOf(v0)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
                return true
        }
-       // match: (MOVBstore [i] {s} p (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-1] {s} p w mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       config := b.Func.Config
+       // match: (MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVLQZX x)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpAMD64SHRWconst || auxIntToInt8(v_1.AuxInt) != 8 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVLstore {
                        break
                }
-               mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+               off2 := auxIntToInt32(v_1.AuxInt)
+               sym2 := auxToSym(v_1.Aux)
+               x := v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
+               v.reset(OpAMD64MOVLQZX)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBstore [i] {s} p (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-1] {s} p w mem)
+       // match: (MOVLload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(int64(off1)+int64(off2))
+       // result: (MOVLload [off1+off2] {sym} ptr mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpAMD64SHRLconst || auxIntToInt8(v_1.AuxInt) != 8 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(int64(off1) + int64(off2))) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
+               v.reset(OpAMD64MOVLload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(sym)
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p (SHRQconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-1] {s} p w mem)
+       // match: (MOVLload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
+       // result: (MOVLload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpAMD64SHRQconst || auxIntToInt8(v_1.AuxInt) != 8 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               base := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
+               v.reset(OpAMD64MOVLload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg2(base, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRWconst [8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i] {s} p w mem)
+       // match: (MOVLload [off] {sym} ptr (MOVSSstore [off] {sym} ptr val _))
+       // result: (MOVLf2i val)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               w := v_1
-               x := v_2
-               if x.Op != OpAMD64MOVBstore || auxIntToInt32(x.AuxInt) != i+1 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVSSstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpAMD64SHRWconst || auxIntToInt8(x_1.AuxInt) != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               val := v_1.Args[1]
+               if ptr != v_1.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
+               v.reset(OpAMD64MOVLf2i)
+               v.AddArg(val)
                return true
        }
-       // match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRLconst [8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i] {s} p w mem)
+       // match: (MOVLload [off] {sym} (SB) _)
+       // cond: symIsRO(sym)
+       // result: (MOVQconst [int64(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))])
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               w := v_1
-               x := v_2
-               if x.Op != OpAMD64MOVBstore || auxIntToInt32(x.AuxInt) != i+1 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpSB || !(symIsRO(sym)) {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpAMD64SHRLconst || auxIntToInt8(x_1.AuxInt) != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = int64ToAuxInt(int64(read32(sym, int64(off), config.ctxt.Arch.ByteOrder)))
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (MOVLstore [off] {sym} ptr (MOVLQSX x) mem)
+       // result: (MOVLstore [off] {sym} ptr x mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVLQSX {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
+               x := v_1.Args[0]
+               mem := v_2
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRQconst [8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i] {s} p w mem)
+       // match: (MOVLstore [off] {sym} ptr (MOVLQZX x) mem)
+       // result: (MOVLstore [off] {sym} ptr x mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               w := v_1
-               x := v_2
-               if x.Op != OpAMD64MOVBstore || auxIntToInt32(x.AuxInt) != i+1 || auxToSym(x.Aux) != s {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVLQZX {
                        break
                }
-               mem := x.Args[2]
-               if p != x.Args[0] {
+               x := v_1.Args[0]
+               mem := v_2
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, x, mem)
+               return true
+       }
+       // match: (MOVLstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // cond: is32Bit(int64(off1)+int64(off2))
+       // result: (MOVLstore [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpAMD64SHRQconst || auxIntToInt8(x_1.AuxInt) != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               ptr := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(is32Bit(int64(off1) + int64(off2))) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-1] {s} p w0 mem)
+       // match: (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem)
+       // result: (MOVLstoreconst [makeValAndOff(int32(c),off)] {sym} ptr mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpAMD64SHRLconst {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               j := auxIntToInt8(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
+               c := auxIntToInt32(v_1.AuxInt)
+               mem := v_2
+               v.reset(OpAMD64MOVLstoreconst)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
+               v.Aux = symToAux(sym)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVLstore [off] {sym} ptr (MOVQconst [c]) mem)
+       // result: (MOVLstoreconst [makeValAndOff(int32(c),off)] {sym} ptr mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               mem := x.Args[2]
-               if p != x.Args[0] {
+               c := auxIntToInt64(v_1.AuxInt)
+               mem := v_2
+               v.reset(OpAMD64MOVLstoreconst)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
+               v.Aux = symToAux(sym)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVLstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
+       // result: (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRLconst || auxIntToInt8(w0.AuxInt) != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               base := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w0, mem)
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg3(base, val, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p (SHRQconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRQconst [j-8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-1] {s} p w0 mem)
+       // match: (MOVLstore [i] {s} p x1:(MOVLload [j] {s2} p2 mem) mem2:(MOVLstore [i-4] {s} p x2:(MOVLload [j-4] {s2} p2 mem) mem))
+       // cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1, x2, mem2)
+       // result: (MOVQstore [i-4] {s} p (MOVQload [j-4] {s2} p2 mem) mem)
        for {
                i := auxIntToInt32(v.AuxInt)
                s := auxToSym(v.Aux)
                p := v_0
-               if v_1.Op != OpAMD64SHRQconst {
+               x1 := v_1
+               if x1.Op != OpAMD64MOVLload {
+                       break
+               }
+               j := auxIntToInt32(x1.AuxInt)
+               s2 := auxToSym(x1.Aux)
+               mem := x1.Args[1]
+               p2 := x1.Args[0]
+               mem2 := v_2
+               if mem2.Op != OpAMD64MOVLstore || auxIntToInt32(mem2.AuxInt) != i-4 || auxToSym(mem2.Aux) != s {
                        break
                }
-               j := auxIntToInt8(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
+               _ = mem2.Args[2]
+               if p != mem2.Args[0] {
                        break
                }
-               mem := x.Args[2]
-               if p != x.Args[0] {
+               x2 := mem2.Args[1]
+               if x2.Op != OpAMD64MOVLload || auxIntToInt32(x2.AuxInt) != j-4 || auxToSym(x2.Aux) != s2 {
                        break
                }
-               w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRQconst || auxIntToInt8(w0.AuxInt) != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               _ = x2.Args[1]
+               if p2 != x2.Args[0] || mem != x2.Args[1] || mem != mem2.Args[2] || !(x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1, x2, mem2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = int32ToAuxInt(i - 4)
                v.Aux = symToAux(s)
-               v.AddArg3(p, w0, mem)
+               v0 := b.NewValue0(x2.Pos, OpAMD64MOVQload, typ.UInt64)
+               v0.AuxInt = int32ToAuxInt(j - 4)
+               v0.Aux = symToAux(s2)
+               v0.AddArg2(p2, mem)
+               v.AddArg3(p, v0, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p1 (SHRWconst [8] w) x:(MOVBstore [i] {s} p0 w mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
-       // result: (MOVWstore [i] {s} p0 w mem)
+       // match: (MOVLstore {sym} [off] ptr y:(ADDLload x [off] {sym} ptr mem) mem)
+       // cond: y.Uses==1 && clobber(y)
+       // result: (ADDLmodify [off] {sym} ptr x mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               if v_1.Op != OpAMD64SHRWconst || auxIntToInt8(v_1.AuxInt) != 8 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVBstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               y := v_1
+               if y.Op != OpAMD64ADDLload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym {
                        break
                }
-               mem := x.Args[2]
-               p0 := x.Args[0]
-               if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
+               mem := y.Args[2]
+               x := y.Args[0]
+               if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w, mem)
+               v.reset(OpAMD64ADDLmodify)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p1 (SHRLconst [8] w) x:(MOVBstore [i] {s} p0 w mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
-       // result: (MOVWstore [i] {s} p0 w mem)
+       // match: (MOVLstore {sym} [off] ptr y:(ANDLload x [off] {sym} ptr mem) mem)
+       // cond: y.Uses==1 && clobber(y)
+       // result: (ANDLmodify [off] {sym} ptr x mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               if v_1.Op != OpAMD64SHRLconst || auxIntToInt8(v_1.AuxInt) != 8 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVBstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               y := v_1
+               if y.Op != OpAMD64ANDLload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym {
                        break
                }
-               mem := x.Args[2]
-               p0 := x.Args[0]
-               if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
+               mem := y.Args[2]
+               x := y.Args[0]
+               if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w, mem)
+               v.reset(OpAMD64ANDLmodify)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p1 (SHRQconst [8] w) x:(MOVBstore [i] {s} p0 w mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
-       // result: (MOVWstore [i] {s} p0 w mem)
+       // match: (MOVLstore {sym} [off] ptr y:(ORLload x [off] {sym} ptr mem) mem)
+       // cond: y.Uses==1 && clobber(y)
+       // result: (ORLmodify [off] {sym} ptr x mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               if v_1.Op != OpAMD64SHRQconst || auxIntToInt8(v_1.AuxInt) != 8 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVBstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               y := v_1
+               if y.Op != OpAMD64ORLload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym {
                        break
                }
-               mem := x.Args[2]
-               p0 := x.Args[0]
-               if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
+               mem := y.Args[2]
+               x := y.Args[0]
+               if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w, mem)
+               v.reset(OpAMD64ORLmodify)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p0 w x:(MOVBstore [i] {s} p1 (SHRWconst [8] w) mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
-       // result: (MOVWstore [i] {s} p0 w mem)
+       // match: (MOVLstore {sym} [off] ptr y:(XORLload x [off] {sym} ptr mem) mem)
+       // cond: y.Uses==1 && clobber(y)
+       // result: (XORLmodify [off] {sym} ptr x mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p0 := v_0
-               w := v_1
-               x := v_2
-               if x.Op != OpAMD64MOVBstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               y := v_1
+               if y.Op != OpAMD64XORLload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym {
                        break
                }
-               mem := x.Args[2]
-               p1 := x.Args[0]
-               x_1 := x.Args[1]
-               if x_1.Op != OpAMD64SHRWconst || auxIntToInt8(x_1.AuxInt) != 8 || w != x_1.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
+               mem := y.Args[2]
+               x := y.Args[0]
+               if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w, mem)
+               v.reset(OpAMD64XORLmodify)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p0 w x:(MOVBstore [i] {s} p1 (SHRLconst [8] w) mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
-       // result: (MOVWstore [i] {s} p0 w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p0 := v_0
-               w := v_1
-               x := v_2
-               if x.Op != OpAMD64MOVBstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               p1 := x.Args[0]
-               x_1 := x.Args[1]
-               if x_1.Op != OpAMD64SHRLconst || auxIntToInt8(x_1.AuxInt) != 8 || w != x_1.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
-                       break
-               }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w, mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} p0 w x:(MOVBstore [i] {s} p1 (SHRQconst [8] w) mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
-       // result: (MOVWstore [i] {s} p0 w mem)
+       // match: (MOVLstore {sym} [off] ptr y:(ADDL l:(MOVLload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
+       // result: (ADDLmodify [off] {sym} ptr x mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p0 := v_0
-               w := v_1
-               x := v_2
-               if x.Op != OpAMD64MOVBstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               y := v_1
+               if y.Op != OpAMD64ADDL {
                        break
                }
-               mem := x.Args[2]
-               p1 := x.Args[0]
-               x_1 := x.Args[1]
-               if x_1.Op != OpAMD64SHRQconst || auxIntToInt8(x_1.AuxInt) != 8 || w != x_1.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
-                       break
+               _ = y.Args[1]
+               y_0 := y.Args[0]
+               y_1 := y.Args[1]
+               for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
+                       l := y_0
+                       if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
+                               continue
+                       }
+                       mem := l.Args[1]
+                       if ptr != l.Args[0] {
+                               continue
+                       }
+                       x := y_1
+                       if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
+                               continue
+                       }
+                       v.reset(OpAMD64ADDLmodify)
+                       v.AuxInt = int32ToAuxInt(off)
+                       v.Aux = symToAux(sym)
+                       v.AddArg3(ptr, x, mem)
+                       return true
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w, mem)
-               return true
+               break
        }
-       // match: (MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i] {s} p0 w0:(SHRLconst [j-8] w) mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
-       // result: (MOVWstore [i] {s} p0 w0 mem)
+       // match: (MOVLstore {sym} [off] ptr y:(SUBL l:(MOVLload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
+       // result: (SUBLmodify [off] {sym} ptr x mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               if v_1.Op != OpAMD64SHRLconst {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               y := v_1
+               if y.Op != OpAMD64SUBL {
                        break
                }
-               j := auxIntToInt8(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVBstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
+               x := y.Args[1]
+               l := y.Args[0]
+               if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
                        break
                }
-               mem := x.Args[2]
-               p0 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRLconst || auxIntToInt8(w0.AuxInt) != j-8 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
+               mem := l.Args[1]
+               if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w0, mem)
+               v.reset(OpAMD64SUBLmodify)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p1 (SHRQconst [j] w) x:(MOVBstore [i] {s} p0 w0:(SHRQconst [j-8] w) mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
-       // result: (MOVWstore [i] {s} p0 w0 mem)
+       // match: (MOVLstore {sym} [off] ptr y:(ANDL l:(MOVLload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
+       // result: (ANDLmodify [off] {sym} ptr x mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               if v_1.Op != OpAMD64SHRQconst {
-                       break
-               }
-               j := auxIntToInt8(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVBstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               y := v_1
+               if y.Op != OpAMD64ANDL {
                        break
                }
-               mem := x.Args[2]
-               p0 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRQconst || auxIntToInt8(w0.AuxInt) != j-8 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
-                       break
+               _ = y.Args[1]
+               y_0 := y.Args[0]
+               y_1 := y.Args[1]
+               for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
+                       l := y_0
+                       if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
+                               continue
+                       }
+                       mem := l.Args[1]
+                       if ptr != l.Args[0] {
+                               continue
+                       }
+                       x := y_1
+                       if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
+                               continue
+                       }
+                       v.reset(OpAMD64ANDLmodify)
+                       v.AuxInt = int32ToAuxInt(off)
+                       v.Aux = symToAux(sym)
+                       v.AddArg3(ptr, x, mem)
+                       return true
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w0, mem)
-               return true
+               break
        }
-       // match: (MOVBstore [c3] {s} p3 (SHRQconst [56] w) x1:(MOVWstore [c2] {s} p2 (SHRQconst [40] w) x2:(MOVLstore [c1] {s} p1 (SHRQconst [8] w) x3:(MOVBstore [c0] {s} p0 w mem))))
-       // cond: x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && sequentialAddresses(p0, p1, int64(1 + c0 - c1)) && sequentialAddresses(p0, p2, int64(5 + c0 - c2)) && sequentialAddresses(p0, p3, int64(7 + c0 - c3)) && clobber(x1, x2, x3)
-       // result: (MOVQstore [c0] {s} p0 w mem)
+       // match: (MOVLstore {sym} [off] ptr y:(ORL l:(MOVLload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
+       // result: (ORLmodify [off] {sym} ptr x mem)
        for {
-               c3 := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p3 := v_0
-               if v_1.Op != OpAMD64SHRQconst || auxIntToInt8(v_1.AuxInt) != 56 {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               y := v_1
+               if y.Op != OpAMD64ORL {
                        break
                }
-               w := v_1.Args[0]
-               x1 := v_2
-               if x1.Op != OpAMD64MOVWstore {
-                       break
+               _ = y.Args[1]
+               y_0 := y.Args[0]
+               y_1 := y.Args[1]
+               for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
+                       l := y_0
+                       if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
+                               continue
+                       }
+                       mem := l.Args[1]
+                       if ptr != l.Args[0] {
+                               continue
+                       }
+                       x := y_1
+                       if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
+                               continue
+                       }
+                       v.reset(OpAMD64ORLmodify)
+                       v.AuxInt = int32ToAuxInt(off)
+                       v.Aux = symToAux(sym)
+                       v.AddArg3(ptr, x, mem)
+                       return true
                }
-               c2 := auxIntToInt32(x1.AuxInt)
-               if auxToSym(x1.Aux) != s {
+               break
+       }
+       // match: (MOVLstore {sym} [off] ptr y:(XORL l:(MOVLload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
+       // result: (XORLmodify [off] {sym} ptr x mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               y := v_1
+               if y.Op != OpAMD64XORL {
                        break
                }
-               _ = x1.Args[2]
-               p2 := x1.Args[0]
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpAMD64SHRQconst || auxIntToInt8(x1_1.AuxInt) != 40 || w != x1_1.Args[0] {
-                       break
+               _ = y.Args[1]
+               y_0 := y.Args[0]
+               y_1 := y.Args[1]
+               for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
+                       l := y_0
+                       if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
+                               continue
+                       }
+                       mem := l.Args[1]
+                       if ptr != l.Args[0] {
+                               continue
+                       }
+                       x := y_1
+                       if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
+                               continue
+                       }
+                       v.reset(OpAMD64XORLmodify)
+                       v.AuxInt = int32ToAuxInt(off)
+                       v.Aux = symToAux(sym)
+                       v.AddArg3(ptr, x, mem)
+                       return true
                }
-               x2 := x1.Args[2]
-               if x2.Op != OpAMD64MOVLstore {
+               break
+       }
+       // match: (MOVLstore [off] {sym} ptr a:(ADDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)
+       // result: (ADDLconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               a := v_1
+               if a.Op != OpAMD64ADDLconst {
                        break
                }
-               c1 := auxIntToInt32(x2.AuxInt)
-               if auxToSym(x2.Aux) != s {
+               c := auxIntToInt32(a.AuxInt)
+               l := a.Args[0]
+               if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
                        break
                }
-               _ = x2.Args[2]
-               p1 := x2.Args[0]
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpAMD64SHRQconst || auxIntToInt8(x2_1.AuxInt) != 8 || w != x2_1.Args[0] {
+               mem := l.Args[1]
+               ptr2 := l.Args[0]
+               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) {
                        break
                }
-               x3 := x2.Args[2]
-               if x3.Op != OpAMD64MOVBstore {
+               v.reset(OpAMD64ADDLconstmodify)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
+               v.Aux = symToAux(sym)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVLstore [off] {sym} ptr a:(ANDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)
+       // result: (ANDLconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               a := v_1
+               if a.Op != OpAMD64ANDLconst {
                        break
                }
-               c0 := auxIntToInt32(x3.AuxInt)
-               if auxToSym(x3.Aux) != s {
+               c := auxIntToInt32(a.AuxInt)
+               l := a.Args[0]
+               if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
                        break
                }
-               mem := x3.Args[2]
-               p0 := x3.Args[0]
-               if w != x3.Args[1] || !(x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && sequentialAddresses(p0, p1, int64(1+c0-c1)) && sequentialAddresses(p0, p2, int64(5+c0-c2)) && sequentialAddresses(p0, p3, int64(7+c0-c3)) && clobber(x1, x2, x3)) {
+               mem := l.Args[1]
+               ptr2 := l.Args[0]
+               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = int32ToAuxInt(c0)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w, mem)
+               v.reset(OpAMD64ANDLconstmodify)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
+               v.Aux = symToAux(sym)
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p x1:(MOVBload [j] {s2} p2 mem) mem2:(MOVBstore [i-1] {s} p x2:(MOVBload [j-1] {s2} p2 mem) mem))
-       // cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1, x2, mem2)
-       // result: (MOVWstore [i-1] {s} p (MOVWload [j-1] {s2} p2 mem) mem)
+       // match: (MOVLstore [off] {sym} ptr a:(ORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)
+       // result: (ORLconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               x1 := v_1
-               if x1.Op != OpAMD64MOVBload {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               a := v_1
+               if a.Op != OpAMD64ORLconst {
                        break
                }
-               j := auxIntToInt32(x1.AuxInt)
-               s2 := auxToSym(x1.Aux)
-               mem := x1.Args[1]
-               p2 := x1.Args[0]
-               mem2 := v_2
-               if mem2.Op != OpAMD64MOVBstore || auxIntToInt32(mem2.AuxInt) != i-1 || auxToSym(mem2.Aux) != s {
+               c := auxIntToInt32(a.AuxInt)
+               l := a.Args[0]
+               if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
                        break
                }
-               _ = mem2.Args[2]
-               if p != mem2.Args[0] {
+               mem := l.Args[1]
+               ptr2 := l.Args[0]
+               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) {
                        break
                }
-               x2 := mem2.Args[1]
-               if x2.Op != OpAMD64MOVBload || auxIntToInt32(x2.AuxInt) != j-1 || auxToSym(x2.Aux) != s2 {
+               v.reset(OpAMD64ORLconstmodify)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
+               v.Aux = symToAux(sym)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVLstore [off] {sym} ptr a:(XORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)
+       // result: (XORLconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               a := v_1
+               if a.Op != OpAMD64XORLconst {
                        break
                }
-               _ = x2.Args[1]
-               if p2 != x2.Args[0] || mem != x2.Args[1] || mem != mem2.Args[2] || !(x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1, x2, mem2)) {
+               c := auxIntToInt32(a.AuxInt)
+               l := a.Args[0]
+               if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
+               mem := l.Args[1]
+               ptr2 := l.Args[0]
+               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) {
+                       break
+               }
+               v.reset(OpAMD64XORLconstmodify)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
+               v.Aux = symToAux(sym)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVLstore [off] {sym} ptr (MOVLf2i val) mem)
+       // result: (MOVSSstore [off] {sym} ptr val mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVLf2i {
+                       break
+               }
+               val := v_1.Args[0]
+               mem := v_2
+               v.reset(OpAMD64MOVSSstore)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (MOVLstore [i] {s} p x:(BSWAPL w) mem)
+       // cond: x.Uses == 1 && buildcfg.GOAMD64 >= 3
+       // result: (MOVBELstore [i] {s} p w mem)
+       for {
+               i := auxIntToInt32(v.AuxInt)
+               s := auxToSym(v.Aux)
+               p := v_0
+               x := v_1
+               if x.Op != OpAMD64BSWAPL {
+                       break
+               }
+               w := x.Args[0]
+               mem := v_2
+               if !(x.Uses == 1 && buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64MOVBELstore)
+               v.AuxInt = int32ToAuxInt(i)
                v.Aux = symToAux(s)
-               v0 := b.NewValue0(x2.Pos, OpAMD64MOVWload, typ.UInt16)
-               v0.AuxInt = int32ToAuxInt(j - 1)
-               v0.Aux = symToAux(s2)
-               v0.AddArg2(p2, mem)
-               v.AddArg3(p, v0, mem)
+               v.AddArg3(p, w, mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVBstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
+       // match: (MOVLstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
        // cond: ValAndOff(sc).canAdd32(off)
-       // result: (MOVBstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem)
+       // result: (MOVLstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem)
        for {
                sc := auxIntToValAndOff(v.AuxInt)
                s := auxToSym(v.Aux)
@@ -11360,15 +11614,15 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool {
                if !(ValAndOff(sc).canAdd32(off)) {
                        break
                }
-               v.reset(OpAMD64MOVBstoreconst)
+               v.reset(OpAMD64MOVLstoreconst)
                v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off))
                v.Aux = symToAux(s)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVBstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
+       // match: (MOVLstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
        // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)
-       // result: (MOVBstoreconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // result: (MOVLstoreconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem)
        for {
                sc := auxIntToValAndOff(v.AuxInt)
                sym1 := auxToSym(v.Aux)
@@ -11382,329 +11636,204 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)) {
                        break
                }
-               v.reset(OpAMD64MOVBstoreconst)
+               v.reset(OpAMD64MOVLstoreconst)
                v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off))
                v.Aux = symToAux(mergeSym(sym1, sym2))
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+1-c.Off())) && clobber(x)
-       // result: (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVOload(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVOload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(int64(off1)+int64(off2))
+       // result: (MOVOload [off1+off2] {sym} ptr mem)
        for {
-               c := auxIntToValAndOff(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               x := v_1
-               if x.Op != OpAMD64MOVBstoreconst {
-                       break
-               }
-               a := auxIntToValAndOff(x.AuxInt)
-               if auxToSym(x.Aux) != s {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               mem := x.Args[1]
-               p0 := x.Args[0]
-               if !(x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+1-c.Off())) && clobber(x)) {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(int64(off1) + int64(off2))) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xff|c.Val()<<8, a.Off()))
-               v.Aux = symToAux(s)
-               v.AddArg2(p0, mem)
+               v.reset(OpAMD64MOVOload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(sym)
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVBstoreconst [a] {s} p0 x:(MOVBstoreconst [c] {s} p1 mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+1-c.Off())) && clobber(x)
-       // result: (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
+       // match: (MOVOload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
+       // result: (MOVOload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
-               a := auxIntToValAndOff(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p0 := v_0
-               x := v_1
-               if x.Op != OpAMD64MOVBstoreconst {
-                       break
-               }
-               c := auxIntToValAndOff(x.AuxInt)
-               if auxToSym(x.Aux) != s {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               mem := x.Args[1]
-               p1 := x.Args[0]
-               if !(x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+1-c.Off())) && clobber(x)) {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               base := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xff|c.Val()<<8, a.Off()))
-               v.Aux = symToAux(s)
-               v.AddArg2(p0, mem)
+               v.reset(OpAMD64MOVOload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg2(base, mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVLQSX(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVOstore(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (MOVLQSX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
+       config := b.Func.Config
+       typ := &b.Func.Config.Types
+       // match: (MOVOstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // cond: is32Bit(int64(off1)+int64(off2))
+       // result: (MOVOstore [off1+off2] {sym} ptr val mem)
        for {
-               x := v_0
-               if x.Op != OpAMD64MOVLload {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               off := auxIntToInt32(x.AuxInt)
-               sym := auxToSym(x.Aux)
-               mem := x.Args[1]
-               ptr := x.Args[0]
-               if !(x.Uses == 1 && clobber(x)) {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               ptr := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(is32Bit(int64(off1) + int64(off2))) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVLQSXload, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = int32ToAuxInt(off)
-               v0.Aux = symToAux(sym)
-               v0.AddArg2(ptr, mem)
+               v.reset(OpAMD64MOVOstore)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVLQSX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (MOVOstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
+       // result: (MOVOstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
-               x := v_0
-               if x.Op != OpAMD64MOVQload {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               off := auxIntToInt32(x.AuxInt)
-               sym := auxToSym(x.Aux)
-               mem := x.Args[1]
-               ptr := x.Args[0]
-               if !(x.Uses == 1 && clobber(x)) {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               base := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVLQSXload, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = int32ToAuxInt(off)
-               v0.Aux = symToAux(sym)
-               v0.AddArg2(ptr, mem)
+               v.reset(OpAMD64MOVOstore)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg3(base, val, mem)
                return true
        }
-       // match: (MOVLQSX (ANDLconst [c] x))
-       // cond: uint32(c) & 0x80000000 == 0
-       // result: (ANDLconst [c & 0x7fffffff] x)
+       // match: (MOVOstore [dstOff] {dstSym} ptr (MOVOload [srcOff] {srcSym} (SB) _) mem)
+       // cond: symIsRO(srcSym)
+       // result: (MOVQstore [dstOff+8] {dstSym} ptr (MOVQconst [int64(read64(srcSym, int64(srcOff)+8, config.ctxt.Arch.ByteOrder))]) (MOVQstore [dstOff] {dstSym} ptr (MOVQconst [int64(read64(srcSym, int64(srcOff), config.ctxt.Arch.ByteOrder))]) mem))
        for {
-               if v_0.Op != OpAMD64ANDLconst {
+               dstOff := auxIntToInt32(v.AuxInt)
+               dstSym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVOload {
                        break
                }
-               c := auxIntToInt32(v_0.AuxInt)
-               x := v_0.Args[0]
-               if !(uint32(c)&0x80000000 == 0) {
+               srcOff := auxIntToInt32(v_1.AuxInt)
+               srcSym := auxToSym(v_1.Aux)
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpSB {
                        break
                }
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = int32ToAuxInt(c & 0x7fffffff)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLQSX (MOVLQSX x))
-       // result: (MOVLQSX x)
-       for {
-               if v_0.Op != OpAMD64MOVLQSX {
+               mem := v_2
+               if !(symIsRO(srcSym)) {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVLQSX)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLQSX (MOVWQSX x))
-       // result: (MOVWQSX x)
-       for {
-               if v_0.Op != OpAMD64MOVWQSX {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVWQSX)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLQSX (MOVBQSX x))
-       // result: (MOVBQSX x)
-       for {
-               if v_0.Op != OpAMD64MOVBQSX {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVBQSX)
-               v.AddArg(x)
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = int32ToAuxInt(dstOff + 8)
+               v.Aux = symToAux(dstSym)
+               v0 := b.NewValue0(v_1.Pos, OpAMD64MOVQconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(int64(read64(srcSym, int64(srcOff)+8, config.ctxt.Arch.ByteOrder)))
+               v1 := b.NewValue0(v_1.Pos, OpAMD64MOVQstore, types.TypeMem)
+               v1.AuxInt = int32ToAuxInt(dstOff)
+               v1.Aux = symToAux(dstSym)
+               v2 := b.NewValue0(v_1.Pos, OpAMD64MOVQconst, typ.UInt64)
+               v2.AuxInt = int64ToAuxInt(int64(read64(srcSym, int64(srcOff), config.ctxt.Arch.ByteOrder)))
+               v1.AddArg3(ptr, v2, mem)
+               v.AddArg3(ptr, v0, v1)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVLQSXload(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVOstoreconst(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVLQSXload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVLQSX x)
+       // match: (MOVOstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd32(off)
+       // result: (MOVOstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem)
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVLstore {
+               sc := auxIntToValAndOff(v.AuxInt)
+               s := auxToSym(v.Aux)
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               off2 := auxIntToInt32(v_1.AuxInt)
-               sym2 := auxToSym(v_1.Aux)
-               x := v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               off := auxIntToInt32(v_0.AuxInt)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(ValAndOff(sc).canAdd32(off)) {
                        break
                }
-               v.reset(OpAMD64MOVLQSX)
-               v.AddArg(x)
+               v.reset(OpAMD64MOVOstoreconst)
+               v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off))
+               v.Aux = symToAux(s)
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVLQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
-       // result: (MOVLQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (MOVOstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)
+       // result: (MOVOstoreconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem)
        for {
-               off1 := auxIntToInt32(v.AuxInt)
+               sc := auxIntToValAndOff(v.AuxInt)
                sym1 := auxToSym(v.Aux)
                if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               off2 := auxIntToInt32(v_0.AuxInt)
+               off := auxIntToInt32(v_0.AuxInt)
                sym2 := auxToSym(v_0.Aux)
-               base := v_0.Args[0]
+               ptr := v_0.Args[0]
                mem := v_1
-               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)) {
                        break
                }
-               v.reset(OpAMD64MOVLQSXload)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.reset(OpAMD64MOVOstoreconst)
+               v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off))
                v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg2(base, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLQZX(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MOVLQZX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v_0
-               if x.Op != OpAMD64MOVLload {
-                       break
-               }
-               off := auxIntToInt32(x.AuxInt)
-               sym := auxToSym(x.Aux)
-               mem := x.Args[1]
-               ptr := x.Args[0]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               b = x.Block
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVLload, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = int32ToAuxInt(off)
-               v0.Aux = symToAux(sym)
-               v0.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVLQZX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v_0
-               if x.Op != OpAMD64MOVQload {
-                       break
-               }
-               off := auxIntToInt32(x.AuxInt)
-               sym := auxToSym(x.Aux)
-               mem := x.Args[1]
-               ptr := x.Args[0]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               b = x.Block
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVLload, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = int32ToAuxInt(off)
-               v0.Aux = symToAux(sym)
-               v0.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVLQZX x)
-       // cond: zeroUpper32Bits(x,3)
-       // result: x
-       for {
-               x := v_0
-               if !(zeroUpper32Bits(x, 3)) {
-                       break
-               }
-               v.copyOf(x)
-               return true
-       }
-       // match: (MOVLQZX (ANDLconst [c] x))
-       // result: (ANDLconst [c] x)
-       for {
-               if v_0.Op != OpAMD64ANDLconst {
-                       break
-               }
-               c := auxIntToInt32(v_0.AuxInt)
-               x := v_0.Args[0]
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = int32ToAuxInt(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLQZX (MOVLQZX x))
-       // result: (MOVLQZX x)
-       for {
-               if v_0.Op != OpAMD64MOVLQZX {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVLQZX)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLQZX (MOVWQZX x))
-       // result: (MOVWQZX x)
-       for {
-               if v_0.Op != OpAMD64MOVWQZX {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVWQZX)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLQZX (MOVBQZX x))
-       // result: (MOVBQZX x)
-       for {
-               if v_0.Op != OpAMD64MOVBQZX {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVBQZX)
-               v.AddArg(x)
+               v.AddArg2(ptr, mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVLatomicload(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVQatomicload(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVLatomicload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // match: (MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem)
        // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (MOVLatomicload [off1+off2] {sym} ptr mem)
+       // result: (MOVQatomicload [off1+off2] {sym} ptr mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -11717,15 +11846,15 @@ func rewriteValueAMD64_OpAMD64MOVLatomicload(v *Value) bool {
                if !(is32Bit(int64(off1) + int64(off2))) {
                        break
                }
-               v.reset(OpAMD64MOVLatomicload)
+               v.reset(OpAMD64MOVQatomicload)
                v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(sym)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVLatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem)
+       // match: (MOVQatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem)
        // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
-       // result: (MOVLatomicload [off1+off2] {mergeSym(sym1, sym2)} ptr mem)
+       // result: (MOVQatomicload [off1+off2] {mergeSym(sym1, sym2)} ptr mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym1 := auxToSym(v.Aux)
@@ -11739,7 +11868,7 @@ func rewriteValueAMD64_OpAMD64MOVLatomicload(v *Value) bool {
                if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVLatomicload)
+               v.reset(OpAMD64MOVQatomicload)
                v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(mergeSym(sym1, sym2))
                v.AddArg2(ptr, mem)
@@ -11747,10 +11876,10 @@ func rewriteValueAMD64_OpAMD64MOVLatomicload(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVLf2i(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVQf2i(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (MOVLf2i <t> (Arg <u> [off] {sym}))
+       // match: (MOVQf2i <t> (Arg <u> [off] {sym}))
        // cond: t.Size() == u.Size()
        // result: @b.Func.Entry (Arg <t> [off] {sym})
        for {
@@ -11773,10 +11902,10 @@ func rewriteValueAMD64_OpAMD64MOVLf2i(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVLi2f(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVQi2f(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (MOVLi2f <t> (Arg <u> [off] {sym}))
+       // match: (MOVQi2f <t> (Arg <u> [off] {sym}))
        // cond: t.Size() == u.Size()
        // result: @b.Func.Entry (Arg <t> [off] {sym})
        for {
@@ -11799,19 +11928,19 @@ func rewriteValueAMD64_OpAMD64MOVLi2f(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVQload(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        config := b.Func.Config
-       // match: (MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _))
+       // match: (MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _))
        // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVLQZX x)
+       // result: x
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
-               if v_1.Op != OpAMD64MOVLstore {
+               if v_1.Op != OpAMD64MOVQstore {
                        break
                }
                off2 := auxIntToInt32(v_1.AuxInt)
@@ -11821,13 +11950,12 @@ func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool {
                if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpAMD64MOVLQZX)
-               v.AddArg(x)
+               v.copyOf(x)
                return true
        }
-       // match: (MOVLload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // match: (MOVQload [off1] {sym} (ADDQconst [off2] ptr) mem)
        // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (MOVLload [off1+off2] {sym} ptr mem)
+       // result: (MOVQload [off1+off2] {sym} ptr mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -11840,15 +11968,15 @@ func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool {
                if !(is32Bit(int64(off1) + int64(off2))) {
                        break
                }
-               v.reset(OpAMD64MOVLload)
+               v.reset(OpAMD64MOVQload)
                v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(sym)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVLload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // match: (MOVQload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
        // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
-       // result: (MOVLload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // result: (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym1 := auxToSym(v.Aux)
@@ -11862,32 +11990,32 @@ func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool {
                if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVLload)
+               v.reset(OpAMD64MOVQload)
                v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(mergeSym(sym1, sym2))
                v.AddArg2(base, mem)
                return true
        }
-       // match: (MOVLload [off] {sym} ptr (MOVSSstore [off] {sym} ptr val _))
-       // result: (MOVLf2i val)
+       // match: (MOVQload [off] {sym} ptr (MOVSDstore [off] {sym} ptr val _))
+       // result: (MOVQf2i val)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
-               if v_1.Op != OpAMD64MOVSSstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
+               if v_1.Op != OpAMD64MOVSDstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
                        break
                }
                val := v_1.Args[1]
                if ptr != v_1.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVLf2i)
+               v.reset(OpAMD64MOVQf2i)
                v.AddArg(val)
                return true
        }
-       // match: (MOVLload [off] {sym} (SB) _)
+       // match: (MOVQload [off] {sym} (SB) _)
        // cond: symIsRO(sym)
-       // result: (MOVQconst [int64(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))])
+       // result: (MOVQconst [int64(read64(sym, int64(off), config.ctxt.Arch.ByteOrder))])
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -11895,54 +12023,18 @@ func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool {
                        break
                }
                v.reset(OpAMD64MOVQconst)
-               v.AuxInt = int64ToAuxInt(int64(read32(sym, int64(off), config.ctxt.Arch.ByteOrder)))
+               v.AuxInt = int64ToAuxInt(int64(read64(sym, int64(off), config.ctxt.Arch.ByteOrder)))
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (MOVLstore [off] {sym} ptr (MOVLQSX x) mem)
-       // result: (MOVLstore [off] {sym} ptr x mem)
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVLQSX {
-                       break
-               }
-               x := v_1.Args[0]
-               mem := v_2
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, x, mem)
-               return true
-       }
-       // match: (MOVLstore [off] {sym} ptr (MOVLQZX x) mem)
-       // result: (MOVLstore [off] {sym} ptr x mem)
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVLQZX {
-                       break
-               }
-               x := v_1.Args[0]
-               mem := v_2
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, x, mem)
-               return true
-       }
-       // match: (MOVLstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-       // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (MOVLstore [off1+off2] {sym} ptr val mem)
+       // match: (MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // cond: is32Bit(int64(off1)+int64(off2))
+       // result: (MOVQstore [off1+off2] {sym} ptr val mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -11956,31 +12048,15 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                if !(is32Bit(int64(off1) + int64(off2))) {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
+               v.reset(OpAMD64MOVQstore)
                v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(sym)
                v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem)
-       // result: (MOVLstoreconst [makeValAndOff(int32(c),off)] {sym} ptr mem)
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVLconst {
-                       break
-               }
-               c := auxIntToInt32(v_1.AuxInt)
-               mem := v_2
-               v.reset(OpAMD64MOVLstoreconst)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
-               v.Aux = symToAux(sym)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVLstore [off] {sym} ptr (MOVQconst [c]) mem)
-       // result: (MOVLstoreconst [makeValAndOff(int32(c),off)] {sym} ptr mem)
+       // match: (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem)
+       // cond: validVal(c)
+       // result: (MOVQstoreconst [makeValAndOff(int32(c),off)] {sym} ptr mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -11990,15 +12066,18 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                }
                c := auxIntToInt64(v_1.AuxInt)
                mem := v_2
-               v.reset(OpAMD64MOVLstoreconst)
+               if !(validVal(c)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreconst)
                v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
                v.Aux = symToAux(sym)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVLstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (MOVQstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
-       // result: (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym1 := auxToSym(v.Aux)
@@ -12013,171 +12092,21 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
+               v.reset(OpAMD64MOVQstore)
                v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(mergeSym(sym1, sym2))
                v.AddArg3(base, val, mem)
                return true
        }
-       // match: (MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVQstore [i-4] {s} p w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpAMD64SHRQconst || auxIntToInt8(v_1.AuxInt) != 32 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVLstore || auxIntToInt32(x.AuxInt) != i-4 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = int32ToAuxInt(i - 4)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       // match: (MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVQstore [i-4] {s} p w0 mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpAMD64SHRQconst {
-                       break
-               }
-               j := auxIntToInt8(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVLstore || auxIntToInt32(x.AuxInt) != i-4 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRQconst || auxIntToInt8(w0.AuxInt) != j-32 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = int32ToAuxInt(i - 4)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w0, mem)
-               return true
-       }
-       // match: (MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i] {s} p0 w mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 4) && clobber(x)
-       // result: (MOVQstore [i] {s} p0 w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               if v_1.Op != OpAMD64SHRQconst || auxIntToInt8(v_1.AuxInt) != 32 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVLstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               p0 := x.Args[0]
-               if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 4) && clobber(x)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w, mem)
-               return true
-       }
-       // match: (MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i] {s} p0 w0:(SHRQconst [j-32] w) mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 4) && clobber(x)
-       // result: (MOVQstore [i] {s} p0 w0 mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               if v_1.Op != OpAMD64SHRQconst {
-                       break
-               }
-               j := auxIntToInt8(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVLstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               p0 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRQconst || auxIntToInt8(w0.AuxInt) != j-32 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 4) && clobber(x)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w0, mem)
-               return true
-       }
-       // match: (MOVLstore [i] {s} p x1:(MOVLload [j] {s2} p2 mem) mem2:(MOVLstore [i-4] {s} p x2:(MOVLload [j-4] {s2} p2 mem) mem))
-       // cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1, x2, mem2)
-       // result: (MOVQstore [i-4] {s} p (MOVQload [j-4] {s2} p2 mem) mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               x1 := v_1
-               if x1.Op != OpAMD64MOVLload {
-                       break
-               }
-               j := auxIntToInt32(x1.AuxInt)
-               s2 := auxToSym(x1.Aux)
-               mem := x1.Args[1]
-               p2 := x1.Args[0]
-               mem2 := v_2
-               if mem2.Op != OpAMD64MOVLstore || auxIntToInt32(mem2.AuxInt) != i-4 || auxToSym(mem2.Aux) != s {
-                       break
-               }
-               _ = mem2.Args[2]
-               if p != mem2.Args[0] {
-                       break
-               }
-               x2 := mem2.Args[1]
-               if x2.Op != OpAMD64MOVLload || auxIntToInt32(x2.AuxInt) != j-4 || auxToSym(x2.Aux) != s2 {
-                       break
-               }
-               _ = x2.Args[1]
-               if p2 != x2.Args[0] || mem != x2.Args[1] || mem != mem2.Args[2] || !(x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1, x2, mem2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = int32ToAuxInt(i - 4)
-               v.Aux = symToAux(s)
-               v0 := b.NewValue0(x2.Pos, OpAMD64MOVQload, typ.UInt64)
-               v0.AuxInt = int32ToAuxInt(j - 4)
-               v0.Aux = symToAux(s2)
-               v0.AddArg2(p2, mem)
-               v.AddArg3(p, v0, mem)
-               return true
-       }
-       // match: (MOVLstore {sym} [off] ptr y:(ADDLload x [off] {sym} ptr mem) mem)
+       // match: (MOVQstore {sym} [off] ptr y:(ADDQload x [off] {sym} ptr mem) mem)
        // cond: y.Uses==1 && clobber(y)
-       // result: (ADDLmodify [off] {sym} ptr x mem)
+       // result: (ADDQmodify [off] {sym} ptr x mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
                y := v_1
-               if y.Op != OpAMD64ADDLload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym {
+               if y.Op != OpAMD64ADDQload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym {
                        break
                }
                mem := y.Args[2]
@@ -12185,21 +12114,21 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) {
                        break
                }
-               v.reset(OpAMD64ADDLmodify)
+               v.reset(OpAMD64ADDQmodify)
                v.AuxInt = int32ToAuxInt(off)
                v.Aux = symToAux(sym)
                v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVLstore {sym} [off] ptr y:(ANDLload x [off] {sym} ptr mem) mem)
+       // match: (MOVQstore {sym} [off] ptr y:(ANDQload x [off] {sym} ptr mem) mem)
        // cond: y.Uses==1 && clobber(y)
-       // result: (ANDLmodify [off] {sym} ptr x mem)
+       // result: (ANDQmodify [off] {sym} ptr x mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
                y := v_1
-               if y.Op != OpAMD64ANDLload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym {
+               if y.Op != OpAMD64ANDQload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym {
                        break
                }
                mem := y.Args[2]
@@ -12207,21 +12136,21 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) {
                        break
                }
-               v.reset(OpAMD64ANDLmodify)
+               v.reset(OpAMD64ANDQmodify)
                v.AuxInt = int32ToAuxInt(off)
                v.Aux = symToAux(sym)
                v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVLstore {sym} [off] ptr y:(ORLload x [off] {sym} ptr mem) mem)
+       // match: (MOVQstore {sym} [off] ptr y:(ORQload x [off] {sym} ptr mem) mem)
        // cond: y.Uses==1 && clobber(y)
-       // result: (ORLmodify [off] {sym} ptr x mem)
+       // result: (ORQmodify [off] {sym} ptr x mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
                y := v_1
-               if y.Op != OpAMD64ORLload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym {
+               if y.Op != OpAMD64ORQload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym {
                        break
                }
                mem := y.Args[2]
@@ -12229,21 +12158,21 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) {
                        break
                }
-               v.reset(OpAMD64ORLmodify)
+               v.reset(OpAMD64ORQmodify)
                v.AuxInt = int32ToAuxInt(off)
                v.Aux = symToAux(sym)
                v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVLstore {sym} [off] ptr y:(XORLload x [off] {sym} ptr mem) mem)
+       // match: (MOVQstore {sym} [off] ptr y:(XORQload x [off] {sym} ptr mem) mem)
        // cond: y.Uses==1 && clobber(y)
-       // result: (XORLmodify [off] {sym} ptr x mem)
+       // result: (XORQmodify [off] {sym} ptr x mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
                y := v_1
-               if y.Op != OpAMD64XORLload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym {
+               if y.Op != OpAMD64XORQload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym {
                        break
                }
                mem := y.Args[2]
@@ -12251,21 +12180,21 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) {
                        break
                }
-               v.reset(OpAMD64XORLmodify)
+               v.reset(OpAMD64XORQmodify)
                v.AuxInt = int32ToAuxInt(off)
                v.Aux = symToAux(sym)
                v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVLstore {sym} [off] ptr y:(ADDL l:(MOVLload [off] {sym} ptr mem) x) mem)
+       // match: (MOVQstore {sym} [off] ptr y:(ADDQ l:(MOVQload [off] {sym} ptr mem) x) mem)
        // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-       // result: (ADDLmodify [off] {sym} ptr x mem)
+       // result: (ADDQmodify [off] {sym} ptr x mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
                y := v_1
-               if y.Op != OpAMD64ADDL {
+               if y.Op != OpAMD64ADDQ {
                        break
                }
                _ = y.Args[1]
@@ -12273,7 +12202,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                y_1 := y.Args[1]
                for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
                        l := y_0
-                       if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
+                       if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
                                continue
                        }
                        mem := l.Args[1]
@@ -12284,7 +12213,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                        if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
                                continue
                        }
-                       v.reset(OpAMD64ADDLmodify)
+                       v.reset(OpAMD64ADDQmodify)
                        v.AuxInt = int32ToAuxInt(off)
                        v.Aux = symToAux(sym)
                        v.AddArg3(ptr, x, mem)
@@ -12292,41 +12221,41 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                }
                break
        }
-       // match: (MOVLstore {sym} [off] ptr y:(SUBL l:(MOVLload [off] {sym} ptr mem) x) mem)
+       // match: (MOVQstore {sym} [off] ptr y:(SUBQ l:(MOVQload [off] {sym} ptr mem) x) mem)
        // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-       // result: (SUBLmodify [off] {sym} ptr x mem)
+       // result: (SUBQmodify [off] {sym} ptr x mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
                y := v_1
-               if y.Op != OpAMD64SUBL {
+               if y.Op != OpAMD64SUBQ {
                        break
                }
                x := y.Args[1]
                l := y.Args[0]
-               if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
+               if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
                        break
                }
                mem := l.Args[1]
                if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
                        break
                }
-               v.reset(OpAMD64SUBLmodify)
+               v.reset(OpAMD64SUBQmodify)
                v.AuxInt = int32ToAuxInt(off)
                v.Aux = symToAux(sym)
                v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVLstore {sym} [off] ptr y:(ANDL l:(MOVLload [off] {sym} ptr mem) x) mem)
+       // match: (MOVQstore {sym} [off] ptr y:(ANDQ l:(MOVQload [off] {sym} ptr mem) x) mem)
        // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-       // result: (ANDLmodify [off] {sym} ptr x mem)
+       // result: (ANDQmodify [off] {sym} ptr x mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
                y := v_1
-               if y.Op != OpAMD64ANDL {
+               if y.Op != OpAMD64ANDQ {
                        break
                }
                _ = y.Args[1]
@@ -12334,7 +12263,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                y_1 := y.Args[1]
                for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
                        l := y_0
-                       if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
+                       if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
                                continue
                        }
                        mem := l.Args[1]
@@ -12345,7 +12274,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                        if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
                                continue
                        }
-                       v.reset(OpAMD64ANDLmodify)
+                       v.reset(OpAMD64ANDQmodify)
                        v.AuxInt = int32ToAuxInt(off)
                        v.Aux = symToAux(sym)
                        v.AddArg3(ptr, x, mem)
@@ -12353,15 +12282,15 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                }
                break
        }
-       // match: (MOVLstore {sym} [off] ptr y:(ORL l:(MOVLload [off] {sym} ptr mem) x) mem)
+       // match: (MOVQstore {sym} [off] ptr y:(ORQ l:(MOVQload [off] {sym} ptr mem) x) mem)
        // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-       // result: (ORLmodify [off] {sym} ptr x mem)
+       // result: (ORQmodify [off] {sym} ptr x mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
                y := v_1
-               if y.Op != OpAMD64ORL {
+               if y.Op != OpAMD64ORQ {
                        break
                }
                _ = y.Args[1]
@@ -12369,7 +12298,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                y_1 := y.Args[1]
                for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
                        l := y_0
-                       if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
+                       if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
                                continue
                        }
                        mem := l.Args[1]
@@ -12380,7 +12309,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                        if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
                                continue
                        }
-                       v.reset(OpAMD64ORLmodify)
+                       v.reset(OpAMD64ORQmodify)
                        v.AuxInt = int32ToAuxInt(off)
                        v.Aux = symToAux(sym)
                        v.AddArg3(ptr, x, mem)
@@ -12388,15 +12317,15 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                }
                break
        }
-       // match: (MOVLstore {sym} [off] ptr y:(XORL l:(MOVLload [off] {sym} ptr mem) x) mem)
+       // match: (MOVQstore {sym} [off] ptr y:(XORQ l:(MOVQload [off] {sym} ptr mem) x) mem)
        // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-       // result: (XORLmodify [off] {sym} ptr x mem)
+       // result: (XORQmodify [off] {sym} ptr x mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
                y := v_1
-               if y.Op != OpAMD64XORL {
+               if y.Op != OpAMD64XORQ {
                        break
                }
                _ = y.Args[1]
@@ -12404,7 +12333,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                y_1 := y.Args[1]
                for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
                        l := y_0
-                       if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
+                       if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
                                continue
                        }
                        mem := l.Args[1]
@@ -12415,7 +12344,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                        if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
                                continue
                        }
-                       v.reset(OpAMD64XORLmodify)
+                       v.reset(OpAMD64XORQmodify)
                        v.AuxInt = int32ToAuxInt(off)
                        v.Aux = symToAux(sym)
                        v.AddArg3(ptr, x, mem)
@@ -12423,20 +12352,20 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                }
                break
        }
-       // match: (MOVLstore [off] {sym} ptr a:(ADDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+       // match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
        // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)
-       // result: (ADDLconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
+       // result: (ADDQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
                a := v_1
-               if a.Op != OpAMD64ADDLconst {
+               if a.Op != OpAMD64ADDQconst {
                        break
                }
                c := auxIntToInt32(a.AuxInt)
                l := a.Args[0]
-               if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
+               if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
                        break
                }
                mem := l.Args[1]
@@ -12444,26 +12373,26 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) {
                        break
                }
-               v.reset(OpAMD64ADDLconstmodify)
+               v.reset(OpAMD64ADDQconstmodify)
                v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
                v.Aux = symToAux(sym)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVLstore [off] {sym} ptr a:(ANDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+       // match: (MOVQstore [off] {sym} ptr a:(ANDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
        // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)
-       // result: (ANDLconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
+       // result: (ANDQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
                a := v_1
-               if a.Op != OpAMD64ANDLconst {
+               if a.Op != OpAMD64ANDQconst {
                        break
                }
                c := auxIntToInt32(a.AuxInt)
                l := a.Args[0]
-               if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
+               if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
                        break
                }
                mem := l.Args[1]
@@ -12471,26 +12400,26 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) {
                        break
                }
-               v.reset(OpAMD64ANDLconstmodify)
+               v.reset(OpAMD64ANDQconstmodify)
                v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
                v.Aux = symToAux(sym)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVLstore [off] {sym} ptr a:(ORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+       // match: (MOVQstore [off] {sym} ptr a:(ORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
        // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)
-       // result: (ORLconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
+       // result: (ORQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
                a := v_1
-               if a.Op != OpAMD64ORLconst {
+               if a.Op != OpAMD64ORQconst {
                        break
                }
                c := auxIntToInt32(a.AuxInt)
                l := a.Args[0]
-               if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
+               if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
                        break
                }
                mem := l.Args[1]
@@ -12498,26 +12427,26 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) {
                        break
                }
-               v.reset(OpAMD64ORLconstmodify)
+               v.reset(OpAMD64ORQconstmodify)
                v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
                v.Aux = symToAux(sym)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVLstore [off] {sym} ptr a:(XORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+       // match: (MOVQstore [off] {sym} ptr a:(XORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
        // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)
-       // result: (XORLconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
+       // result: (XORQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
                a := v_1
-               if a.Op != OpAMD64XORLconst {
+               if a.Op != OpAMD64XORQconst {
                        break
                }
                c := auxIntToInt32(a.AuxInt)
                l := a.Args[0]
-               if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
+               if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
                        break
                }
                mem := l.Args[1]
@@ -12525,38 +12454,38 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) {
                        break
                }
-               v.reset(OpAMD64XORLconstmodify)
+               v.reset(OpAMD64XORQconstmodify)
                v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
                v.Aux = symToAux(sym)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVLstore [off] {sym} ptr (MOVLf2i val) mem)
-       // result: (MOVSSstore [off] {sym} ptr val mem)
+       // match: (MOVQstore [off] {sym} ptr (MOVQf2i val) mem)
+       // result: (MOVSDstore [off] {sym} ptr val mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
-               if v_1.Op != OpAMD64MOVLf2i {
+               if v_1.Op != OpAMD64MOVQf2i {
                        break
                }
                val := v_1.Args[0]
                mem := v_2
-               v.reset(OpAMD64MOVSSstore)
+               v.reset(OpAMD64MOVSDstore)
                v.AuxInt = int32ToAuxInt(off)
                v.Aux = symToAux(sym)
                v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVLstore [i] {s} p x:(BSWAPL w) mem)
+       // match: (MOVQstore [i] {s} p x:(BSWAPQ w) mem)
        // cond: x.Uses == 1 && buildcfg.GOAMD64 >= 3
-       // result: (MOVBELstore [i] {s} p w mem)
+       // result: (MOVBEQstore [i] {s} p w mem)
        for {
                i := auxIntToInt32(v.AuxInt)
                s := auxToSym(v.Aux)
                p := v_0
                x := v_1
-               if x.Op != OpAMD64BSWAPL {
+               if x.Op != OpAMD64BSWAPQ {
                        break
                }
                w := x.Args[0]
@@ -12564,7 +12493,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
                if !(x.Uses == 1 && buildcfg.GOAMD64 >= 3) {
                        break
                }
-               v.reset(OpAMD64MOVBELstore)
+               v.reset(OpAMD64MOVBEQstore)
                v.AuxInt = int32ToAuxInt(i)
                v.Aux = symToAux(s)
                v.AddArg3(p, w, mem)
@@ -12572,14 +12501,14 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (MOVLstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
+       config := b.Func.Config
+       // match: (MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
        // cond: ValAndOff(sc).canAdd32(off)
-       // result: (MOVLstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem)
+       // result: (MOVQstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem)
        for {
                sc := auxIntToValAndOff(v.AuxInt)
                s := auxToSym(v.Aux)
@@ -12592,15 +12521,15 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
                if !(ValAndOff(sc).canAdd32(off)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconst)
+               v.reset(OpAMD64MOVQstoreconst)
                v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off))
                v.Aux = symToAux(s)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVLstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
+       // match: (MOVQstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
        // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)
-       // result: (MOVLstoreconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // result: (MOVQstoreconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem)
        for {
                sc := auxIntToValAndOff(v.AuxInt)
                sym1 := auxToSym(v.Aux)
@@ -12614,21 +12543,21 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconst)
+               v.reset(OpAMD64MOVQstoreconst)
                v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off))
                v.Aux = symToAux(mergeSym(sym1, sym2))
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVLstoreconst [c] {s} p1 x:(MOVLstoreconst [a] {s} p0 mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+4-c.Off())) && clobber(x)
-       // result: (MOVQstore [a.Off()] {s} p0 (MOVQconst [a.Val64()&0xffffffff | c.Val64()<<32]) mem)
+       // match: (MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [a] {s} p0 mem))
+       // cond: config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && clobber(x)
+       // result: (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem)
        for {
                c := auxIntToValAndOff(v.AuxInt)
                s := auxToSym(v.Aux)
                p1 := v_0
                x := v_1
-               if x.Op != OpAMD64MOVLstoreconst {
+               if x.Op != OpAMD64MOVQstoreconst {
                        break
                }
                a := auxIntToValAndOff(x.AuxInt)
@@ -12637,26 +12566,24 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
                }
                mem := x.Args[1]
                p0 := x.Args[0]
-               if !(x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+4-c.Off())) && clobber(x)) {
+               if !(config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = int32ToAuxInt(a.Off())
+               v.reset(OpAMD64MOVOstoreconst)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, a.Off()))
                v.Aux = symToAux(s)
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVQconst, typ.UInt64)
-               v0.AuxInt = int64ToAuxInt(a.Val64()&0xffffffff | c.Val64()<<32)
-               v.AddArg3(p0, v0, mem)
+               v.AddArg2(p0, mem)
                return true
        }
-       // match: (MOVLstoreconst [a] {s} p0 x:(MOVLstoreconst [c] {s} p1 mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+4-c.Off())) && clobber(x)
-       // result: (MOVQstore [a.Off()] {s} p0 (MOVQconst [a.Val64()&0xffffffff | c.Val64()<<32]) mem)
+       // match: (MOVQstoreconst [a] {s} p0 x:(MOVQstoreconst [c] {s} p1 mem))
+       // cond: config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && clobber(x)
+       // result: (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem)
        for {
                a := auxIntToValAndOff(v.AuxInt)
                s := auxToSym(v.Aux)
                p0 := v_0
                x := v_1
-               if x.Op != OpAMD64MOVLstoreconst {
+               if x.Op != OpAMD64MOVQstoreconst {
                        break
                }
                c := auxIntToValAndOff(x.AuxInt)
@@ -12665,25 +12592,23 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
                }
                mem := x.Args[1]
                p1 := x.Args[0]
-               if !(x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+4-c.Off())) && clobber(x)) {
+               if !(config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = int32ToAuxInt(a.Off())
+               v.reset(OpAMD64MOVOstoreconst)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, a.Off()))
                v.Aux = symToAux(s)
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVQconst, typ.UInt64)
-               v0.AuxInt = int64ToAuxInt(a.Val64()&0xffffffff | c.Val64()<<32)
-               v.AddArg3(p0, v0, mem)
+               v.AddArg2(p0, mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVOload(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVSDload(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVOload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // match: (MOVSDload [off1] {sym} (ADDQconst [off2] ptr) mem)
        // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (MOVOload [off1+off2] {sym} ptr mem)
+       // result: (MOVSDload [off1+off2] {sym} ptr mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -12696,15 +12621,15 @@ func rewriteValueAMD64_OpAMD64MOVOload(v *Value) bool {
                if !(is32Bit(int64(off1) + int64(off2))) {
                        break
                }
-               v.reset(OpAMD64MOVOload)
+               v.reset(OpAMD64MOVSDload)
                v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(sym)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVOload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // match: (MOVSDload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
        // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
-       // result: (MOVOload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym1 := auxToSym(v.Aux)
@@ -12718,24 +12643,38 @@ func rewriteValueAMD64_OpAMD64MOVOload(v *Value) bool {
                if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVOload)
+               v.reset(OpAMD64MOVSDload)
                v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(mergeSym(sym1, sym2))
                v.AddArg2(base, mem)
                return true
        }
+       // match: (MOVSDload [off] {sym} ptr (MOVQstore [off] {sym} ptr val _))
+       // result: (MOVQi2f val)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVQstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
+                       break
+               }
+               val := v_1.Args[1]
+               if ptr != v_1.Args[0] {
+                       break
+               }
+               v.reset(OpAMD64MOVQi2f)
+               v.AddArg(val)
+               return true
+       }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVOstore(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVSDstore(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       config := b.Func.Config
-       typ := &b.Func.Config.Types
-       // match: (MOVOstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // match: (MOVSDstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
        // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (MOVOstore [off1+off2] {sym} ptr val mem)
+       // result: (MOVSDstore [off1+off2] {sym} ptr val mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -12749,15 +12688,15 @@ func rewriteValueAMD64_OpAMD64MOVOstore(v *Value) bool {
                if !(is32Bit(int64(off1) + int64(off2))) {
                        break
                }
-               v.reset(OpAMD64MOVOstore)
+               v.reset(OpAMD64MOVSDstore)
                v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(sym)
                v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVOstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (MOVSDstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
-       // result: (MOVOstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym1 := auxToSym(v.Aux)
@@ -12772,102 +12711,103 @@ func rewriteValueAMD64_OpAMD64MOVOstore(v *Value) bool {
                if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVOstore)
+               v.reset(OpAMD64MOVSDstore)
                v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(mergeSym(sym1, sym2))
                v.AddArg3(base, val, mem)
                return true
        }
-       // match: (MOVOstore [dstOff] {dstSym} ptr (MOVOload [srcOff] {srcSym} (SB) _) mem)
-       // cond: symIsRO(srcSym)
-       // result: (MOVQstore [dstOff+8] {dstSym} ptr (MOVQconst [int64(read64(srcSym, int64(srcOff)+8, config.ctxt.Arch.ByteOrder))]) (MOVQstore [dstOff] {dstSym} ptr (MOVQconst [int64(read64(srcSym, int64(srcOff), config.ctxt.Arch.ByteOrder))]) mem))
+       // match: (MOVSDstore [off] {sym} ptr (MOVQi2f val) mem)
+       // result: (MOVQstore [off] {sym} ptr val mem)
        for {
-               dstOff := auxIntToInt32(v.AuxInt)
-               dstSym := auxToSym(v.Aux)
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
                ptr := v_0
-               if v_1.Op != OpAMD64MOVOload {
-                       break
-               }
-               srcOff := auxIntToInt32(v_1.AuxInt)
-               srcSym := auxToSym(v_1.Aux)
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpSB {
+               if v_1.Op != OpAMD64MOVQi2f {
                        break
                }
+               val := v_1.Args[0]
                mem := v_2
-               if !(symIsRO(srcSym)) {
-                       break
-               }
                v.reset(OpAMD64MOVQstore)
-               v.AuxInt = int32ToAuxInt(dstOff + 8)
-               v.Aux = symToAux(dstSym)
-               v0 := b.NewValue0(v_1.Pos, OpAMD64MOVQconst, typ.UInt64)
-               v0.AuxInt = int64ToAuxInt(int64(read64(srcSym, int64(srcOff)+8, config.ctxt.Arch.ByteOrder)))
-               v1 := b.NewValue0(v_1.Pos, OpAMD64MOVQstore, types.TypeMem)
-               v1.AuxInt = int32ToAuxInt(dstOff)
-               v1.Aux = symToAux(dstSym)
-               v2 := b.NewValue0(v_1.Pos, OpAMD64MOVQconst, typ.UInt64)
-               v2.AuxInt = int64ToAuxInt(int64(read64(srcSym, int64(srcOff), config.ctxt.Arch.ByteOrder)))
-               v1.AddArg3(ptr, v2, mem)
-               v.AddArg3(ptr, v0, v1)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, val, mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVOstoreconst(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVSSload(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVOstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd32(off)
-       // result: (MOVOstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem)
+       // match: (MOVSSload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(int64(off1)+int64(off2))
+       // result: (MOVSSload [off1+off2] {sym} ptr mem)
        for {
-               sc := auxIntToValAndOff(v.AuxInt)
-               s := auxToSym(v.Aux)
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
                if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               off := auxIntToInt32(v_0.AuxInt)
+               off2 := auxIntToInt32(v_0.AuxInt)
                ptr := v_0.Args[0]
                mem := v_1
-               if !(ValAndOff(sc).canAdd32(off)) {
+               if !(is32Bit(int64(off1) + int64(off2))) {
                        break
                }
-               v.reset(OpAMD64MOVOstoreconst)
-               v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off))
-               v.Aux = symToAux(s)
+               v.reset(OpAMD64MOVSSload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(sym)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVOstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)
-       // result: (MOVOstoreconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // match: (MOVSSload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
+       // result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
-               sc := auxIntToValAndOff(v.AuxInt)
+               off1 := auxIntToInt32(v.AuxInt)
                sym1 := auxToSym(v.Aux)
                if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               off := auxIntToInt32(v_0.AuxInt)
+               off2 := auxIntToInt32(v_0.AuxInt)
                sym2 := auxToSym(v_0.Aux)
-               ptr := v_0.Args[0]
+               base := v_0.Args[0]
                mem := v_1
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)) {
+               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVOstoreconst)
-               v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off))
+               v.reset(OpAMD64MOVSSload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg2(ptr, mem)
+               v.AddArg2(base, mem)
+               return true
+       }
+       // match: (MOVSSload [off] {sym} ptr (MOVLstore [off] {sym} ptr val _))
+       // result: (MOVLi2f val)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVLstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
+                       break
+               }
+               val := v_1.Args[1]
+               if ptr != v_1.Args[0] {
+                       break
+               }
+               v.reset(OpAMD64MOVLi2f)
+               v.AddArg(val)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVQatomicload(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVSSstore(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // match: (MOVSSstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
        // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (MOVQatomicload [off1+off2] {sym} ptr mem)
+       // result: (MOVSSstore [off1+off2] {sym} ptr val mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -12876,19 +12816,20 @@ func rewriteValueAMD64_OpAMD64MOVQatomicload(v *Value) bool {
                }
                off2 := auxIntToInt32(v_0.AuxInt)
                ptr := v_0.Args[0]
-               mem := v_1
+               val := v_1
+               mem := v_2
                if !(is32Bit(int64(off1) + int64(off2))) {
                        break
                }
-               v.reset(OpAMD64MOVQatomicload)
+               v.reset(OpAMD64MOVSSstore)
                v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(sym)
-               v.AddArg2(ptr, mem)
+               v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVQatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem)
+       // match: (MOVSSstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
-       // result: (MOVQatomicload [off1+off2] {mergeSym(sym1, sym2)} ptr mem)
+       // result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym1 := auxToSym(v.Aux)
@@ -12897,221 +12838,177 @@ func rewriteValueAMD64_OpAMD64MOVQatomicload(v *Value) bool {
                }
                off2 := auxIntToInt32(v_0.AuxInt)
                sym2 := auxToSym(v_0.Aux)
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
+               base := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVQatomicload)
+               v.reset(OpAMD64MOVSSstore)
                v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg2(ptr, mem)
+               v.AddArg3(base, val, mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVQf2i(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MOVQf2i <t> (Arg <u> [off] {sym}))
-       // cond: t.Size() == u.Size()
-       // result: @b.Func.Entry (Arg <t> [off] {sym})
+       // match: (MOVSSstore [off] {sym} ptr (MOVLi2f val) mem)
+       // result: (MOVLstore [off] {sym} ptr val mem)
        for {
-               t := v.Type
-               if v_0.Op != OpArg {
-                       break
-               }
-               u := v_0.Type
-               off := auxIntToInt32(v_0.AuxInt)
-               sym := auxToSym(v_0.Aux)
-               if !(t.Size() == u.Size()) {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpAMD64MOVLi2f {
                        break
                }
-               b = b.Func.Entry
-               v0 := b.NewValue0(v.Pos, OpArg, t)
-               v.copyOf(v0)
-               v0.AuxInt = int32ToAuxInt(off)
-               v0.Aux = symToAux(sym)
+               val := v_1.Args[0]
+               mem := v_2
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, val, mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVQi2f(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVWQSX(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (MOVQi2f <t> (Arg <u> [off] {sym}))
-       // cond: t.Size() == u.Size()
-       // result: @b.Func.Entry (Arg <t> [off] {sym})
+       // match: (MOVWQSX x:(MOVWload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
        for {
-               t := v.Type
-               if v_0.Op != OpArg {
+               x := v_0
+               if x.Op != OpAMD64MOVWload {
                        break
                }
-               u := v_0.Type
-               off := auxIntToInt32(v_0.AuxInt)
-               sym := auxToSym(v_0.Aux)
-               if !(t.Size() == u.Size()) {
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               b = b.Func.Entry
-               v0 := b.NewValue0(v.Pos, OpArg, t)
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type)
                v.copyOf(v0)
                v0.AuxInt = int32ToAuxInt(off)
                v0.Aux = symToAux(sym)
+               v0.AddArg2(ptr, mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVQload(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       config := b.Func.Config
-       // match: (MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: x
+       // match: (MOVWQSX x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVQstore {
+               x := v_0
+               if x.Op != OpAMD64MOVLload {
                        break
                }
-               off2 := auxIntToInt32(v_1.AuxInt)
-               sym2 := auxToSym(v_1.Aux)
-               x := v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.copyOf(x)
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVQload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (MOVQload [off1+off2] {sym} ptr mem)
+       // match: (MOVWQSX x:(MOVQload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64ADDQconst {
+               x := v_0
+               if x.Op != OpAMD64MOVQload {
                        break
                }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(int64(off1) + int64(off2))) {
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVQload)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(sym)
-               v.AddArg2(ptr, mem)
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVQload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
-       // result: (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (MOVWQSX (ANDLconst [c] x))
+       // cond: c & 0x8000 == 0
+       // result: (ANDLconst [c & 0x7fff] x)
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64LEAQ {
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               sym2 := auxToSym(v_0.Aux)
-               base := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
+               c := auxIntToInt32(v_0.AuxInt)
+               x := v_0.Args[0]
+               if !(c&0x8000 == 0) {
                        break
                }
-               v.reset(OpAMD64MOVQload)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg2(base, mem)
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = int32ToAuxInt(c & 0x7fff)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVQload [off] {sym} ptr (MOVSDstore [off] {sym} ptr val _))
-       // result: (MOVQf2i val)
+       // match: (MOVWQSX (MOVWQSX x))
+       // result: (MOVWQSX x)
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVSDstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
-                       break
-               }
-               val := v_1.Args[1]
-               if ptr != v_1.Args[0] {
+               if v_0.Op != OpAMD64MOVWQSX {
                        break
                }
-               v.reset(OpAMD64MOVQf2i)
-               v.AddArg(val)
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVWQSX)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVQload [off] {sym} (SB) _)
-       // cond: symIsRO(sym)
-       // result: (MOVQconst [int64(read64(sym, int64(off), config.ctxt.Arch.ByteOrder))])
+       // match: (MOVWQSX (MOVBQSX x))
+       // result: (MOVBQSX x)
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpSB || !(symIsRO(sym)) {
+               if v_0.Op != OpAMD64MOVBQSX {
                        break
                }
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = int64ToAuxInt(int64(read64(sym, int64(off), config.ctxt.Arch.ByteOrder)))
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVBQSX)
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpAMD64MOVWQSXload(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-       // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (MOVQstore [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               ptr := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(int64(off1) + int64(off2))) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       // match: (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem)
-       // cond: validVal(c)
-       // result: (MOVQstoreconst [makeValAndOff(int32(c),off)] {sym} ptr mem)
+       // match: (MOVWQSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVWQSX x)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
-               if v_1.Op != OpAMD64MOVQconst {
+               if v_1.Op != OpAMD64MOVWstore {
                        break
                }
-               c := auxIntToInt64(v_1.AuxInt)
-               mem := v_2
-               if !(validVal(c)) {
+               off2 := auxIntToInt32(v_1.AuxInt)
+               sym2 := auxToSym(v_1.Aux)
+               x := v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconst)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
-               v.Aux = symToAux(sym)
-               v.AddArg2(ptr, mem)
+               v.reset(OpAMD64MOVWQSX)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVQstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (MOVWQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
        // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
-       // result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (MOVWQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym1 := auxToSym(v.Aux)
@@ -13121,413 +13018,399 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
                off2 := auxIntToInt32(v_0.AuxInt)
                sym2 := auxToSym(v_0.Aux)
                base := v_0.Args[0]
-               val := v_1
-               mem := v_2
+               mem := v_1
                if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
+               v.reset(OpAMD64MOVWQSXload)
                v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg3(base, val, mem)
+               v.AddArg2(base, mem)
                return true
        }
-       // match: (MOVQstore {sym} [off] ptr y:(ADDQload x [off] {sym} ptr mem) mem)
-       // cond: y.Uses==1 && clobber(y)
-       // result: (ADDQmodify [off] {sym} ptr x mem)
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64ADDQload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym {
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVWQZX(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MOVWQZX x:(MOVWload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v_0
+               if x.Op != OpAMD64MOVWload {
                        break
                }
-               mem := y.Args[2]
-               x := y.Args[0]
-               if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) {
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64ADDQmodify)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, x, mem)
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVQstore {sym} [off] ptr y:(ANDQload x [off] {sym} ptr mem) mem)
-       // cond: y.Uses==1 && clobber(y)
-       // result: (ANDQmodify [off] {sym} ptr x mem)
+       // match: (MOVWQZX x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64ANDQload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym {
+               x := v_0
+               if x.Op != OpAMD64MOVLload {
                        break
                }
-               mem := y.Args[2]
-               x := y.Args[0]
-               if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) {
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64ANDQmodify)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, x, mem)
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVQstore {sym} [off] ptr y:(ORQload x [off] {sym} ptr mem) mem)
-       // cond: y.Uses==1 && clobber(y)
-       // result: (ORQmodify [off] {sym} ptr x mem)
+       // match: (MOVWQZX x:(MOVQload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64ORQload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym {
+               x := v_0
+               if x.Op != OpAMD64MOVQload {
                        break
                }
-               mem := y.Args[2]
-               x := y.Args[0]
-               if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) {
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64ORQmodify)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, x, mem)
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVQstore {sym} [off] ptr y:(XORQload x [off] {sym} ptr mem) mem)
-       // cond: y.Uses==1 && clobber(y)
-       // result: (XORQmodify [off] {sym} ptr x mem)
+       // match: (MOVWQZX x)
+       // cond: zeroUpper48Bits(x,3)
+       // result: x
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64XORQload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym {
+               x := v_0
+               if !(zeroUpper48Bits(x, 3)) {
                        break
                }
-               mem := y.Args[2]
-               x := y.Args[0]
-               if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) {
+               v.copyOf(x)
+               return true
+       }
+       // match: (MOVWQZX (ANDLconst [c] x))
+       // result: (ANDLconst [c & 0xffff] x)
+       for {
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               v.reset(OpAMD64XORQmodify)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, x, mem)
+               c := auxIntToInt32(v_0.AuxInt)
+               x := v_0.Args[0]
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = int32ToAuxInt(c & 0xffff)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVQstore {sym} [off] ptr y:(ADDQ l:(MOVQload [off] {sym} ptr mem) x) mem)
-       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-       // result: (ADDQmodify [off] {sym} ptr x mem)
+       // match: (MOVWQZX (MOVWQZX x))
+       // result: (MOVWQZX x)
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64ADDQ {
+               if v_0.Op != OpAMD64MOVWQZX {
                        break
                }
-               _ = y.Args[1]
-               y_0 := y.Args[0]
-               y_1 := y.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
-                       l := y_0
-                       if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
-                               continue
-                       }
-                       mem := l.Args[1]
-                       if ptr != l.Args[0] {
-                               continue
-                       }
-                       x := y_1
-                       if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64ADDQmodify)
-                       v.AuxInt = int32ToAuxInt(off)
-                       v.Aux = symToAux(sym)
-                       v.AddArg3(ptr, x, mem)
-                       return true
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVWQZX)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWQZX (MOVBQZX x))
+       // result: (MOVBQZX x)
+       for {
+               if v_0.Op != OpAMD64MOVBQZX {
+                       break
                }
-               break
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVBQZX)
+               v.AddArg(x)
+               return true
        }
-       // match: (MOVQstore {sym} [off] ptr y:(SUBQ l:(MOVQload [off] {sym} ptr mem) x) mem)
-       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-       // result: (SUBQmodify [off] {sym} ptr x mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVWload(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       config := b.Func.Config
+       // match: (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVWQZX x)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64SUBQ {
+               if v_1.Op != OpAMD64MOVWstore {
                        break
                }
-               x := y.Args[1]
-               l := y.Args[0]
-               if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
+               off2 := auxIntToInt32(v_1.AuxInt)
+               sym2 := auxToSym(v_1.Aux)
+               x := v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               mem := l.Args[1]
-               if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
+               v.reset(OpAMD64MOVWQZX)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(int64(off1)+int64(off2))
+       // result: (MOVWload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               v.reset(OpAMD64SUBQmodify)
-               v.AuxInt = int32ToAuxInt(off)
+               off2 := auxIntToInt32(v_0.AuxInt)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(int64(off1) + int64(off2))) {
+                       break
+               }
+               v.reset(OpAMD64MOVWload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(sym)
-               v.AddArg3(ptr, x, mem)
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVQstore {sym} [off] ptr y:(ANDQ l:(MOVQload [off] {sym} ptr mem) x) mem)
-       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-       // result: (ANDQmodify [off] {sym} ptr x mem)
+       // match: (MOVWload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
+       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               base := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg2(base, mem)
+               return true
+       }
+       // match: (MOVWload [off] {sym} (SB) _)
+       // cond: symIsRO(sym)
+       // result: (MOVLconst [int32(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))])
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
-               ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64ANDQ {
+               if v_0.Op != OpSB || !(symIsRO(sym)) {
                        break
                }
-               _ = y.Args[1]
-               y_0 := y.Args[0]
-               y_1 := y.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
-                       l := y_0
-                       if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
-                               continue
-                       }
-                       mem := l.Args[1]
-                       if ptr != l.Args[0] {
-                               continue
-                       }
-                       x := y_1
-                       if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64ANDQmodify)
-                       v.AuxInt = int32ToAuxInt(off)
-                       v.Aux = symToAux(sym)
-                       v.AddArg3(ptr, x, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVQstore {sym} [off] ptr y:(ORQ l:(MOVQload [off] {sym} ptr mem) x) mem)
-       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-       // result: (ORQmodify [off] {sym} ptr x mem)
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64ORQ {
-                       break
-               }
-               _ = y.Args[1]
-               y_0 := y.Args[0]
-               y_1 := y.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
-                       l := y_0
-                       if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
-                               continue
-                       }
-                       mem := l.Args[1]
-                       if ptr != l.Args[0] {
-                               continue
-                       }
-                       x := y_1
-                       if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64ORQmodify)
-                       v.AuxInt = int32ToAuxInt(off)
-                       v.Aux = symToAux(sym)
-                       v.AddArg3(ptr, x, mem)
-                       return true
-               }
-               break
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = int32ToAuxInt(int32(read16(sym, int64(off), config.ctxt.Arch.ByteOrder)))
+               return true
        }
-       // match: (MOVQstore {sym} [off] ptr y:(XORQ l:(MOVQload [off] {sym} ptr mem) x) mem)
-       // cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
-       // result: (XORQmodify [off] {sym} ptr x mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (MOVWstore [off] {sym} ptr (MOVWQSX x) mem)
+       // result: (MOVWstore [off] {sym} ptr x mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
-               y := v_1
-               if y.Op != OpAMD64XORQ {
+               if v_1.Op != OpAMD64MOVWQSX {
                        break
                }
-               _ = y.Args[1]
-               y_0 := y.Args[0]
-               y_1 := y.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 {
-                       l := y_0
-                       if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
-                               continue
-                       }
-                       mem := l.Args[1]
-                       if ptr != l.Args[0] {
-                               continue
-                       }
-                       x := y_1
-                       if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64XORQmodify)
-                       v.AuxInt = int32ToAuxInt(off)
-                       v.Aux = symToAux(sym)
-                       v.AddArg3(ptr, x, mem)
-                       return true
-               }
-               break
+               x := v_1.Args[0]
+               mem := v_2
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, x, mem)
+               return true
        }
-       // match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
-       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)
-       // result: (ADDQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
+       // match: (MOVWstore [off] {sym} ptr (MOVWQZX x) mem)
+       // result: (MOVWstore [off] {sym} ptr x mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
-               a := v_1
-               if a.Op != OpAMD64ADDQconst {
-                       break
-               }
-               c := auxIntToInt32(a.AuxInt)
-               l := a.Args[0]
-               if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
-                       break
-               }
-               mem := l.Args[1]
-               ptr2 := l.Args[0]
-               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) {
+               if v_1.Op != OpAMD64MOVWQZX {
                        break
                }
-               v.reset(OpAMD64ADDQconstmodify)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
+               x := v_1.Args[0]
+               mem := v_2
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = int32ToAuxInt(off)
                v.Aux = symToAux(sym)
-               v.AddArg2(ptr, mem)
+               v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVQstore [off] {sym} ptr a:(ANDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
-       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)
-       // result: (ANDQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
+       // match: (MOVWstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // cond: is32Bit(int64(off1)+int64(off2))
+       // result: (MOVWstore [off1+off2] {sym} ptr val mem)
        for {
-               off := auxIntToInt32(v.AuxInt)
+               off1 := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
-               ptr := v_0
-               a := v_1
-               if a.Op != OpAMD64ANDQconst {
-                       break
-               }
-               c := auxIntToInt32(a.AuxInt)
-               l := a.Args[0]
-               if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               mem := l.Args[1]
-               ptr2 := l.Args[0]
-               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               ptr := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(is32Bit(int64(off1) + int64(off2))) {
                        break
                }
-               v.reset(OpAMD64ANDQconstmodify)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(sym)
-               v.AddArg2(ptr, mem)
+               v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVQstore [off] {sym} ptr a:(ORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
-       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)
-       // result: (ORQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
+       // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem)
+       // result: (MOVWstoreconst [makeValAndOff(int32(int16(c)),off)] {sym} ptr mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
-               a := v_1
-               if a.Op != OpAMD64ORQconst {
-                       break
-               }
-               c := auxIntToInt32(a.AuxInt)
-               l := a.Args[0]
-               if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
-                       break
-               }
-               mem := l.Args[1]
-               ptr2 := l.Args[0]
-               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) {
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               v.reset(OpAMD64ORQconstmodify)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
+               c := auxIntToInt32(v_1.AuxInt)
+               mem := v_2
+               v.reset(OpAMD64MOVWstoreconst)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int16(c)), off))
                v.Aux = symToAux(sym)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVQstore [off] {sym} ptr a:(XORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
-       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)
-       // result: (XORQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
+       // match: (MOVWstore [off] {sym} ptr (MOVQconst [c]) mem)
+       // result: (MOVWstoreconst [makeValAndOff(int32(int16(c)),off)] {sym} ptr mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
-               a := v_1
-               if a.Op != OpAMD64XORQconst {
-                       break
-               }
-               c := auxIntToInt32(a.AuxInt)
-               l := a.Args[0]
-               if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
-                       break
-               }
-               mem := l.Args[1]
-               ptr2 := l.Args[0]
-               if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) {
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               v.reset(OpAMD64XORQconstmodify)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off))
+               c := auxIntToInt64(v_1.AuxInt)
+               mem := v_2
+               v.reset(OpAMD64MOVWstoreconst)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int16(c)), off))
                v.Aux = symToAux(sym)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVQstore [off] {sym} ptr (MOVQf2i val) mem)
-       // result: (MOVSDstore [off] {sym} ptr val mem)
+       // match: (MOVWstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
+       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVQf2i {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               val := v_1.Args[0]
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               base := v_0.Args[0]
+               val := v_1
                mem := v_2
-               v.reset(OpAMD64MOVSDstore)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, val, mem)
+               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg3(base, val, mem)
                return true
        }
-       // match: (MOVQstore [i] {s} p x:(BSWAPQ w) mem)
-       // cond: x.Uses == 1 && buildcfg.GOAMD64 >= 3
-       // result: (MOVBEQstore [i] {s} p w mem)
+       // match: (MOVWstore [i] {s} p x1:(MOVWload [j] {s2} p2 mem) mem2:(MOVWstore [i-2] {s} p x2:(MOVWload [j-2] {s2} p2 mem) mem))
+       // cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1, x2, mem2)
+       // result: (MOVLstore [i-2] {s} p (MOVLload [j-2] {s2} p2 mem) mem)
        for {
                i := auxIntToInt32(v.AuxInt)
                s := auxToSym(v.Aux)
                p := v_0
-               x := v_1
-               if x.Op != OpAMD64BSWAPQ {
+               x1 := v_1
+               if x1.Op != OpAMD64MOVWload {
                        break
                }
-               w := x.Args[0]
-               mem := v_2
-               if !(x.Uses == 1 && buildcfg.GOAMD64 >= 3) {
+               j := auxIntToInt32(x1.AuxInt)
+               s2 := auxToSym(x1.Aux)
+               mem := x1.Args[1]
+               p2 := x1.Args[0]
+               mem2 := v_2
+               if mem2.Op != OpAMD64MOVWstore || auxIntToInt32(mem2.AuxInt) != i-2 || auxToSym(mem2.Aux) != s {
                        break
                }
-               v.reset(OpAMD64MOVBEQstore)
+               _ = mem2.Args[2]
+               if p != mem2.Args[0] {
+                       break
+               }
+               x2 := mem2.Args[1]
+               if x2.Op != OpAMD64MOVWload || auxIntToInt32(x2.AuxInt) != j-2 || auxToSym(x2.Aux) != s2 {
+                       break
+               }
+               _ = x2.Args[1]
+               if p2 != x2.Args[0] || mem != x2.Args[1] || mem != mem2.Args[2] || !(x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1, x2, mem2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = int32ToAuxInt(i - 2)
+               v.Aux = symToAux(s)
+               v0 := b.NewValue0(x2.Pos, OpAMD64MOVLload, typ.UInt32)
+               v0.AuxInt = int32ToAuxInt(j - 2)
+               v0.Aux = symToAux(s2)
+               v0.AddArg2(p2, mem)
+               v.AddArg3(p, v0, mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} p x:(ROLWconst [8] w) mem)
+       // cond: x.Uses == 1 && buildcfg.GOAMD64 >= 3
+       // result: (MOVBEWstore [i] {s} p w mem)
+       for {
+               i := auxIntToInt32(v.AuxInt)
+               s := auxToSym(v.Aux)
+               p := v_0
+               x := v_1
+               if x.Op != OpAMD64ROLWconst || auxIntToInt8(x.AuxInt) != 8 {
+                       break
+               }
+               w := x.Args[0]
+               mem := v_2
+               if !(x.Uses == 1 && buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64MOVBEWstore)
                v.AuxInt = int32ToAuxInt(i)
                v.Aux = symToAux(s)
                v.AddArg3(p, w, mem)
@@ -13535,14 +13418,12 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       config := b.Func.Config
-       // match: (MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
+       // match: (MOVWstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
        // cond: ValAndOff(sc).canAdd32(off)
-       // result: (MOVQstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem)
+       // result: (MOVWstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem)
        for {
                sc := auxIntToValAndOff(v.AuxInt)
                s := auxToSym(v.Aux)
@@ -13555,15 +13436,15 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool {
                if !(ValAndOff(sc).canAdd32(off)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconst)
+               v.reset(OpAMD64MOVWstoreconst)
                v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off))
                v.Aux = symToAux(s)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVQstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
+       // match: (MOVWstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
        // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)
-       // result: (MOVQstoreconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // result: (MOVWstoreconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem)
        for {
                sc := auxIntToValAndOff(v.AuxInt)
                sym1 := auxToSym(v.Aux)
@@ -13577,4589 +13458,1808 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconst)
+               v.reset(OpAMD64MOVWstoreconst)
                v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off))
                v.Aux = symToAux(mergeSym(sym1, sym2))
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [a] {s} p0 mem))
-       // cond: config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && clobber(x)
-       // result: (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64MULL(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MULL x (MOVLconst [c]))
+       // result: (MULLconst [c] x)
        for {
-               c := auxIntToValAndOff(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               x := v_1
-               if x.Op != OpAMD64MOVQstoreconst {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpAMD64MOVLconst {
+                               continue
+                       }
+                       c := auxIntToInt32(v_1.AuxInt)
+                       v.reset(OpAMD64MULLconst)
+                       v.AuxInt = int32ToAuxInt(c)
+                       v.AddArg(x)
+                       return true
                }
-               a := auxIntToValAndOff(x.AuxInt)
-               if auxToSym(x.Aux) != s {
+               break
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MULLconst(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MULLconst [c] (MULLconst [d] x))
+       // result: (MULLconst [c * d] x)
+       for {
+               c := auxIntToInt32(v.AuxInt)
+               if v_0.Op != OpAMD64MULLconst {
                        break
                }
-               mem := x.Args[1]
-               p0 := x.Args[0]
-               if !(config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && clobber(x)) {
+               d := auxIntToInt32(v_0.AuxInt)
+               x := v_0.Args[0]
+               v.reset(OpAMD64MULLconst)
+               v.AuxInt = int32ToAuxInt(c * d)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [-9] x)
+       // result: (NEGL (LEAL8 <v.Type> x x))
+       for {
+               if auxIntToInt32(v.AuxInt) != -9 {
                        break
                }
-               v.reset(OpAMD64MOVOstoreconst)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, a.Off()))
-               v.Aux = symToAux(s)
-               v.AddArg2(p0, mem)
+               x := v_0
+               v.reset(OpAMD64NEGL)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVQstoreconst [a] {s} p0 x:(MOVQstoreconst [c] {s} p1 mem))
-       // cond: config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && clobber(x)
-       // result: (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem)
+       // match: (MULLconst [-5] x)
+       // result: (NEGL (LEAL4 <v.Type> x x))
        for {
-               a := auxIntToValAndOff(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p0 := v_0
-               x := v_1
-               if x.Op != OpAMD64MOVQstoreconst {
+               if auxIntToInt32(v.AuxInt) != -5 {
                        break
                }
-               c := auxIntToValAndOff(x.AuxInt)
-               if auxToSym(x.Aux) != s {
+               x := v_0
+               v.reset(OpAMD64NEGL)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [-3] x)
+       // result: (NEGL (LEAL2 <v.Type> x x))
+       for {
+               if auxIntToInt32(v.AuxInt) != -3 {
                        break
                }
-               mem := x.Args[1]
-               p1 := x.Args[0]
-               if !(config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && clobber(x)) {
+               x := v_0
+               v.reset(OpAMD64NEGL)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [-1] x)
+       // result: (NEGL x)
+       for {
+               if auxIntToInt32(v.AuxInt) != -1 {
                        break
                }
-               v.reset(OpAMD64MOVOstoreconst)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, a.Off()))
-               v.Aux = symToAux(s)
-               v.AddArg2(p0, mem)
+               x := v_0
+               v.reset(OpAMD64NEGL)
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSDload(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVSDload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (MOVSDload [off1+off2] {sym} ptr mem)
+       // match: (MULLconst [ 0] _)
+       // result: (MOVLconst [0])
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64ADDQconst {
+               if auxIntToInt32(v.AuxInt) != 0 {
                        break
                }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(int64(off1) + int64(off2))) {
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = int32ToAuxInt(0)
+               return true
+       }
+       // match: (MULLconst [ 1] x)
+       // result: x
+       for {
+               if auxIntToInt32(v.AuxInt) != 1 {
                        break
                }
-               v.reset(OpAMD64MOVSDload)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(sym)
-               v.AddArg2(ptr, mem)
+               x := v_0
+               v.copyOf(x)
                return true
        }
-       // match: (MOVSDload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
-       // result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (MULLconst [ 3] x)
+       // result: (LEAL2 x x)
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64LEAQ {
+               if auxIntToInt32(v.AuxInt) != 3 {
                        break
                }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               sym2 := auxToSym(v_0.Aux)
-               base := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
+               x := v_0
+               v.reset(OpAMD64LEAL2)
+               v.AddArg2(x, x)
+               return true
+       }
+       // match: (MULLconst [ 5] x)
+       // result: (LEAL4 x x)
+       for {
+               if auxIntToInt32(v.AuxInt) != 5 {
                        break
                }
-               v.reset(OpAMD64MOVSDload)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg2(base, mem)
+               x := v_0
+               v.reset(OpAMD64LEAL4)
+               v.AddArg2(x, x)
                return true
        }
-       // match: (MOVSDload [off] {sym} ptr (MOVQstore [off] {sym} ptr val _))
-       // result: (MOVQi2f val)
+       // match: (MULLconst [ 7] x)
+       // result: (LEAL2 x (LEAL2 <v.Type> x x))
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVQstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
+               if auxIntToInt32(v.AuxInt) != 7 {
                        break
                }
-               val := v_1.Args[1]
-               if ptr != v_1.Args[0] {
+               x := v_0
+               v.reset(OpAMD64LEAL2)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(x, v0)
+               return true
+       }
+       // match: (MULLconst [ 9] x)
+       // result: (LEAL8 x x)
+       for {
+               if auxIntToInt32(v.AuxInt) != 9 {
                        break
                }
-               v.reset(OpAMD64MOVQi2f)
-               v.AddArg(val)
+               x := v_0
+               v.reset(OpAMD64LEAL8)
+               v.AddArg2(x, x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSDstore(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVSDstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-       // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (MOVSDstore [off1+off2] {sym} ptr val mem)
+       // match: (MULLconst [11] x)
+       // result: (LEAL2 x (LEAL4 <v.Type> x x))
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               ptr := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(int64(off1) + int64(off2))) {
+               if auxIntToInt32(v.AuxInt) != 11 {
                        break
                }
-               v.reset(OpAMD64MOVSDstore)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, val, mem)
+               x := v_0
+               v.reset(OpAMD64LEAL2)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(x, v0)
                return true
        }
-       // match: (MOVSDstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
-       // result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // match: (MULLconst [13] x)
+       // result: (LEAL4 x (LEAL2 <v.Type> x x))
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               sym2 := auxToSym(v_0.Aux)
-               base := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
+               if auxIntToInt32(v.AuxInt) != 13 {
                        break
                }
-               v.reset(OpAMD64MOVSDstore)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg3(base, val, mem)
+               x := v_0
+               v.reset(OpAMD64LEAL4)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(x, v0)
                return true
        }
-       // match: (MOVSDstore [off] {sym} ptr (MOVQi2f val) mem)
-       // result: (MOVQstore [off] {sym} ptr val mem)
+       // match: (MULLconst [19] x)
+       // result: (LEAL2 x (LEAL8 <v.Type> x x))
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVQi2f {
+               if auxIntToInt32(v.AuxInt) != 19 {
                        break
                }
-               val := v_1.Args[0]
-               mem := v_2
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, val, mem)
+               x := v_0
+               v.reset(OpAMD64LEAL2)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(x, v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSSload(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVSSload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (MOVSSload [off1+off2] {sym} ptr mem)
+       // match: (MULLconst [21] x)
+       // result: (LEAL4 x (LEAL4 <v.Type> x x))
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(int64(off1) + int64(off2))) {
+               if auxIntToInt32(v.AuxInt) != 21 {
                        break
                }
-               v.reset(OpAMD64MOVSSload)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(sym)
-               v.AddArg2(ptr, mem)
+               x := v_0
+               v.reset(OpAMD64LEAL4)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(x, v0)
                return true
        }
-       // match: (MOVSSload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
-       // result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (MULLconst [25] x)
+       // result: (LEAL8 x (LEAL2 <v.Type> x x))
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               sym2 := auxToSym(v_0.Aux)
-               base := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
+               if auxIntToInt32(v.AuxInt) != 25 {
                        break
                }
-               v.reset(OpAMD64MOVSSload)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg2(base, mem)
+               x := v_0
+               v.reset(OpAMD64LEAL8)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(x, v0)
                return true
        }
-       // match: (MOVSSload [off] {sym} ptr (MOVLstore [off] {sym} ptr val _))
-       // result: (MOVLi2f val)
+       // match: (MULLconst [27] x)
+       // result: (LEAL8 (LEAL2 <v.Type> x x) (LEAL2 <v.Type> x x))
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVLstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
-                       break
-               }
-               val := v_1.Args[1]
-               if ptr != v_1.Args[0] {
+               if auxIntToInt32(v.AuxInt) != 27 {
                        break
                }
-               v.reset(OpAMD64MOVLi2f)
-               v.AddArg(val)
+               x := v_0
+               v.reset(OpAMD64LEAL8)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(v0, v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSSstore(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVSSstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-       // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (MOVSSstore [off1+off2] {sym} ptr val mem)
+       // match: (MULLconst [37] x)
+       // result: (LEAL4 x (LEAL8 <v.Type> x x))
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64ADDQconst {
+               if auxIntToInt32(v.AuxInt) != 37 {
                        break
                }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               ptr := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(int64(off1) + int64(off2))) {
+               x := v_0
+               v.reset(OpAMD64LEAL4)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(x, v0)
+               return true
+       }
+       // match: (MULLconst [41] x)
+       // result: (LEAL8 x (LEAL4 <v.Type> x x))
+       for {
+               if auxIntToInt32(v.AuxInt) != 41 {
                        break
                }
-               v.reset(OpAMD64MOVSSstore)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, val, mem)
+               x := v_0
+               v.reset(OpAMD64LEAL8)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(x, v0)
                return true
        }
-       // match: (MOVSSstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
-       // result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // match: (MULLconst [45] x)
+       // result: (LEAL8 (LEAL4 <v.Type> x x) (LEAL4 <v.Type> x x))
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64LEAQ {
+               if auxIntToInt32(v.AuxInt) != 45 {
                        break
                }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               sym2 := auxToSym(v_0.Aux)
-               base := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
+               x := v_0
+               v.reset(OpAMD64LEAL8)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(v0, v0)
+               return true
+       }
+       // match: (MULLconst [73] x)
+       // result: (LEAL8 x (LEAL8 <v.Type> x x))
+       for {
+               if auxIntToInt32(v.AuxInt) != 73 {
                        break
                }
-               v.reset(OpAMD64MOVSSstore)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg3(base, val, mem)
+               x := v_0
+               v.reset(OpAMD64LEAL8)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(x, v0)
                return true
        }
-       // match: (MOVSSstore [off] {sym} ptr (MOVLi2f val) mem)
-       // result: (MOVLstore [off] {sym} ptr val mem)
+       // match: (MULLconst [81] x)
+       // result: (LEAL8 (LEAL8 <v.Type> x x) (LEAL8 <v.Type> x x))
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVLi2f {
+               if auxIntToInt32(v.AuxInt) != 81 {
                        break
                }
-               val := v_1.Args[0]
-               mem := v_2
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, val, mem)
+               x := v_0
+               v.reset(OpAMD64LEAL8)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(v0, v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWQSX(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MOVWQSX x:(MOVWload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo64(int64(c)+1) && c >= 15
+       // result: (SUBL (SHLLconst <v.Type> [int8(log64(int64(c)+1))] x) x)
        for {
+               c := auxIntToInt32(v.AuxInt)
                x := v_0
-               if x.Op != OpAMD64MOVWload {
+               if !(isPowerOfTwo64(int64(c)+1) && c >= 15) {
                        break
                }
-               off := auxIntToInt32(x.AuxInt)
-               sym := auxToSym(x.Aux)
-               mem := x.Args[1]
-               ptr := x.Args[0]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               b = x.Block
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = int32ToAuxInt(off)
-               v0.Aux = symToAux(sym)
-               v0.AddArg2(ptr, mem)
+               v.reset(OpAMD64SUBL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v0.AuxInt = int8ToAuxInt(int8(log64(int64(c) + 1)))
+               v0.AddArg(x)
+               v.AddArg2(v0, x)
                return true
        }
-       // match: (MOVWQSX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo32(c-1) && c >= 17
+       // result: (LEAL1 (SHLLconst <v.Type> [int8(log32(c-1))] x) x)
        for {
+               c := auxIntToInt32(v.AuxInt)
                x := v_0
-               if x.Op != OpAMD64MOVLload {
+               if !(isPowerOfTwo32(c-1) && c >= 17) {
                        break
                }
-               off := auxIntToInt32(x.AuxInt)
-               sym := auxToSym(x.Aux)
-               mem := x.Args[1]
-               ptr := x.Args[0]
-               if !(x.Uses == 1 && clobber(x)) {
+               v.reset(OpAMD64LEAL1)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v0.AuxInt = int8ToAuxInt(int8(log32(c - 1)))
+               v0.AddArg(x)
+               v.AddArg2(v0, x)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo32(c-2) && c >= 34
+       // result: (LEAL2 (SHLLconst <v.Type> [int8(log32(c-2))] x) x)
+       for {
+               c := auxIntToInt32(v.AuxInt)
+               x := v_0
+               if !(isPowerOfTwo32(c-2) && c >= 34) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = int32ToAuxInt(off)
-               v0.Aux = symToAux(sym)
-               v0.AddArg2(ptr, mem)
+               v.reset(OpAMD64LEAL2)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v0.AuxInt = int8ToAuxInt(int8(log32(c - 2)))
+               v0.AddArg(x)
+               v.AddArg2(v0, x)
                return true
        }
-       // match: (MOVWQSX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo32(c-4) && c >= 68
+       // result: (LEAL4 (SHLLconst <v.Type> [int8(log32(c-4))] x) x)
        for {
+               c := auxIntToInt32(v.AuxInt)
                x := v_0
-               if x.Op != OpAMD64MOVQload {
+               if !(isPowerOfTwo32(c-4) && c >= 68) {
                        break
                }
-               off := auxIntToInt32(x.AuxInt)
-               sym := auxToSym(x.Aux)
-               mem := x.Args[1]
-               ptr := x.Args[0]
-               if !(x.Uses == 1 && clobber(x)) {
+               v.reset(OpAMD64LEAL4)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v0.AuxInt = int8ToAuxInt(int8(log32(c - 4)))
+               v0.AddArg(x)
+               v.AddArg2(v0, x)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo32(c-8) && c >= 136
+       // result: (LEAL8 (SHLLconst <v.Type> [int8(log32(c-8))] x) x)
+       for {
+               c := auxIntToInt32(v.AuxInt)
+               x := v_0
+               if !(isPowerOfTwo32(c-8) && c >= 136) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = int32ToAuxInt(off)
-               v0.Aux = symToAux(sym)
-               v0.AddArg2(ptr, mem)
+               v.reset(OpAMD64LEAL8)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v0.AuxInt = int8ToAuxInt(int8(log32(c - 8)))
+               v0.AddArg(x)
+               v.AddArg2(v0, x)
                return true
        }
-       // match: (MOVWQSX (ANDLconst [c] x))
-       // cond: c & 0x8000 == 0
-       // result: (ANDLconst [c & 0x7fff] x)
+       // match: (MULLconst [c] x)
+       // cond: c%3 == 0 && isPowerOfTwo32(c/3)
+       // result: (SHLLconst [int8(log32(c/3))] (LEAL2 <v.Type> x x))
        for {
-               if v_0.Op != OpAMD64ANDLconst {
+               c := auxIntToInt32(v.AuxInt)
+               x := v_0
+               if !(c%3 == 0 && isPowerOfTwo32(c/3)) {
                        break
                }
-               c := auxIntToInt32(v_0.AuxInt)
-               x := v_0.Args[0]
-               if !(c&0x8000 == 0) {
+               v.reset(OpAMD64SHLLconst)
+               v.AuxInt = int8ToAuxInt(int8(log32(c / 3)))
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: c%5 == 0 && isPowerOfTwo32(c/5)
+       // result: (SHLLconst [int8(log32(c/5))] (LEAL4 <v.Type> x x))
+       for {
+               c := auxIntToInt32(v.AuxInt)
+               x := v_0
+               if !(c%5 == 0 && isPowerOfTwo32(c/5)) {
                        break
                }
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = int32ToAuxInt(c & 0x7fff)
-               v.AddArg(x)
+               v.reset(OpAMD64SHLLconst)
+               v.AuxInt = int8ToAuxInt(int8(log32(c / 5)))
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWQSX (MOVWQSX x))
-       // result: (MOVWQSX x)
+       // match: (MULLconst [c] x)
+       // cond: c%9 == 0 && isPowerOfTwo32(c/9)
+       // result: (SHLLconst [int8(log32(c/9))] (LEAL8 <v.Type> x x))
        for {
-               if v_0.Op != OpAMD64MOVWQSX {
+               c := auxIntToInt32(v.AuxInt)
+               x := v_0
+               if !(c%9 == 0 && isPowerOfTwo32(c/9)) {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVWQSX)
-               v.AddArg(x)
+               v.reset(OpAMD64SHLLconst)
+               v.AuxInt = int8ToAuxInt(int8(log32(c / 9)))
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWQSX (MOVBQSX x))
-       // result: (MOVBQSX x)
+       // match: (MULLconst [c] (MOVLconst [d]))
+       // result: (MOVLconst [c*d])
        for {
-               if v_0.Op != OpAMD64MOVBQSX {
+               c := auxIntToInt32(v.AuxInt)
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVBQSX)
-               v.AddArg(x)
+               d := auxIntToInt32(v_0.AuxInt)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = int32ToAuxInt(c * d)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVWQSXload(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MULQ(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVWQSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVWQSX x)
+       // match: (MULQ x (MOVQconst [c]))
+       // cond: is32Bit(c)
+       // result: (MULQconst [int32(c)] x)
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVWstore {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpAMD64MOVQconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_1.AuxInt)
+                       if !(is32Bit(c)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MULQconst)
+                       v.AuxInt = int32ToAuxInt(int32(c))
+                       v.AddArg(x)
+                       return true
+               }
+               break
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MULQconst(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MULQconst [c] (MULQconst [d] x))
+       // cond: is32Bit(int64(c)*int64(d))
+       // result: (MULQconst [c * d] x)
+       for {
+               c := auxIntToInt32(v.AuxInt)
+               if v_0.Op != OpAMD64MULQconst {
                        break
                }
-               off2 := auxIntToInt32(v_1.AuxInt)
-               sym2 := auxToSym(v_1.Aux)
-               x := v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               d := auxIntToInt32(v_0.AuxInt)
+               x := v_0.Args[0]
+               if !(is32Bit(int64(c) * int64(d))) {
                        break
                }
-               v.reset(OpAMD64MOVWQSX)
+               v.reset(OpAMD64MULQconst)
+               v.AuxInt = int32ToAuxInt(c * d)
                v.AddArg(x)
                return true
        }
-       // match: (MOVWQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
-       // result: (MOVWQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (MULQconst [-9] x)
+       // result: (NEGQ (LEAQ8 <v.Type> x x))
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               sym2 := auxToSym(v_0.Aux)
-               base := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
+               if auxIntToInt32(v.AuxInt) != -9 {
                        break
                }
-               v.reset(OpAMD64MOVWQSXload)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg2(base, mem)
+               x := v_0
+               v.reset(OpAMD64NEGQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWQZX(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MOVWQZX x:(MOVWload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+       // match: (MULQconst [-5] x)
+       // result: (NEGQ (LEAQ4 <v.Type> x x))
        for {
-               x := v_0
-               if x.Op != OpAMD64MOVWload {
-                       break
-               }
-               off := auxIntToInt32(x.AuxInt)
-               sym := auxToSym(x.Aux)
-               mem := x.Args[1]
-               ptr := x.Args[0]
-               if !(x.Uses == 1 && clobber(x)) {
+               if auxIntToInt32(v.AuxInt) != -5 {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = int32ToAuxInt(off)
-               v0.Aux = symToAux(sym)
-               v0.AddArg2(ptr, mem)
+               x := v_0
+               v.reset(OpAMD64NEGQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWQZX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+       // match: (MULQconst [-3] x)
+       // result: (NEGQ (LEAQ2 <v.Type> x x))
        for {
-               x := v_0
-               if x.Op != OpAMD64MOVLload {
-                       break
-               }
-               off := auxIntToInt32(x.AuxInt)
-               sym := auxToSym(x.Aux)
-               mem := x.Args[1]
-               ptr := x.Args[0]
-               if !(x.Uses == 1 && clobber(x)) {
+               if auxIntToInt32(v.AuxInt) != -3 {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = int32ToAuxInt(off)
-               v0.Aux = symToAux(sym)
-               v0.AddArg2(ptr, mem)
+               x := v_0
+               v.reset(OpAMD64NEGQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWQZX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+       // match: (MULQconst [-1] x)
+       // result: (NEGQ x)
        for {
-               x := v_0
-               if x.Op != OpAMD64MOVQload {
+               if auxIntToInt32(v.AuxInt) != -1 {
                        break
                }
-               off := auxIntToInt32(x.AuxInt)
-               sym := auxToSym(x.Aux)
-               mem := x.Args[1]
-               ptr := x.Args[0]
-               if !(x.Uses == 1 && clobber(x)) {
+               x := v_0
+               v.reset(OpAMD64NEGQ)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [ 0] _)
+       // result: (MOVQconst [0])
+       for {
+               if auxIntToInt32(v.AuxInt) != 0 {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type)
-               v.copyOf(v0)
-               v0.AuxInt = int32ToAuxInt(off)
-               v0.Aux = symToAux(sym)
-               v0.AddArg2(ptr, mem)
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = int64ToAuxInt(0)
                return true
        }
-       // match: (MOVWQZX x)
-       // cond: zeroUpper48Bits(x,3)
+       // match: (MULQconst [ 1] x)
        // result: x
        for {
-               x := v_0
-               if !(zeroUpper48Bits(x, 3)) {
+               if auxIntToInt32(v.AuxInt) != 1 {
                        break
                }
+               x := v_0
                v.copyOf(x)
                return true
        }
-       // match: (MOVWQZX (ANDLconst [c] x))
-       // result: (ANDLconst [c & 0xffff] x)
+       // match: (MULQconst [ 3] x)
+       // result: (LEAQ2 x x)
        for {
-               if v_0.Op != OpAMD64ANDLconst {
+               if auxIntToInt32(v.AuxInt) != 3 {
                        break
                }
-               c := auxIntToInt32(v_0.AuxInt)
-               x := v_0.Args[0]
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = int32ToAuxInt(c & 0xffff)
-               v.AddArg(x)
+               x := v_0
+               v.reset(OpAMD64LEAQ2)
+               v.AddArg2(x, x)
                return true
        }
-       // match: (MOVWQZX (MOVWQZX x))
-       // result: (MOVWQZX x)
+       // match: (MULQconst [ 5] x)
+       // result: (LEAQ4 x x)
        for {
-               if v_0.Op != OpAMD64MOVWQZX {
+               if auxIntToInt32(v.AuxInt) != 5 {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVWQZX)
-               v.AddArg(x)
+               x := v_0
+               v.reset(OpAMD64LEAQ4)
+               v.AddArg2(x, x)
                return true
        }
-       // match: (MOVWQZX (MOVBQZX x))
-       // result: (MOVBQZX x)
+       // match: (MULQconst [ 7] x)
+       // result: (LEAQ2 x (LEAQ2 <v.Type> x x))
        for {
-               if v_0.Op != OpAMD64MOVBQZX {
+               if auxIntToInt32(v.AuxInt) != 7 {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVBQZX)
-               v.AddArg(x)
+               x := v_0
+               v.reset(OpAMD64LEAQ2)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(x, v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWload(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       config := b.Func.Config
-       // match: (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVWQZX x)
+       // match: (MULQconst [ 9] x)
+       // result: (LEAQ8 x x)
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVWstore {
-                       break
-               }
-               off2 := auxIntToInt32(v_1.AuxInt)
-               sym2 := auxToSym(v_1.Aux)
-               x := v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               if auxIntToInt32(v.AuxInt) != 9 {
                        break
                }
-               v.reset(OpAMD64MOVWQZX)
-               v.AddArg(x)
+               x := v_0
+               v.reset(OpAMD64LEAQ8)
+               v.AddArg2(x, x)
                return true
        }
-       // match: (MOVWload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (MOVWload [off1+off2] {sym} ptr mem)
+       // match: (MULQconst [11] x)
+       // result: (LEAQ2 x (LEAQ4 <v.Type> x x))
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(int64(off1) + int64(off2))) {
+               if auxIntToInt32(v.AuxInt) != 11 {
                        break
                }
-               v.reset(OpAMD64MOVWload)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(sym)
-               v.AddArg2(ptr, mem)
+               x := v_0
+               v.reset(OpAMD64LEAQ2)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(x, v0)
                return true
        }
-       // match: (MOVWload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
-       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (MULQconst [13] x)
+       // result: (LEAQ4 x (LEAQ2 <v.Type> x x))
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               sym2 := auxToSym(v_0.Aux)
-               base := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
+               if auxIntToInt32(v.AuxInt) != 13 {
                        break
                }
-               v.reset(OpAMD64MOVWload)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg2(base, mem)
+               x := v_0
+               v.reset(OpAMD64LEAQ4)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(x, v0)
                return true
        }
-       // match: (MOVWload [off] {sym} (SB) _)
-       // cond: symIsRO(sym)
-       // result: (MOVLconst [int32(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))])
+       // match: (MULQconst [19] x)
+       // result: (LEAQ2 x (LEAQ8 <v.Type> x x))
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpSB || !(symIsRO(sym)) {
+               if auxIntToInt32(v.AuxInt) != 19 {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int32ToAuxInt(int32(read16(sym, int64(off), config.ctxt.Arch.ByteOrder)))
+               x := v_0
+               v.reset(OpAMD64LEAQ2)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(x, v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (MOVWstore [off] {sym} ptr (MOVWQSX x) mem)
-       // result: (MOVWstore [off] {sym} ptr x mem)
+       // match: (MULQconst [21] x)
+       // result: (LEAQ4 x (LEAQ4 <v.Type> x x))
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVWQSX {
+               if auxIntToInt32(v.AuxInt) != 21 {
                        break
                }
-               x := v_1.Args[0]
-               mem := v_2
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, x, mem)
+               x := v_0
+               v.reset(OpAMD64LEAQ4)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(x, v0)
                return true
        }
-       // match: (MOVWstore [off] {sym} ptr (MOVWQZX x) mem)
-       // result: (MOVWstore [off] {sym} ptr x mem)
+       // match: (MULQconst [25] x)
+       // result: (LEAQ8 x (LEAQ2 <v.Type> x x))
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVWQZX {
+               if auxIntToInt32(v.AuxInt) != 25 {
                        break
                }
-               x := v_1.Args[0]
-               mem := v_2
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, x, mem)
+               x := v_0
+               v.reset(OpAMD64LEAQ8)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(x, v0)
                return true
        }
-       // match: (MOVWstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-       // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (MOVWstore [off1+off2] {sym} ptr val mem)
+       // match: (MULQconst [27] x)
+       // result: (LEAQ8 (LEAQ2 <v.Type> x x) (LEAQ2 <v.Type> x x))
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               ptr := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(int64(off1) + int64(off2))) {
+               if auxIntToInt32(v.AuxInt) != 27 {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, val, mem)
+               x := v_0
+               v.reset(OpAMD64LEAQ8)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(v0, v0)
                return true
        }
-       // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem)
-       // result: (MOVWstoreconst [makeValAndOff(int32(int16(c)),off)] {sym} ptr mem)
+       // match: (MULQconst [37] x)
+       // result: (LEAQ4 x (LEAQ8 <v.Type> x x))
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVLconst {
+               if auxIntToInt32(v.AuxInt) != 37 {
                        break
                }
-               c := auxIntToInt32(v_1.AuxInt)
-               mem := v_2
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int16(c)), off))
-               v.Aux = symToAux(sym)
-               v.AddArg2(ptr, mem)
+               x := v_0
+               v.reset(OpAMD64LEAQ4)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(x, v0)
                return true
        }
-       // match: (MOVWstore [off] {sym} ptr (MOVQconst [c]) mem)
-       // result: (MOVWstoreconst [makeValAndOff(int32(int16(c)),off)] {sym} ptr mem)
+       // match: (MULQconst [41] x)
+       // result: (LEAQ8 x (LEAQ4 <v.Type> x x))
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpAMD64MOVQconst {
+               if auxIntToInt32(v.AuxInt) != 41 {
                        break
                }
-               c := auxIntToInt64(v_1.AuxInt)
-               mem := v_2
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int16(c)), off))
-               v.Aux = symToAux(sym)
-               v.AddArg2(ptr, mem)
+               x := v_0
+               v.reset(OpAMD64LEAQ8)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(x, v0)
                return true
        }
-       // match: (MOVWstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
-       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // match: (MULQconst [45] x)
+       // result: (LEAQ8 (LEAQ4 <v.Type> x x) (LEAQ4 <v.Type> x x))
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               sym2 := auxToSym(v_0.Aux)
-               base := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
+               if auxIntToInt32(v.AuxInt) != 45 {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg3(base, val, mem)
+               x := v_0
+               v.reset(OpAMD64LEAQ8)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(v0, v0)
                return true
        }
-       // match: (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstore [i-2] {s} p w mem)
+       // match: (MULQconst [73] x)
+       // result: (LEAQ8 x (LEAQ8 <v.Type> x x))
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpAMD64SHRLconst || auxIntToInt8(v_1.AuxInt) != 16 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVWstore || auxIntToInt32(x.AuxInt) != i-2 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+               if auxIntToInt32(v.AuxInt) != 73 {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = int32ToAuxInt(i - 2)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
+               x := v_0
+               v.reset(OpAMD64LEAQ8)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(x, v0)
                return true
        }
-       // match: (MOVWstore [i] {s} p (SHRQconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstore [i-2] {s} p w mem)
+       // match: (MULQconst [81] x)
+       // result: (LEAQ8 (LEAQ8 <v.Type> x x) (LEAQ8 <v.Type> x x))
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpAMD64SHRQconst || auxIntToInt8(v_1.AuxInt) != 16 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVWstore || auxIntToInt32(x.AuxInt) != i-2 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+               if auxIntToInt32(v.AuxInt) != 81 {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = int32ToAuxInt(i - 2)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
+               x := v_0
+               v.reset(OpAMD64LEAQ8)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg2(v0, v0)
                return true
        }
-       // match: (MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstore [i-2] {s} p w0 mem)
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo64(int64(c)+1) && c >= 15
+       // result: (SUBQ (SHLQconst <v.Type> [int8(log64(int64(c)+1))] x) x)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpAMD64SHRLconst {
-                       break
-               }
-               j := auxIntToInt8(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVWstore || auxIntToInt32(x.AuxInt) != i-2 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRLconst || auxIntToInt8(w0.AuxInt) != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               c := auxIntToInt32(v.AuxInt)
+               x := v_0
+               if !(isPowerOfTwo64(int64(c)+1) && c >= 15) {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = int32ToAuxInt(i - 2)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w0, mem)
+               v.reset(OpAMD64SUBQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v0.AuxInt = int8ToAuxInt(int8(log64(int64(c) + 1)))
+               v0.AddArg(x)
+               v.AddArg2(v0, x)
                return true
        }
-       // match: (MOVWstore [i] {s} p (SHRQconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRQconst [j-16] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstore [i-2] {s} p w0 mem)
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo32(c-1) && c >= 17
+       // result: (LEAQ1 (SHLQconst <v.Type> [int8(log32(c-1))] x) x)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpAMD64SHRQconst {
-                       break
-               }
-               j := auxIntToInt8(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVWstore || auxIntToInt32(x.AuxInt) != i-2 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRQconst || auxIntToInt8(w0.AuxInt) != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               c := auxIntToInt32(v.AuxInt)
+               x := v_0
+               if !(isPowerOfTwo32(c-1) && c >= 17) {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = int32ToAuxInt(i - 2)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w0, mem)
+               v.reset(OpAMD64LEAQ1)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v0.AuxInt = int8ToAuxInt(int8(log32(c - 1)))
+               v0.AddArg(x)
+               v.AddArg2(v0, x)
                return true
        }
-       // match: (MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i] {s} p0 w mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)
-       // result: (MOVLstore [i] {s} p0 w mem)
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo32(c-2) && c >= 34
+       // result: (LEAQ2 (SHLQconst <v.Type> [int8(log32(c-2))] x) x)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               if v_1.Op != OpAMD64SHRLconst || auxIntToInt8(v_1.AuxInt) != 16 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVWstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               p0 := x.Args[0]
-               if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) {
+               c := auxIntToInt32(v.AuxInt)
+               x := v_0
+               if !(isPowerOfTwo32(c-2) && c >= 34) {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w, mem)
+               v.reset(OpAMD64LEAQ2)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v0.AuxInt = int8ToAuxInt(int8(log32(c - 2)))
+               v0.AddArg(x)
+               v.AddArg2(v0, x)
                return true
        }
-       // match: (MOVWstore [i] {s} p1 (SHRQconst [16] w) x:(MOVWstore [i] {s} p0 w mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)
-       // result: (MOVLstore [i] {s} p0 w mem)
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo32(c-4) && c >= 68
+       // result: (LEAQ4 (SHLQconst <v.Type> [int8(log32(c-4))] x) x)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               if v_1.Op != OpAMD64SHRQconst || auxIntToInt8(v_1.AuxInt) != 16 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVWstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               p0 := x.Args[0]
-               if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) {
+               c := auxIntToInt32(v.AuxInt)
+               x := v_0
+               if !(isPowerOfTwo32(c-4) && c >= 68) {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w, mem)
+               v.reset(OpAMD64LEAQ4)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v0.AuxInt = int8ToAuxInt(int8(log32(c - 4)))
+               v0.AddArg(x)
+               v.AddArg2(v0, x)
                return true
        }
-       // match: (MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i] {s} p0 w0:(SHRLconst [j-16] w) mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)
-       // result: (MOVLstore [i] {s} p0 w0 mem)
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo32(c-8) && c >= 136
+       // result: (LEAQ8 (SHLQconst <v.Type> [int8(log32(c-8))] x) x)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               if v_1.Op != OpAMD64SHRLconst {
-                       break
-               }
-               j := auxIntToInt8(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVWstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               p0 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRLconst || auxIntToInt8(w0.AuxInt) != j-16 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) {
+               c := auxIntToInt32(v.AuxInt)
+               x := v_0
+               if !(isPowerOfTwo32(c-8) && c >= 136) {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w0, mem)
+               v.reset(OpAMD64LEAQ8)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v0.AuxInt = int8ToAuxInt(int8(log32(c - 8)))
+               v0.AddArg(x)
+               v.AddArg2(v0, x)
                return true
        }
-       // match: (MOVWstore [i] {s} p1 (SHRQconst [j] w) x:(MOVWstore [i] {s} p0 w0:(SHRQconst [j-16] w) mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)
-       // result: (MOVLstore [i] {s} p0 w0 mem)
+       // match: (MULQconst [c] x)
+       // cond: c%3 == 0 && isPowerOfTwo32(c/3)
+       // result: (SHLQconst [int8(log32(c/3))] (LEAQ2 <v.Type> x x))
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               if v_1.Op != OpAMD64SHRQconst {
-                       break
-               }
-               j := auxIntToInt8(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpAMD64MOVWstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
+               c := auxIntToInt32(v.AuxInt)
+               x := v_0
+               if !(c%3 == 0 && isPowerOfTwo32(c/3)) {
                        break
                }
-               mem := x.Args[2]
-               p0 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRQconst || auxIntToInt8(w0.AuxInt) != j-16 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p0, w0, mem)
+               v.reset(OpAMD64SHLQconst)
+               v.AuxInt = int8ToAuxInt(int8(log32(c / 3)))
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWstore [i] {s} p x1:(MOVWload [j] {s2} p2 mem) mem2:(MOVWstore [i-2] {s} p x2:(MOVWload [j-2] {s2} p2 mem) mem))
-       // cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1, x2, mem2)
-       // result: (MOVLstore [i-2] {s} p (MOVLload [j-2] {s2} p2 mem) mem)
+       // match: (MULQconst [c] x)
+       // cond: c%5 == 0 && isPowerOfTwo32(c/5)
+       // result: (SHLQconst [int8(log32(c/5))] (LEAQ4 <v.Type> x x))
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               x1 := v_1
-               if x1.Op != OpAMD64MOVWload {
-                       break
-               }
-               j := auxIntToInt32(x1.AuxInt)
-               s2 := auxToSym(x1.Aux)
-               mem := x1.Args[1]
-               p2 := x1.Args[0]
-               mem2 := v_2
-               if mem2.Op != OpAMD64MOVWstore || auxIntToInt32(mem2.AuxInt) != i-2 || auxToSym(mem2.Aux) != s {
-                       break
-               }
-               _ = mem2.Args[2]
-               if p != mem2.Args[0] {
+               c := auxIntToInt32(v.AuxInt)
+               x := v_0
+               if !(c%5 == 0 && isPowerOfTwo32(c/5)) {
                        break
                }
-               x2 := mem2.Args[1]
-               if x2.Op != OpAMD64MOVWload || auxIntToInt32(x2.AuxInt) != j-2 || auxToSym(x2.Aux) != s2 {
+               v.reset(OpAMD64SHLQconst)
+               v.AuxInt = int8ToAuxInt(int8(log32(c / 5)))
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: c%9 == 0 && isPowerOfTwo32(c/9)
+       // result: (SHLQconst [int8(log32(c/9))] (LEAQ8 <v.Type> x x))
+       for {
+               c := auxIntToInt32(v.AuxInt)
+               x := v_0
+               if !(c%9 == 0 && isPowerOfTwo32(c/9)) {
                        break
                }
-               _ = x2.Args[1]
-               if p2 != x2.Args[0] || mem != x2.Args[1] || mem != mem2.Args[2] || !(x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1, x2, mem2)) {
+               v.reset(OpAMD64SHLQconst)
+               v.AuxInt = int8ToAuxInt(int8(log32(c / 9)))
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
+               v0.AddArg2(x, x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [c] (MOVQconst [d]))
+       // result: (MOVQconst [int64(c)*d])
+       for {
+               c := auxIntToInt32(v.AuxInt)
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = int32ToAuxInt(i - 2)
-               v.Aux = symToAux(s)
-               v0 := b.NewValue0(x2.Pos, OpAMD64MOVLload, typ.UInt32)
-               v0.AuxInt = int32ToAuxInt(j - 2)
-               v0.Aux = symToAux(s2)
-               v0.AddArg2(p2, mem)
-               v.AddArg3(p, v0, mem)
+               d := auxIntToInt64(v_0.AuxInt)
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = int64ToAuxInt(int64(c) * d)
                return true
        }
-       // match: (MOVWstore [i] {s} p x:(ROLWconst [8] w) mem)
-       // cond: x.Uses == 1 && buildcfg.GOAMD64 >= 3
-       // result: (MOVBEWstore [i] {s} p w mem)
+       // match: (MULQconst [c] (NEGQ x))
+       // cond: c != -(1<<31)
+       // result: (MULQconst [-c] x)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               x := v_1
-               if x.Op != OpAMD64ROLWconst || auxIntToInt8(x.AuxInt) != 8 {
+               c := auxIntToInt32(v.AuxInt)
+               if v_0.Op != OpAMD64NEGQ {
                        break
                }
-               w := x.Args[0]
-               mem := v_2
-               if !(x.Uses == 1 && buildcfg.GOAMD64 >= 3) {
+               x := v_0.Args[0]
+               if !(c != -(1 << 31)) {
                        break
                }
-               v.reset(OpAMD64MOVBEWstore)
-               v.AuxInt = int32ToAuxInt(i)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
+               v.reset(OpAMD64MULQconst)
+               v.AuxInt = int32ToAuxInt(-c)
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MULSD(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVWstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd32(off)
-       // result: (MOVWstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem)
+       // match: (MULSD x l:(MOVSDload [off] {sym} ptr mem))
+       // cond: canMergeLoadClobber(v, l, x) && clobber(l)
+       // result: (MULSDload x [off] {sym} ptr mem)
        for {
-               sc := auxIntToValAndOff(v.AuxInt)
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off := auxIntToInt32(v_0.AuxInt)
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(ValAndOff(sc).canAdd32(off)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       l := v_1
+                       if l.Op != OpAMD64MOVSDload {
+                               continue
+                       }
+                       off := auxIntToInt32(l.AuxInt)
+                       sym := auxToSym(l.Aux)
+                       mem := l.Args[1]
+                       ptr := l.Args[0]
+                       if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MULSDload)
+                       v.AuxInt = int32ToAuxInt(off)
+                       v.Aux = symToAux(sym)
+                       v.AddArg3(x, ptr, mem)
+                       return true
                }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off))
-               v.Aux = symToAux(s)
-               v.AddArg2(ptr, mem)
-               return true
+               break
        }
-       // match: (MOVWstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)
-       // result: (MOVWstoreconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64MULSDload(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (MULSDload [off1] {sym} val (ADDQconst [off2] base) mem)
+       // cond: is32Bit(int64(off1)+int64(off2))
+       // result: (MULSDload [off1+off2] {sym} val base mem)
        for {
-               sc := auxIntToValAndOff(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64LEAQ {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               val := v_0
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               off := auxIntToInt32(v_0.AuxInt)
-               sym2 := auxToSym(v_0.Aux)
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)) {
+               off2 := auxIntToInt32(v_1.AuxInt)
+               base := v_1.Args[0]
+               mem := v_2
+               if !(is32Bit(int64(off1) + int64(off2))) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off))
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg2(ptr, mem)
+               v.reset(OpAMD64MULSDload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(sym)
+               v.AddArg3(val, base, mem)
                return true
        }
-       // match: (MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+2-c.Off())) && clobber(x)
-       // result: (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
+       // match: (MULSDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
+       // result: (MULSDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
        for {
-               c := auxIntToValAndOff(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               x := v_1
-               if x.Op != OpAMD64MOVWstoreconst {
-                       break
-               }
-               a := auxIntToValAndOff(x.AuxInt)
-               if auxToSym(x.Aux) != s {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               val := v_0
+               if v_1.Op != OpAMD64LEAQ {
                        break
                }
-               mem := x.Args[1]
-               p0 := x.Args[0]
-               if !(x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+2-c.Off())) && clobber(x)) {
+               off2 := auxIntToInt32(v_1.AuxInt)
+               sym2 := auxToSym(v_1.Aux)
+               base := v_1.Args[0]
+               mem := v_2
+               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconst)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xffff|c.Val()<<16, a.Off()))
-               v.Aux = symToAux(s)
-               v.AddArg2(p0, mem)
+               v.reset(OpAMD64MULSDload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg3(val, base, mem)
                return true
        }
-       // match: (MOVWstoreconst [a] {s} p0 x:(MOVWstoreconst [c] {s} p1 mem))
-       // cond: x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+2-c.Off())) && clobber(x)
-       // result: (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
+       // match: (MULSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _))
+       // result: (MULSD x (MOVQi2f y))
        for {
-               a := auxIntToValAndOff(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p0 := v_0
-               x := v_1
-               if x.Op != OpAMD64MOVWstoreconst {
-                       break
-               }
-               c := auxIntToValAndOff(x.AuxInt)
-               if auxToSym(x.Aux) != s {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               x := v_0
+               ptr := v_1
+               if v_2.Op != OpAMD64MOVQstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym {
                        break
                }
-               mem := x.Args[1]
-               p1 := x.Args[0]
-               if !(x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+2-c.Off())) && clobber(x)) {
+               y := v_2.Args[1]
+               if ptr != v_2.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconst)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xffff|c.Val()<<16, a.Off()))
-               v.Aux = symToAux(s)
-               v.AddArg2(p0, mem)
+               v.reset(OpAMD64MULSD)
+               v0 := b.NewValue0(v_2.Pos, OpAMD64MOVQi2f, typ.Float64)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MULL(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MULSS(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MULL x (MOVLconst [c]))
-       // result: (MULLconst [c] x)
+       // match: (MULSS x l:(MOVSSload [off] {sym} ptr mem))
+       // cond: canMergeLoadClobber(v, l, x) && clobber(l)
+       // result: (MULSSload x [off] {sym} ptr mem)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
                        x := v_0
-                       if v_1.Op != OpAMD64MOVLconst {
+                       l := v_1
+                       if l.Op != OpAMD64MOVSSload {
                                continue
                        }
-                       c := auxIntToInt32(v_1.AuxInt)
-                       v.reset(OpAMD64MULLconst)
-                       v.AuxInt = int32ToAuxInt(c)
-                       v.AddArg(x)
-                       return true
-               }
-               break
-       }
+                       off := auxIntToInt32(l.AuxInt)
+                       sym := auxToSym(l.Aux)
+                       mem := l.Args[1]
+                       ptr := l.Args[0]
+                       if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
+                               continue
+                       }
+                       v.reset(OpAMD64MULSSload)
+                       v.AuxInt = int32ToAuxInt(off)
+                       v.Aux = symToAux(sym)
+                       v.AddArg3(x, ptr, mem)
+                       return true
+               }
+               break
+       }
        return false
 }
-func rewriteValueAMD64_OpAMD64MULLconst(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MULSSload(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (MULLconst [c] (MULLconst [d] x))
-       // result: (MULLconst [c * d] x)
+       typ := &b.Func.Config.Types
+       // match: (MULSSload [off1] {sym} val (ADDQconst [off2] base) mem)
+       // cond: is32Bit(int64(off1)+int64(off2))
+       // result: (MULSSload [off1+off2] {sym} val base mem)
        for {
-               c := auxIntToInt32(v.AuxInt)
-               if v_0.Op != OpAMD64MULLconst {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               val := v_0
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               d := auxIntToInt32(v_0.AuxInt)
-               x := v_0.Args[0]
-               v.reset(OpAMD64MULLconst)
-               v.AuxInt = int32ToAuxInt(c * d)
-               v.AddArg(x)
+               off2 := auxIntToInt32(v_1.AuxInt)
+               base := v_1.Args[0]
+               mem := v_2
+               if !(is32Bit(int64(off1) + int64(off2))) {
+                       break
+               }
+               v.reset(OpAMD64MULSSload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(sym)
+               v.AddArg3(val, base, mem)
                return true
        }
-       // match: (MULLconst [-9] x)
-       // result: (NEGL (LEAL8 <v.Type> x x))
+       // match: (MULSSload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
+       // result: (MULSSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
        for {
-               if auxIntToInt32(v.AuxInt) != -9 {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               val := v_0
+               if v_1.Op != OpAMD64LEAQ {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64NEGL)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg(v0)
+               off2 := auxIntToInt32(v_1.AuxInt)
+               sym2 := auxToSym(v_1.Aux)
+               base := v_1.Args[0]
+               mem := v_2
+               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MULSSload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg3(val, base, mem)
                return true
        }
-       // match: (MULLconst [-5] x)
-       // result: (NEGL (LEAL4 <v.Type> x x))
+       // match: (MULSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _))
+       // result: (MULSS x (MOVLi2f y))
        for {
-               if auxIntToInt32(v.AuxInt) != -5 {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               x := v_0
+               ptr := v_1
+               if v_2.Op != OpAMD64MOVLstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64NEGL)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg(v0)
+               y := v_2.Args[1]
+               if ptr != v_2.Args[0] {
+                       break
+               }
+               v.reset(OpAMD64MULSS)
+               v0 := b.NewValue0(v_2.Pos, OpAMD64MOVLi2f, typ.Float32)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
                return true
        }
-       // match: (MULLconst [-3] x)
-       // result: (NEGL (LEAL2 <v.Type> x x))
+       return false
+}
+func rewriteValueAMD64_OpAMD64NEGL(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (NEGL (NEGL x))
+       // result: x
        for {
-               if auxIntToInt32(v.AuxInt) != -3 {
+               if v_0.Op != OpAMD64NEGL {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64NEGL)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg(v0)
+               x := v_0.Args[0]
+               v.copyOf(x)
                return true
        }
-       // match: (MULLconst [-1] x)
-       // result: (NEGL x)
+       // match: (NEGL s:(SUBL x y))
+       // cond: s.Uses == 1
+       // result: (SUBL y x)
        for {
-               if auxIntToInt32(v.AuxInt) != -1 {
+               s := v_0
+               if s.Op != OpAMD64SUBL {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64NEGL)
-               v.AddArg(x)
+               y := s.Args[1]
+               x := s.Args[0]
+               if !(s.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SUBL)
+               v.AddArg2(y, x)
                return true
        }
-       // match: (MULLconst [ 0] _)
-       // result: (MOVLconst [0])
+       // match: (NEGL (MOVLconst [c]))
+       // result: (MOVLconst [-c])
        for {
-               if auxIntToInt32(v.AuxInt) != 0 {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
+               c := auxIntToInt32(v_0.AuxInt)
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int32ToAuxInt(0)
+               v.AuxInt = int32ToAuxInt(-c)
                return true
        }
-       // match: (MULLconst [ 1] x)
+       return false
+}
+func rewriteValueAMD64_OpAMD64NEGQ(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (NEGQ (NEGQ x))
        // result: x
        for {
-               if auxIntToInt32(v.AuxInt) != 1 {
+               if v_0.Op != OpAMD64NEGQ {
                        break
                }
-               x := v_0
+               x := v_0.Args[0]
                v.copyOf(x)
                return true
        }
-       // match: (MULLconst [ 3] x)
-       // result: (LEAL2 x x)
+       // match: (NEGQ s:(SUBQ x y))
+       // cond: s.Uses == 1
+       // result: (SUBQ y x)
        for {
-               if auxIntToInt32(v.AuxInt) != 3 {
+               s := v_0
+               if s.Op != OpAMD64SUBQ {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64LEAL2)
-               v.AddArg2(x, x)
-               return true
-       }
-       // match: (MULLconst [ 5] x)
-       // result: (LEAL4 x x)
-       for {
-               if auxIntToInt32(v.AuxInt) != 5 {
+               y := s.Args[1]
+               x := s.Args[0]
+               if !(s.Uses == 1) {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64LEAL4)
-               v.AddArg2(x, x)
+               v.reset(OpAMD64SUBQ)
+               v.AddArg2(y, x)
                return true
        }
-       // match: (MULLconst [ 7] x)
-       // result: (LEAL2 x (LEAL2 <v.Type> x x))
+       // match: (NEGQ (MOVQconst [c]))
+       // result: (MOVQconst [-c])
        for {
-               if auxIntToInt32(v.AuxInt) != 7 {
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64LEAL2)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(x, v0)
+               c := auxIntToInt64(v_0.AuxInt)
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = int64ToAuxInt(-c)
                return true
        }
-       // match: (MULLconst [ 9] x)
-       // result: (LEAL8 x x)
+       // match: (NEGQ (ADDQconst [c] (NEGQ x)))
+       // cond: c != -(1<<31)
+       // result: (ADDQconst [-c] x)
        for {
-               if auxIntToInt32(v.AuxInt) != 9 {
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64LEAL8)
-               v.AddArg2(x, x)
-               return true
-       }
-       // match: (MULLconst [11] x)
-       // result: (LEAL2 x (LEAL4 <v.Type> x x))
-       for {
-               if auxIntToInt32(v.AuxInt) != 11 {
+               c := auxIntToInt32(v_0.AuxInt)
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64LEAL2)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (MULLconst [13] x)
-       // result: (LEAL4 x (LEAL2 <v.Type> x x))
-       for {
-               if auxIntToInt32(v.AuxInt) != 13 {
+               x := v_0_0.Args[0]
+               if !(c != -(1 << 31)) {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64LEAL4)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64ADDQconst)
+               v.AuxInt = int32ToAuxInt(-c)
+               v.AddArg(x)
                return true
        }
-       // match: (MULLconst [19] x)
-       // result: (LEAL2 x (LEAL8 <v.Type> x x))
+       return false
+}
+func rewriteValueAMD64_OpAMD64NOTL(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (NOTL (MOVLconst [c]))
+       // result: (MOVLconst [^c])
        for {
-               if auxIntToInt32(v.AuxInt) != 19 {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64LEAL2)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(x, v0)
+               c := auxIntToInt32(v_0.AuxInt)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = int32ToAuxInt(^c)
                return true
        }
-       // match: (MULLconst [21] x)
-       // result: (LEAL4 x (LEAL4 <v.Type> x x))
+       return false
+}
+func rewriteValueAMD64_OpAMD64NOTQ(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (NOTQ (MOVQconst [c]))
+       // result: (MOVQconst [^c])
        for {
-               if auxIntToInt32(v.AuxInt) != 21 {
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64LEAL4)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(x, v0)
+               c := auxIntToInt64(v_0.AuxInt)
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = int64ToAuxInt(^c)
                return true
        }
-       // match: (MULLconst [25] x)
-       // result: (LEAL8 x (LEAL2 <v.Type> x x))
+       return false
+}
+func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ORL (SHLL (MOVLconst [1]) y) x)
+       // result: (BTSL x y)
        for {
-               if auxIntToInt32(v.AuxInt) != 25 {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHLL {
+                               continue
+                       }
+                       y := v_0.Args[1]
+                       v_0_0 := v_0.Args[0]
+                       if v_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0.AuxInt) != 1 {
+                               continue
+                       }
+                       x := v_1
+                       v.reset(OpAMD64BTSL)
+                       v.AddArg2(x, y)
+                       return true
                }
-               x := v_0
-               v.reset(OpAMD64LEAL8)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(x, v0)
-               return true
+               break
        }
-       // match: (MULLconst [27] x)
-       // result: (LEAL8 (LEAL2 <v.Type> x x) (LEAL2 <v.Type> x x))
+       // match: (ORL (MOVLconst [c]) x)
+       // cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
+       // result: (BTSLconst [int8(log32(c))] x)
        for {
-               if auxIntToInt32(v.AuxInt) != 27 {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64MOVLconst {
+                               continue
+                       }
+                       c := auxIntToInt32(v_0.AuxInt)
+                       x := v_1
+                       if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
+                               continue
+                       }
+                       v.reset(OpAMD64BTSLconst)
+                       v.AuxInt = int8ToAuxInt(int8(log32(c)))
+                       v.AddArg(x)
+                       return true
                }
-               x := v_0
-               v.reset(OpAMD64LEAL8)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(v0, v0)
-               return true
+               break
        }
-       // match: (MULLconst [37] x)
-       // result: (LEAL4 x (LEAL8 <v.Type> x x))
+       // match: (ORL x (MOVLconst [c]))
+       // result: (ORLconst [c] x)
        for {
-               if auxIntToInt32(v.AuxInt) != 37 {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpAMD64MOVLconst {
+                               continue
+                       }
+                       c := auxIntToInt32(v_1.AuxInt)
+                       v.reset(OpAMD64ORLconst)
+                       v.AuxInt = int32ToAuxInt(c)
+                       v.AddArg(x)
+                       return true
                }
-               x := v_0
-               v.reset(OpAMD64LEAL4)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(x, v0)
-               return true
+               break
        }
-       // match: (MULLconst [41] x)
-       // result: (LEAL8 x (LEAL4 <v.Type> x x))
+       // match: (ORL x x)
+       // result: x
        for {
-               if auxIntToInt32(v.AuxInt) != 41 {
+               x := v_0
+               if x != v_1 {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64LEAL8)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(x, v0)
+               v.copyOf(x)
                return true
        }
-       // match: (MULLconst [45] x)
-       // result: (LEAL8 (LEAL4 <v.Type> x x) (LEAL4 <v.Type> x x))
+       // match: (ORL x l:(MOVLload [off] {sym} ptr mem))
+       // cond: canMergeLoadClobber(v, l, x) && clobber(l)
+       // result: (ORLload x [off] {sym} ptr mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 45 {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       l := v_1
+                       if l.Op != OpAMD64MOVLload {
+                               continue
+                       }
+                       off := auxIntToInt32(l.AuxInt)
+                       sym := auxToSym(l.Aux)
+                       mem := l.Args[1]
+                       ptr := l.Args[0]
+                       if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
+                               continue
+                       }
+                       v.reset(OpAMD64ORLload)
+                       v.AuxInt = int32ToAuxInt(off)
+                       v.Aux = symToAux(sym)
+                       v.AddArg3(x, ptr, mem)
+                       return true
                }
-               x := v_0
-               v.reset(OpAMD64LEAL8)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(v0, v0)
-               return true
+               break
        }
-       // match: (MULLconst [73] x)
-       // result: (LEAL8 x (LEAL8 <v.Type> x x))
+       return false
+}
+func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (ORLconst [c] x)
+       // cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
+       // result: (BTSLconst [int8(log32(c))] x)
        for {
-               if auxIntToInt32(v.AuxInt) != 73 {
+               c := auxIntToInt32(v.AuxInt)
+               x := v_0
+               if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64LEAL8)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64BTSLconst)
+               v.AuxInt = int8ToAuxInt(int8(log32(c)))
+               v.AddArg(x)
                return true
        }
-       // match: (MULLconst [81] x)
-       // result: (LEAL8 (LEAL8 <v.Type> x x) (LEAL8 <v.Type> x x))
+       // match: (ORLconst [c] (ORLconst [d] x))
+       // result: (ORLconst [c | d] x)
        for {
-               if auxIntToInt32(v.AuxInt) != 81 {
+               c := auxIntToInt32(v.AuxInt)
+               if v_0.Op != OpAMD64ORLconst {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64LEAL8)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(v0, v0)
+               d := auxIntToInt32(v_0.AuxInt)
+               x := v_0.Args[0]
+               v.reset(OpAMD64ORLconst)
+               v.AuxInt = int32ToAuxInt(c | d)
+               v.AddArg(x)
                return true
        }
-       // match: (MULLconst [c] x)
-       // cond: isPowerOfTwo64(int64(c)+1) && c >= 15
-       // result: (SUBL (SHLLconst <v.Type> [int8(log64(int64(c)+1))] x) x)
+       // match: (ORLconst [c] (BTSLconst [d] x))
+       // result: (ORLconst [c | 1<<uint32(d)] x)
        for {
                c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(isPowerOfTwo64(int64(c)+1) && c >= 15) {
+               if v_0.Op != OpAMD64BTSLconst {
                        break
                }
-               v.reset(OpAMD64SUBL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v0.AuxInt = int8ToAuxInt(int8(log64(int64(c) + 1)))
-               v0.AddArg(x)
-               v.AddArg2(v0, x)
+               d := auxIntToInt8(v_0.AuxInt)
+               x := v_0.Args[0]
+               v.reset(OpAMD64ORLconst)
+               v.AuxInt = int32ToAuxInt(c | 1<<uint32(d))
+               v.AddArg(x)
                return true
        }
-       // match: (MULLconst [c] x)
-       // cond: isPowerOfTwo32(c-1) && c >= 17
-       // result: (LEAL1 (SHLLconst <v.Type> [int8(log32(c-1))] x) x)
+       // match: (ORLconst [c] x)
+       // cond: c==0
+       // result: x
        for {
                c := auxIntToInt32(v.AuxInt)
                x := v_0
-               if !(isPowerOfTwo32(c-1) && c >= 17) {
+               if !(c == 0) {
                        break
                }
-               v.reset(OpAMD64LEAL1)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v0.AuxInt = int8ToAuxInt(int8(log32(c - 1)))
-               v0.AddArg(x)
-               v.AddArg2(v0, x)
+               v.copyOf(x)
                return true
        }
-       // match: (MULLconst [c] x)
-       // cond: isPowerOfTwo32(c-2) && c >= 34
-       // result: (LEAL2 (SHLLconst <v.Type> [int8(log32(c-2))] x) x)
+       // match: (ORLconst [c] _)
+       // cond: c==-1
+       // result: (MOVLconst [-1])
        for {
                c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(isPowerOfTwo32(c-2) && c >= 34) {
+               if !(c == -1) {
                        break
                }
-               v.reset(OpAMD64LEAL2)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v0.AuxInt = int8ToAuxInt(int8(log32(c - 2)))
-               v0.AddArg(x)
-               v.AddArg2(v0, x)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = int32ToAuxInt(-1)
                return true
        }
-       // match: (MULLconst [c] x)
-       // cond: isPowerOfTwo32(c-4) && c >= 68
-       // result: (LEAL4 (SHLLconst <v.Type> [int8(log32(c-4))] x) x)
+       // match: (ORLconst [c] (MOVLconst [d]))
+       // result: (MOVLconst [c|d])
        for {
                c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(isPowerOfTwo32(c-4) && c >= 68) {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               v.reset(OpAMD64LEAL4)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v0.AuxInt = int8ToAuxInt(int8(log32(c - 4)))
-               v0.AddArg(x)
-               v.AddArg2(v0, x)
+               d := auxIntToInt32(v_0.AuxInt)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = int32ToAuxInt(c | d)
                return true
        }
-       // match: (MULLconst [c] x)
-       // cond: isPowerOfTwo32(c-8) && c >= 136
-       // result: (LEAL8 (SHLLconst <v.Type> [int8(log32(c-8))] x) x)
+       return false
+}
+func rewriteValueAMD64_OpAMD64ORLconstmodify(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ORLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+       // cond: ValAndOff(valoff1).canAdd32(off2)
+       // result: (ORLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
        for {
-               c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(isPowerOfTwo32(c-8) && c >= 136) {
+               valoff1 := auxIntToValAndOff(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               v.reset(OpAMD64LEAL8)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v0.AuxInt = int8ToAuxInt(int8(log32(c - 8)))
-               v0.AddArg(x)
-               v.AddArg2(v0, x)
-               return true
-       }
-       // match: (MULLconst [c] x)
-       // cond: c%3 == 0 && isPowerOfTwo32(c/3)
-       // result: (SHLLconst [int8(log32(c/3))] (LEAL2 <v.Type> x x))
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(c%3 == 0 && isPowerOfTwo32(c/3)) {
-                       break
-               }
-               v.reset(OpAMD64SHLLconst)
-               v.AuxInt = int8ToAuxInt(int8(log32(c / 3)))
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [c] x)
-       // cond: c%5 == 0 && isPowerOfTwo32(c/5)
-       // result: (SHLLconst [int8(log32(c/5))] (LEAL4 <v.Type> x x))
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(c%5 == 0 && isPowerOfTwo32(c/5)) {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               base := v_0.Args[0]
+               mem := v_1
+               if !(ValAndOff(valoff1).canAdd32(off2)) {
                        break
                }
-               v.reset(OpAMD64SHLLconst)
-               v.AuxInt = int8ToAuxInt(int8(log32(c / 5)))
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg(v0)
+               v.reset(OpAMD64ORLconstmodify)
+               v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
+               v.Aux = symToAux(sym)
+               v.AddArg2(base, mem)
                return true
        }
-       // match: (MULLconst [c] x)
-       // cond: c%9 == 0 && isPowerOfTwo32(c/9)
-       // result: (SHLLconst [int8(log32(c/9))] (LEAL8 <v.Type> x x))
+       // match: (ORLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2)
+       // result: (ORLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem)
        for {
-               c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(c%9 == 0 && isPowerOfTwo32(c/9)) {
+               valoff1 := auxIntToValAndOff(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               v.reset(OpAMD64SHLLconst)
-               v.AuxInt = int8ToAuxInt(int8(log32(c / 9)))
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULLconst [c] (MOVLconst [d]))
-       // result: (MOVLconst [c*d])
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               if v_0.Op != OpAMD64MOVLconst {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               base := v_0.Args[0]
+               mem := v_1
+               if !(ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               d := auxIntToInt32(v_0.AuxInt)
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int32ToAuxInt(c * d)
+               v.reset(OpAMD64ORLconstmodify)
+               v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg2(base, mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MULQ(v *Value) bool {
+func rewriteValueAMD64_OpAMD64ORLload(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MULQ x (MOVQconst [c]))
-       // cond: is32Bit(c)
-       // result: (MULQconst [int32(c)] x)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpAMD64MOVQconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_1.AuxInt)
-                       if !(is32Bit(c)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MULQconst)
-                       v.AuxInt = int32ToAuxInt(int32(c))
-                       v.AddArg(x)
-                       return true
-               }
-               break
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MULQconst(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (MULQconst [c] (MULQconst [d] x))
-       // cond: is32Bit(int64(c)*int64(d))
-       // result: (MULQconst [c * d] x)
+       typ := &b.Func.Config.Types
+       // match: (ORLload [off1] {sym} val (ADDQconst [off2] base) mem)
+       // cond: is32Bit(int64(off1)+int64(off2))
+       // result: (ORLload [off1+off2] {sym} val base mem)
        for {
-               c := auxIntToInt32(v.AuxInt)
-               if v_0.Op != OpAMD64MULQconst {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               val := v_0
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               d := auxIntToInt32(v_0.AuxInt)
-               x := v_0.Args[0]
-               if !(is32Bit(int64(c) * int64(d))) {
+               off2 := auxIntToInt32(v_1.AuxInt)
+               base := v_1.Args[0]
+               mem := v_2
+               if !(is32Bit(int64(off1) + int64(off2))) {
                        break
                }
-               v.reset(OpAMD64MULQconst)
-               v.AuxInt = int32ToAuxInt(c * d)
-               v.AddArg(x)
+               v.reset(OpAMD64ORLload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(sym)
+               v.AddArg3(val, base, mem)
                return true
        }
-       // match: (MULQconst [-9] x)
-       // result: (NEGQ (LEAQ8 <v.Type> x x))
+       // match: (ORLload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
+       // result: (ORLload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
        for {
-               if auxIntToInt32(v.AuxInt) != -9 {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               val := v_0
+               if v_1.Op != OpAMD64LEAQ {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64NEGQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULQconst [-5] x)
-       // result: (NEGQ (LEAQ4 <v.Type> x x))
-       for {
-               if auxIntToInt32(v.AuxInt) != -5 {
+               off2 := auxIntToInt32(v_1.AuxInt)
+               sym2 := auxToSym(v_1.Aux)
+               base := v_1.Args[0]
+               mem := v_2
+               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64NEGQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg(v0)
+               v.reset(OpAMD64ORLload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg3(val, base, mem)
                return true
        }
-       // match: (MULQconst [-3] x)
-       // result: (NEGQ (LEAQ2 <v.Type> x x))
+       // match: ( ORLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _))
+       // result: ( ORL x (MOVLf2i y))
        for {
-               if auxIntToInt32(v.AuxInt) != -3 {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               x := v_0
+               ptr := v_1
+               if v_2.Op != OpAMD64MOVSSstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64NEGQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULQconst [-1] x)
-       // result: (NEGQ x)
-       for {
-               if auxIntToInt32(v.AuxInt) != -1 {
+               y := v_2.Args[1]
+               if ptr != v_2.Args[0] {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64NEGQ)
-               v.AddArg(x)
+               v.reset(OpAMD64ORL)
+               v0 := b.NewValue0(v_2.Pos, OpAMD64MOVLf2i, typ.UInt32)
+               v0.AddArg(y)
+               v.AddArg2(x, v0)
                return true
        }
-       // match: (MULQconst [ 0] _)
-       // result: (MOVQconst [0])
+       return false
+}
+func rewriteValueAMD64_OpAMD64ORLmodify(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ORLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+       // cond: is32Bit(int64(off1)+int64(off2))
+       // result: (ORLmodify [off1+off2] {sym} base val mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 0 {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = int64ToAuxInt(0)
-               return true
-       }
-       // match: (MULQconst [ 1] x)
-       // result: x
-       for {
-               if auxIntToInt32(v.AuxInt) != 1 {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               base := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(is32Bit(int64(off1) + int64(off2))) {
                        break
                }
-               x := v_0
-               v.copyOf(x)
+               v.reset(OpAMD64ORLmodify)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(sym)
+               v.AddArg3(base, val, mem)
                return true
        }
-       // match: (MULQconst [ 3] x)
-       // result: (LEAQ2 x x)
+       // match: (ORLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
+       // result: (ORLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 3 {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64LEAQ2)
-               v.AddArg2(x, x)
-               return true
-       }
-       // match: (MULQconst [ 5] x)
-       // result: (LEAQ4 x x)
-       for {
-               if auxIntToInt32(v.AuxInt) != 5 {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               base := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
                        break
                }
-               x := v_0
-               v.reset(OpAMD64LEAQ4)
-               v.AddArg2(x, x)
-               return true
-       }
-       // match: (MULQconst [ 7] x)
-       // result: (LEAQ2 x (LEAQ2 <v.Type> x x))
-       for {
-               if auxIntToInt32(v.AuxInt) != 7 {
-                       break
-               }
-               x := v_0
-               v.reset(OpAMD64LEAQ2)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (MULQconst [ 9] x)
-       // result: (LEAQ8 x x)
-       for {
-               if auxIntToInt32(v.AuxInt) != 9 {
-                       break
-               }
-               x := v_0
-               v.reset(OpAMD64LEAQ8)
-               v.AddArg2(x, x)
-               return true
-       }
-       // match: (MULQconst [11] x)
-       // result: (LEAQ2 x (LEAQ4 <v.Type> x x))
-       for {
-               if auxIntToInt32(v.AuxInt) != 11 {
-                       break
-               }
-               x := v_0
-               v.reset(OpAMD64LEAQ2)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (MULQconst [13] x)
-       // result: (LEAQ4 x (LEAQ2 <v.Type> x x))
-       for {
-               if auxIntToInt32(v.AuxInt) != 13 {
-                       break
-               }
-               x := v_0
-               v.reset(OpAMD64LEAQ4)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (MULQconst [19] x)
-       // result: (LEAQ2 x (LEAQ8 <v.Type> x x))
-       for {
-               if auxIntToInt32(v.AuxInt) != 19 {
-                       break
-               }
-               x := v_0
-               v.reset(OpAMD64LEAQ2)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (MULQconst [21] x)
-       // result: (LEAQ4 x (LEAQ4 <v.Type> x x))
-       for {
-               if auxIntToInt32(v.AuxInt) != 21 {
-                       break
-               }
-               x := v_0
-               v.reset(OpAMD64LEAQ4)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (MULQconst [25] x)
-       // result: (LEAQ8 x (LEAQ2 <v.Type> x x))
-       for {
-               if auxIntToInt32(v.AuxInt) != 25 {
-                       break
-               }
-               x := v_0
-               v.reset(OpAMD64LEAQ8)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (MULQconst [27] x)
-       // result: (LEAQ8 (LEAQ2 <v.Type> x x) (LEAQ2 <v.Type> x x))
-       for {
-               if auxIntToInt32(v.AuxInt) != 27 {
-                       break
-               }
-               x := v_0
-               v.reset(OpAMD64LEAQ8)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(v0, v0)
-               return true
-       }
-       // match: (MULQconst [37] x)
-       // result: (LEAQ4 x (LEAQ8 <v.Type> x x))
-       for {
-               if auxIntToInt32(v.AuxInt) != 37 {
-                       break
-               }
-               x := v_0
-               v.reset(OpAMD64LEAQ4)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (MULQconst [41] x)
-       // result: (LEAQ8 x (LEAQ4 <v.Type> x x))
-       for {
-               if auxIntToInt32(v.AuxInt) != 41 {
-                       break
-               }
-               x := v_0
-               v.reset(OpAMD64LEAQ8)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (MULQconst [45] x)
-       // result: (LEAQ8 (LEAQ4 <v.Type> x x) (LEAQ4 <v.Type> x x))
-       for {
-               if auxIntToInt32(v.AuxInt) != 45 {
-                       break
-               }
-               x := v_0
-               v.reset(OpAMD64LEAQ8)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(v0, v0)
-               return true
-       }
-       // match: (MULQconst [73] x)
-       // result: (LEAQ8 x (LEAQ8 <v.Type> x x))
-       for {
-               if auxIntToInt32(v.AuxInt) != 73 {
-                       break
-               }
-               x := v_0
-               v.reset(OpAMD64LEAQ8)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (MULQconst [81] x)
-       // result: (LEAQ8 (LEAQ8 <v.Type> x x) (LEAQ8 <v.Type> x x))
-       for {
-               if auxIntToInt32(v.AuxInt) != 81 {
-                       break
-               }
-               x := v_0
-               v.reset(OpAMD64LEAQ8)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg2(v0, v0)
-               return true
-       }
-       // match: (MULQconst [c] x)
-       // cond: isPowerOfTwo64(int64(c)+1) && c >= 15
-       // result: (SUBQ (SHLQconst <v.Type> [int8(log64(int64(c)+1))] x) x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(isPowerOfTwo64(int64(c)+1) && c >= 15) {
-                       break
-               }
-               v.reset(OpAMD64SUBQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v0.AuxInt = int8ToAuxInt(int8(log64(int64(c) + 1)))
-               v0.AddArg(x)
-               v.AddArg2(v0, x)
-               return true
-       }
-       // match: (MULQconst [c] x)
-       // cond: isPowerOfTwo32(c-1) && c >= 17
-       // result: (LEAQ1 (SHLQconst <v.Type> [int8(log32(c-1))] x) x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(isPowerOfTwo32(c-1) && c >= 17) {
-                       break
-               }
-               v.reset(OpAMD64LEAQ1)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v0.AuxInt = int8ToAuxInt(int8(log32(c - 1)))
-               v0.AddArg(x)
-               v.AddArg2(v0, x)
-               return true
-       }
-       // match: (MULQconst [c] x)
-       // cond: isPowerOfTwo32(c-2) && c >= 34
-       // result: (LEAQ2 (SHLQconst <v.Type> [int8(log32(c-2))] x) x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(isPowerOfTwo32(c-2) && c >= 34) {
-                       break
-               }
-               v.reset(OpAMD64LEAQ2)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v0.AuxInt = int8ToAuxInt(int8(log32(c - 2)))
-               v0.AddArg(x)
-               v.AddArg2(v0, x)
-               return true
-       }
-       // match: (MULQconst [c] x)
-       // cond: isPowerOfTwo32(c-4) && c >= 68
-       // result: (LEAQ4 (SHLQconst <v.Type> [int8(log32(c-4))] x) x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(isPowerOfTwo32(c-4) && c >= 68) {
-                       break
-               }
-               v.reset(OpAMD64LEAQ4)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v0.AuxInt = int8ToAuxInt(int8(log32(c - 4)))
-               v0.AddArg(x)
-               v.AddArg2(v0, x)
-               return true
-       }
-       // match: (MULQconst [c] x)
-       // cond: isPowerOfTwo32(c-8) && c >= 136
-       // result: (LEAQ8 (SHLQconst <v.Type> [int8(log32(c-8))] x) x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(isPowerOfTwo32(c-8) && c >= 136) {
-                       break
-               }
-               v.reset(OpAMD64LEAQ8)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v0.AuxInt = int8ToAuxInt(int8(log32(c - 8)))
-               v0.AddArg(x)
-               v.AddArg2(v0, x)
-               return true
-       }
-       // match: (MULQconst [c] x)
-       // cond: c%3 == 0 && isPowerOfTwo32(c/3)
-       // result: (SHLQconst [int8(log32(c/3))] (LEAQ2 <v.Type> x x))
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(c%3 == 0 && isPowerOfTwo32(c/3)) {
-                       break
-               }
-               v.reset(OpAMD64SHLQconst)
-               v.AuxInt = int8ToAuxInt(int8(log32(c / 3)))
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULQconst [c] x)
-       // cond: c%5 == 0 && isPowerOfTwo32(c/5)
-       // result: (SHLQconst [int8(log32(c/5))] (LEAQ4 <v.Type> x x))
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(c%5 == 0 && isPowerOfTwo32(c/5)) {
-                       break
-               }
-               v.reset(OpAMD64SHLQconst)
-               v.AuxInt = int8ToAuxInt(int8(log32(c / 5)))
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULQconst [c] x)
-       // cond: c%9 == 0 && isPowerOfTwo32(c/9)
-       // result: (SHLQconst [int8(log32(c/9))] (LEAQ8 <v.Type> x x))
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(c%9 == 0 && isPowerOfTwo32(c/9)) {
-                       break
-               }
-               v.reset(OpAMD64SHLQconst)
-               v.AuxInt = int8ToAuxInt(int8(log32(c / 9)))
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
-               v0.AddArg2(x, x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULQconst [c] (MOVQconst [d]))
-       // result: (MOVQconst [int64(c)*d])
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               if v_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               d := auxIntToInt64(v_0.AuxInt)
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = int64ToAuxInt(int64(c) * d)
-               return true
-       }
-       // match: (MULQconst [c] (NEGQ x))
-       // cond: c != -(1<<31)
-       // result: (MULQconst [-c] x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               if v_0.Op != OpAMD64NEGQ {
-                       break
-               }
-               x := v_0.Args[0]
-               if !(c != -(1 << 31)) {
-                       break
-               }
-               v.reset(OpAMD64MULQconst)
-               v.AuxInt = int32ToAuxInt(-c)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MULSD(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MULSD x l:(MOVSDload [off] {sym} ptr mem))
-       // cond: canMergeLoadClobber(v, l, x) && clobber(l)
-       // result: (MULSDload x [off] {sym} ptr mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64MOVSDload {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MULSDload)
-                       v.AuxInt = int32ToAuxInt(off)
-                       v.Aux = symToAux(sym)
-                       v.AddArg3(x, ptr, mem)
-                       return true
-               }
-               break
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MULSDload(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (MULSDload [off1] {sym} val (ADDQconst [off2] base) mem)
-       // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (MULSDload [off1+off2] {sym} val base mem)
-       for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               val := v_0
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := auxIntToInt32(v_1.AuxInt)
-               base := v_1.Args[0]
-               mem := v_2
-               if !(is32Bit(int64(off1) + int64(off2))) {
-                       break
-               }
-               v.reset(OpAMD64MULSDload)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(sym)
-               v.AddArg3(val, base, mem)
-               return true
-       }
-       // match: (MULSDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
-       // result: (MULSDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
-       for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               val := v_0
-               if v_1.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := auxIntToInt32(v_1.AuxInt)
-               sym2 := auxToSym(v_1.Aux)
-               base := v_1.Args[0]
-               mem := v_2
-               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MULSDload)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg3(val, base, mem)
-               return true
-       }
-       // match: (MULSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _))
-       // result: (MULSD x (MOVQi2f y))
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               x := v_0
-               ptr := v_1
-               if v_2.Op != OpAMD64MOVQstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym {
-                       break
-               }
-               y := v_2.Args[1]
-               if ptr != v_2.Args[0] {
-                       break
-               }
-               v.reset(OpAMD64MULSD)
-               v0 := b.NewValue0(v_2.Pos, OpAMD64MOVQi2f, typ.Float64)
-               v0.AddArg(y)
-               v.AddArg2(x, v0)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MULSS(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MULSS x l:(MOVSSload [off] {sym} ptr mem))
-       // cond: canMergeLoadClobber(v, l, x) && clobber(l)
-       // result: (MULSSload x [off] {sym} ptr mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64MOVSSload {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64MULSSload)
-                       v.AuxInt = int32ToAuxInt(off)
-                       v.Aux = symToAux(sym)
-                       v.AddArg3(x, ptr, mem)
-                       return true
-               }
-               break
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MULSSload(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (MULSSload [off1] {sym} val (ADDQconst [off2] base) mem)
-       // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (MULSSload [off1+off2] {sym} val base mem)
-       for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               val := v_0
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := auxIntToInt32(v_1.AuxInt)
-               base := v_1.Args[0]
-               mem := v_2
-               if !(is32Bit(int64(off1) + int64(off2))) {
-                       break
-               }
-               v.reset(OpAMD64MULSSload)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(sym)
-               v.AddArg3(val, base, mem)
-               return true
-       }
-       // match: (MULSSload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
-       // result: (MULSSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
-       for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               val := v_0
-               if v_1.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := auxIntToInt32(v_1.AuxInt)
-               sym2 := auxToSym(v_1.Aux)
-               base := v_1.Args[0]
-               mem := v_2
-               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MULSSload)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg3(val, base, mem)
-               return true
-       }
-       // match: (MULSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _))
-       // result: (MULSS x (MOVLi2f y))
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               x := v_0
-               ptr := v_1
-               if v_2.Op != OpAMD64MOVLstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym {
-                       break
-               }
-               y := v_2.Args[1]
-               if ptr != v_2.Args[0] {
-                       break
-               }
-               v.reset(OpAMD64MULSS)
-               v0 := b.NewValue0(v_2.Pos, OpAMD64MOVLi2f, typ.Float32)
-               v0.AddArg(y)
-               v.AddArg2(x, v0)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64NEGL(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (NEGL (NEGL x))
-       // result: x
-       for {
-               if v_0.Op != OpAMD64NEGL {
-                       break
-               }
-               x := v_0.Args[0]
-               v.copyOf(x)
-               return true
-       }
-       // match: (NEGL s:(SUBL x y))
-       // cond: s.Uses == 1
-       // result: (SUBL y x)
-       for {
-               s := v_0
-               if s.Op != OpAMD64SUBL {
-                       break
-               }
-               y := s.Args[1]
-               x := s.Args[0]
-               if !(s.Uses == 1) {
-                       break
-               }
-               v.reset(OpAMD64SUBL)
-               v.AddArg2(y, x)
-               return true
-       }
-       // match: (NEGL (MOVLconst [c]))
-       // result: (MOVLconst [-c])
-       for {
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               c := auxIntToInt32(v_0.AuxInt)
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int32ToAuxInt(-c)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64NEGQ(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (NEGQ (NEGQ x))
-       // result: x
-       for {
-               if v_0.Op != OpAMD64NEGQ {
-                       break
-               }
-               x := v_0.Args[0]
-               v.copyOf(x)
-               return true
-       }
-       // match: (NEGQ s:(SUBQ x y))
-       // cond: s.Uses == 1
-       // result: (SUBQ y x)
-       for {
-               s := v_0
-               if s.Op != OpAMD64SUBQ {
-                       break
-               }
-               y := s.Args[1]
-               x := s.Args[0]
-               if !(s.Uses == 1) {
-                       break
-               }
-               v.reset(OpAMD64SUBQ)
-               v.AddArg2(y, x)
-               return true
-       }
-       // match: (NEGQ (MOVQconst [c]))
-       // result: (MOVQconst [-c])
-       for {
-               if v_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = int64ToAuxInt(-c)
-               return true
-       }
-       // match: (NEGQ (ADDQconst [c] (NEGQ x)))
-       // cond: c != -(1<<31)
-       // result: (ADDQconst [-c] x)
-       for {
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               c := auxIntToInt32(v_0.AuxInt)
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64NEGQ {
-                       break
-               }
-               x := v_0_0.Args[0]
-               if !(c != -(1 << 31)) {
-                       break
-               }
-               v.reset(OpAMD64ADDQconst)
-               v.AuxInt = int32ToAuxInt(-c)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64NOTL(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (NOTL (MOVLconst [c]))
-       // result: (MOVLconst [^c])
-       for {
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               c := auxIntToInt32(v_0.AuxInt)
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int32ToAuxInt(^c)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64NOTQ(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (NOTQ (MOVQconst [c]))
-       // result: (MOVQconst [^c])
-       for {
-               if v_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = int64ToAuxInt(^c)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (ORL (SHLL (MOVLconst [1]) y) x)
-       // result: (BTSL x y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64SHLL {
-                               continue
-                       }
-                       y := v_0.Args[1]
-                       v_0_0 := v_0.Args[0]
-                       if v_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0.AuxInt) != 1 {
-                               continue
-                       }
-                       x := v_1
-                       v.reset(OpAMD64BTSL)
-                       v.AddArg2(x, y)
-                       return true
-               }
-               break
-       }
-       // match: (ORL (MOVLconst [c]) x)
-       // cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
-       // result: (BTSLconst [int8(log32(c))] x)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64MOVLconst {
-                               continue
-                       }
-                       c := auxIntToInt32(v_0.AuxInt)
-                       x := v_1
-                       if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
-                               continue
-                       }
-                       v.reset(OpAMD64BTSLconst)
-                       v.AuxInt = int8ToAuxInt(int8(log32(c)))
-                       v.AddArg(x)
-                       return true
-               }
-               break
-       }
-       // match: (ORL x (MOVLconst [c]))
-       // result: (ORLconst [c] x)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpAMD64MOVLconst {
-                               continue
-                       }
-                       c := auxIntToInt32(v_1.AuxInt)
-                       v.reset(OpAMD64ORLconst)
-                       v.AuxInt = int32ToAuxInt(c)
-                       v.AddArg(x)
-                       return true
-               }
-               break
-       }
-       // match: (ORL x x)
-       // result: x
-       for {
-               x := v_0
-               if x != v_1 {
-                       break
-               }
-               v.copyOf(x)
-               return true
-       }
-       // match: (ORL x0:(MOVBload [i0] {s} p mem) sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
-       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpAMD64MOVBload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLLconst || auxIntToInt8(sh.AuxInt) != 8 {
-                               continue
-                       }
-                       x1 := sh.Args[0]
-                       if x1.Op != OpAMD64MOVBload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i0)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p, mem)
-                       return true
-               }
-               break
-       }
-       // match: (ORL x0:(MOVBload [i] {s} p0 mem) sh:(SHLLconst [8] x1:(MOVBload [i] {s} p1 mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpAMD64MOVBload {
-                               continue
-                       }
-                       i := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p0 := x0.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLLconst || auxIntToInt8(sh.AuxInt) != 8 {
-                               continue
-                       }
-                       x1 := sh.Args[0]
-                       if x1.Op != OpAMD64MOVBload || auxIntToInt32(x1.AuxInt) != i || auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       p1 := x1.Args[0]
-                       if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p0, mem)
-                       return true
-               }
-               break
-       }
-       // match: (ORL x0:(MOVWload [i0] {s} p mem) sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem)))
-       // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpAMD64MOVWload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLLconst || auxIntToInt8(sh.AuxInt) != 16 {
-                               continue
-                       }
-                       x1 := sh.Args[0]
-                       if x1.Op != OpAMD64MOVWload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpAMD64MOVLload, typ.UInt32)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i0)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p, mem)
-                       return true
-               }
-               break
-       }
-       // match: (ORL x0:(MOVWload [i] {s} p0 mem) sh:(SHLLconst [16] x1:(MOVWload [i] {s} p1 mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVLload [i] {s} p0 mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpAMD64MOVWload {
-                               continue
-                       }
-                       i := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p0 := x0.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLLconst || auxIntToInt8(sh.AuxInt) != 16 {
-                               continue
-                       }
-                       x1 := sh.Args[0]
-                       if x1.Op != OpAMD64MOVWload || auxIntToInt32(x1.AuxInt) != i || auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       p1 := x1.Args[0]
-                       if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpAMD64MOVLload, typ.UInt32)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p0, mem)
-                       return true
-               }
-               break
-       }
-       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) y))
-       // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s1 := v_0
-                       if s1.Op != OpAMD64SHLLconst {
-                               continue
-                       }
-                       j1 := auxIntToInt8(s1.AuxInt)
-                       x1 := s1.Args[0]
-                       if x1.Op != OpAMD64MOVBload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p := x1.Args[0]
-                       or := v_1
-                       if or.Op != OpAMD64ORL {
-                               continue
-                       }
-                       _ = or.Args[1]
-                       or_0 := or.Args[0]
-                       or_1 := or.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
-                               s0 := or_0
-                               if s0.Op != OpAMD64SHLLconst {
-                                       continue
-                               }
-                               j0 := auxIntToInt8(s0.AuxInt)
-                               x0 := s0.Args[0]
-                               if x0.Op != OpAMD64MOVBload {
-                                       continue
-                               }
-                               i0 := auxIntToInt32(x0.AuxInt)
-                               if auxToSym(x0.Aux) != s {
-                                       continue
-                               }
-                               _ = x0.Args[1]
-                               if p != x0.Args[0] || mem != x0.Args[1] {
-                                       continue
-                               }
-                               y := or_1
-                               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, y)
-                               v0 := b.NewValue0(x0.Pos, OpAMD64ORL, v.Type)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x0.Pos, OpAMD64SHLLconst, v.Type)
-                               v1.AuxInt = int8ToAuxInt(j0)
-                               v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16)
-                               v2.AuxInt = int32ToAuxInt(i0)
-                               v2.Aux = symToAux(s)
-                               v2.AddArg2(p, mem)
-                               v1.AddArg(v2)
-                               v0.AddArg2(v1, y)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i] {s} p1 mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i] {s} p0 mem)) y))
-       // cond: j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i] {s} p0 mem)) y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s1 := v_0
-                       if s1.Op != OpAMD64SHLLconst {
-                               continue
-                       }
-                       j1 := auxIntToInt8(s1.AuxInt)
-                       x1 := s1.Args[0]
-                       if x1.Op != OpAMD64MOVBload {
-                               continue
-                       }
-                       i := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p1 := x1.Args[0]
-                       or := v_1
-                       if or.Op != OpAMD64ORL {
-                               continue
-                       }
-                       _ = or.Args[1]
-                       or_0 := or.Args[0]
-                       or_1 := or.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
-                               s0 := or_0
-                               if s0.Op != OpAMD64SHLLconst {
-                                       continue
-                               }
-                               j0 := auxIntToInt8(s0.AuxInt)
-                               x0 := s0.Args[0]
-                               if x0.Op != OpAMD64MOVBload || auxIntToInt32(x0.AuxInt) != i || auxToSym(x0.Aux) != s {
-                                       continue
-                               }
-                               _ = x0.Args[1]
-                               p0 := x0.Args[0]
-                               if mem != x0.Args[1] {
-                                       continue
-                               }
-                               y := or_1
-                               if !(j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, y)
-                               v0 := b.NewValue0(x0.Pos, OpAMD64ORL, v.Type)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x0.Pos, OpAMD64SHLLconst, v.Type)
-                               v1.AuxInt = int8ToAuxInt(j0)
-                               v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16)
-                               v2.AuxInt = int32ToAuxInt(i)
-                               v2.Aux = symToAux(s)
-                               v2.AddArg2(p0, mem)
-                               v1.AddArg(v2)
-                               v0.AddArg2(v1, y)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (ORL x1:(MOVBload [i1] {s} p mem) sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem)))
-       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x1 := v_0
-                       if x1.Op != OpAMD64MOVBload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p := x1.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLLconst || auxIntToInt8(sh.AuxInt) != 8 {
-                               continue
-                       }
-                       x0 := sh.Args[0]
-                       if x0.Op != OpAMD64MOVBload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       if auxToSym(x0.Aux) != s {
-                               continue
-                       }
-                       _ = x0.Args[1]
-                       if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, v.Type)
-                       v.copyOf(v0)
-                       v0.AuxInt = int8ToAuxInt(8)
-                       v1 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16)
-                       v1.AuxInt = int32ToAuxInt(i0)
-                       v1.Aux = symToAux(s)
-                       v1.AddArg2(p, mem)
-                       v0.AddArg(v1)
-                       return true
-               }
-               break
-       }
-       // match: (ORL x1:(MOVBload [i] {s} p1 mem) sh:(SHLLconst [8] x0:(MOVBload [i] {s} p0 mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i] {s} p0 mem))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x1 := v_0
-                       if x1.Op != OpAMD64MOVBload {
-                               continue
-                       }
-                       i := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p1 := x1.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLLconst || auxIntToInt8(sh.AuxInt) != 8 {
-                               continue
-                       }
-                       x0 := sh.Args[0]
-                       if x0.Op != OpAMD64MOVBload || auxIntToInt32(x0.AuxInt) != i || auxToSym(x0.Aux) != s {
-                               continue
-                       }
-                       _ = x0.Args[1]
-                       p0 := x0.Args[0]
-                       if mem != x0.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, v.Type)
-                       v.copyOf(v0)
-                       v0.AuxInt = int8ToAuxInt(8)
-                       v1 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16)
-                       v1.AuxInt = int32ToAuxInt(i)
-                       v1.Aux = symToAux(s)
-                       v1.AddArg2(p0, mem)
-                       v0.AddArg(v1)
-                       return true
-               }
-               break
-       }
-       // match: (ORL r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
-       // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       r1 := v_0
-                       if r1.Op != OpAMD64ROLWconst || auxIntToInt8(r1.AuxInt) != 8 {
-                               continue
-                       }
-                       x1 := r1.Args[0]
-                       if x1.Op != OpAMD64MOVWload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p := x1.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLLconst || auxIntToInt8(sh.AuxInt) != 16 {
-                               continue
-                       }
-                       r0 := sh.Args[0]
-                       if r0.Op != OpAMD64ROLWconst || auxIntToInt8(r0.AuxInt) != 8 {
-                               continue
-                       }
-                       x0 := r0.Args[0]
-                       if x0.Op != OpAMD64MOVWload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       if auxToSym(x0.Aux) != s {
-                               continue
-                       }
-                       _ = x0.Args[1]
-                       if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, v.Type)
-                       v.copyOf(v0)
-                       v1 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32)
-                       v1.AuxInt = int32ToAuxInt(i0)
-                       v1.Aux = symToAux(s)
-                       v1.AddArg2(p, mem)
-                       v0.AddArg(v1)
-                       return true
-               }
-               break
-       }
-       // match: (ORL r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i] {s} p0 mem))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       r1 := v_0
-                       if r1.Op != OpAMD64ROLWconst || auxIntToInt8(r1.AuxInt) != 8 {
-                               continue
-                       }
-                       x1 := r1.Args[0]
-                       if x1.Op != OpAMD64MOVWload {
-                               continue
-                       }
-                       i := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p1 := x1.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLLconst || auxIntToInt8(sh.AuxInt) != 16 {
-                               continue
-                       }
-                       r0 := sh.Args[0]
-                       if r0.Op != OpAMD64ROLWconst || auxIntToInt8(r0.AuxInt) != 8 {
-                               continue
-                       }
-                       x0 := r0.Args[0]
-                       if x0.Op != OpAMD64MOVWload || auxIntToInt32(x0.AuxInt) != i || auxToSym(x0.Aux) != s {
-                               continue
-                       }
-                       _ = x0.Args[1]
-                       p0 := x0.Args[0]
-                       if mem != x0.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, v.Type)
-                       v.copyOf(v0)
-                       v1 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32)
-                       v1.AuxInt = int32ToAuxInt(i)
-                       v1.Aux = symToAux(s)
-                       v1.AddArg2(p0, mem)
-                       v0.AddArg(v1)
-                       return true
-               }
-               break
-       }
-       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) y))
-       // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s0 := v_0
-                       if s0.Op != OpAMD64SHLLconst {
-                               continue
-                       }
-                       j0 := auxIntToInt8(s0.AuxInt)
-                       x0 := s0.Args[0]
-                       if x0.Op != OpAMD64MOVBload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       or := v_1
-                       if or.Op != OpAMD64ORL {
-                               continue
-                       }
-                       _ = or.Args[1]
-                       or_0 := or.Args[0]
-                       or_1 := or.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
-                               s1 := or_0
-                               if s1.Op != OpAMD64SHLLconst {
-                                       continue
-                               }
-                               j1 := auxIntToInt8(s1.AuxInt)
-                               x1 := s1.Args[0]
-                               if x1.Op != OpAMD64MOVBload {
-                                       continue
-                               }
-                               i1 := auxIntToInt32(x1.AuxInt)
-                               if auxToSym(x1.Aux) != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               if p != x1.Args[0] || mem != x1.Args[1] {
-                                       continue
-                               }
-                               y := or_1
-                               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, y)
-                               v0 := b.NewValue0(x1.Pos, OpAMD64ORL, v.Type)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x1.Pos, OpAMD64SHLLconst, v.Type)
-                               v1.AuxInt = int8ToAuxInt(j1)
-                               v2 := b.NewValue0(x1.Pos, OpAMD64ROLWconst, typ.UInt16)
-                               v2.AuxInt = int8ToAuxInt(8)
-                               v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16)
-                               v3.AuxInt = int32ToAuxInt(i0)
-                               v3.Aux = symToAux(s)
-                               v3.AddArg2(p, mem)
-                               v2.AddArg(v3)
-                               v1.AddArg(v2)
-                               v0.AddArg2(v1, y)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i] {s} p0 mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i] {s} p1 mem)) y))
-       // cond: j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i] {s} p0 mem))) y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s0 := v_0
-                       if s0.Op != OpAMD64SHLLconst {
-                               continue
-                       }
-                       j0 := auxIntToInt8(s0.AuxInt)
-                       x0 := s0.Args[0]
-                       if x0.Op != OpAMD64MOVBload {
-                               continue
-                       }
-                       i := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p0 := x0.Args[0]
-                       or := v_1
-                       if or.Op != OpAMD64ORL {
-                               continue
-                       }
-                       _ = or.Args[1]
-                       or_0 := or.Args[0]
-                       or_1 := or.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
-                               s1 := or_0
-                               if s1.Op != OpAMD64SHLLconst {
-                                       continue
-                               }
-                               j1 := auxIntToInt8(s1.AuxInt)
-                               x1 := s1.Args[0]
-                               if x1.Op != OpAMD64MOVBload || auxIntToInt32(x1.AuxInt) != i || auxToSym(x1.Aux) != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               p1 := x1.Args[0]
-                               if mem != x1.Args[1] {
-                                       continue
-                               }
-                               y := or_1
-                               if !(j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, y)
-                               v0 := b.NewValue0(x1.Pos, OpAMD64ORL, v.Type)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x1.Pos, OpAMD64SHLLconst, v.Type)
-                               v1.AuxInt = int8ToAuxInt(j1)
-                               v2 := b.NewValue0(x1.Pos, OpAMD64ROLWconst, typ.UInt16)
-                               v2.AuxInt = int8ToAuxInt(8)
-                               v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16)
-                               v3.AuxInt = int32ToAuxInt(i)
-                               v3.Aux = symToAux(s)
-                               v3.AddArg2(p0, mem)
-                               v2.AddArg(v3)
-                               v1.AddArg(v2)
-                               v0.AddArg2(v1, y)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (ORL x l:(MOVLload [off] {sym} ptr mem))
-       // cond: canMergeLoadClobber(v, l, x) && clobber(l)
-       // result: (ORLload x [off] {sym} ptr mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64MOVLload {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64ORLload)
-                       v.AuxInt = int32ToAuxInt(off)
-                       v.Aux = symToAux(sym)
-                       v.AddArg3(x, ptr, mem)
-                       return true
-               }
-               break
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (ORLconst [c] x)
-       // cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
-       // result: (BTSLconst [int8(log32(c))] x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) {
-                       break
-               }
-               v.reset(OpAMD64BTSLconst)
-               v.AuxInt = int8ToAuxInt(int8(log32(c)))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORLconst [c] (ORLconst [d] x))
-       // result: (ORLconst [c | d] x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               if v_0.Op != OpAMD64ORLconst {
-                       break
-               }
-               d := auxIntToInt32(v_0.AuxInt)
-               x := v_0.Args[0]
-               v.reset(OpAMD64ORLconst)
-               v.AuxInt = int32ToAuxInt(c | d)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORLconst [c] (BTSLconst [d] x))
-       // result: (ORLconst [c | 1<<uint32(d)] x)
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               if v_0.Op != OpAMD64BTSLconst {
-                       break
-               }
-               d := auxIntToInt8(v_0.AuxInt)
-               x := v_0.Args[0]
-               v.reset(OpAMD64ORLconst)
-               v.AuxInt = int32ToAuxInt(c | 1<<uint32(d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORLconst [c] x)
-       // cond: c==0
-       // result: x
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               x := v_0
-               if !(c == 0) {
-                       break
-               }
-               v.copyOf(x)
-               return true
-       }
-       // match: (ORLconst [c] _)
-       // cond: c==-1
-       // result: (MOVLconst [-1])
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               if !(c == -1) {
-                       break
-               }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int32ToAuxInt(-1)
-               return true
-       }
-       // match: (ORLconst [c] (MOVLconst [d]))
-       // result: (MOVLconst [c|d])
-       for {
-               c := auxIntToInt32(v.AuxInt)
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               d := auxIntToInt32(v_0.AuxInt)
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int32ToAuxInt(c | d)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ORLconstmodify(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (ORLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
-       // cond: ValAndOff(valoff1).canAdd32(off2)
-       // result: (ORLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
-       for {
-               valoff1 := auxIntToValAndOff(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               base := v_0.Args[0]
-               mem := v_1
-               if !(ValAndOff(valoff1).canAdd32(off2)) {
-                       break
-               }
-               v.reset(OpAMD64ORLconstmodify)
-               v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
-               v.Aux = symToAux(sym)
-               v.AddArg2(base, mem)
-               return true
-       }
-       // match: (ORLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2)
-       // result: (ORLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem)
-       for {
-               valoff1 := auxIntToValAndOff(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               sym2 := auxToSym(v_0.Aux)
-               base := v_0.Args[0]
-               mem := v_1
-               if !(ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64ORLconstmodify)
-               v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2))
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg2(base, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ORLload(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (ORLload [off1] {sym} val (ADDQconst [off2] base) mem)
-       // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (ORLload [off1+off2] {sym} val base mem)
-       for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               val := v_0
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := auxIntToInt32(v_1.AuxInt)
-               base := v_1.Args[0]
-               mem := v_2
-               if !(is32Bit(int64(off1) + int64(off2))) {
-                       break
-               }
-               v.reset(OpAMD64ORLload)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(sym)
-               v.AddArg3(val, base, mem)
-               return true
-       }
-       // match: (ORLload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
-       // result: (ORLload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
-       for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               val := v_0
-               if v_1.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := auxIntToInt32(v_1.AuxInt)
-               sym2 := auxToSym(v_1.Aux)
-               base := v_1.Args[0]
-               mem := v_2
-               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64ORLload)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg3(val, base, mem)
-               return true
-       }
-       // match: ( ORLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _))
-       // result: ( ORL x (MOVLf2i y))
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               x := v_0
-               ptr := v_1
-               if v_2.Op != OpAMD64MOVSSstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym {
-                       break
-               }
-               y := v_2.Args[1]
-               if ptr != v_2.Args[0] {
-                       break
-               }
-               v.reset(OpAMD64ORL)
-               v0 := b.NewValue0(v_2.Pos, OpAMD64MOVLf2i, typ.UInt32)
-               v0.AddArg(y)
-               v.AddArg2(x, v0)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ORLmodify(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (ORLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
-       // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (ORLmodify [off1+off2] {sym} base val mem)
-       for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               base := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(int64(off1) + int64(off2))) {
-                       break
-               }
-               v.reset(OpAMD64ORLmodify)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(sym)
-               v.AddArg3(base, val, mem)
-               return true
-       }
-       // match: (ORLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
-       // result: (ORLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               if v_0.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               sym2 := auxToSym(v_0.Aux)
-               base := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64ORLmodify)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg3(base, val, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (ORQ (SHLQ (MOVQconst [1]) y) x)
-       // result: (BTSQ x y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64SHLQ {
-                               continue
-                       }
-                       y := v_0.Args[1]
-                       v_0_0 := v_0.Args[0]
-                       if v_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0.AuxInt) != 1 {
-                               continue
-                       }
-                       x := v_1
-                       v.reset(OpAMD64BTSQ)
-                       v.AddArg2(x, y)
-                       return true
-               }
-               break
-       }
-       // match: (ORQ (MOVQconst [c]) x)
-       // cond: isUint64PowerOfTwo(c) && uint64(c) >= 128
-       // result: (BTSQconst [int8(log64(c))] x)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64MOVQconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_0.AuxInt)
-                       x := v_1
-                       if !(isUint64PowerOfTwo(c) && uint64(c) >= 128) {
-                               continue
-                       }
-                       v.reset(OpAMD64BTSQconst)
-                       v.AuxInt = int8ToAuxInt(int8(log64(c)))
-                       v.AddArg(x)
-                       return true
-               }
-               break
-       }
-       // match: (ORQ x (MOVQconst [c]))
-       // cond: is32Bit(c)
-       // result: (ORQconst [int32(c)] x)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpAMD64MOVQconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_1.AuxInt)
-                       if !(is32Bit(c)) {
-                               continue
-                       }
-                       v.reset(OpAMD64ORQconst)
-                       v.AuxInt = int32ToAuxInt(int32(c))
-                       v.AddArg(x)
-                       return true
-               }
-               break
-       }
-       // match: (ORQ x (MOVLconst [c]))
-       // result: (ORQconst [c] x)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpAMD64MOVLconst {
-                               continue
-                       }
-                       c := auxIntToInt32(v_1.AuxInt)
-                       v.reset(OpAMD64ORQconst)
-                       v.AuxInt = int32ToAuxInt(c)
-                       v.AddArg(x)
-                       return true
-               }
-               break
-       }
-       // match: (ORQ (SHRQ lo bits) (SHLQ hi (NEGQ bits)))
-       // result: (SHRDQ lo hi bits)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64SHRQ {
-                               continue
-                       }
-                       bits := v_0.Args[1]
-                       lo := v_0.Args[0]
-                       if v_1.Op != OpAMD64SHLQ {
-                               continue
-                       }
-                       _ = v_1.Args[1]
-                       hi := v_1.Args[0]
-                       v_1_1 := v_1.Args[1]
-                       if v_1_1.Op != OpAMD64NEGQ || bits != v_1_1.Args[0] {
-                               continue
-                       }
-                       v.reset(OpAMD64SHRDQ)
-                       v.AddArg3(lo, hi, bits)
-                       return true
-               }
-               break
-       }
-       // match: (ORQ (SHLQ lo bits) (SHRQ hi (NEGQ bits)))
-       // result: (SHLDQ lo hi bits)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64SHLQ {
-                               continue
-                       }
-                       bits := v_0.Args[1]
-                       lo := v_0.Args[0]
-                       if v_1.Op != OpAMD64SHRQ {
-                               continue
-                       }
-                       _ = v_1.Args[1]
-                       hi := v_1.Args[0]
-                       v_1_1 := v_1.Args[1]
-                       if v_1_1.Op != OpAMD64NEGQ || bits != v_1_1.Args[0] {
-                               continue
-                       }
-                       v.reset(OpAMD64SHLDQ)
-                       v.AddArg3(lo, hi, bits)
-                       return true
-               }
-               break
-       }
-       // match: (ORQ (SHRXQ lo bits) (SHLXQ hi (NEGQ bits)))
-       // result: (SHRDQ lo hi bits)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64SHRXQ {
-                               continue
-                       }
-                       bits := v_0.Args[1]
-                       lo := v_0.Args[0]
-                       if v_1.Op != OpAMD64SHLXQ {
-                               continue
-                       }
-                       _ = v_1.Args[1]
-                       hi := v_1.Args[0]
-                       v_1_1 := v_1.Args[1]
-                       if v_1_1.Op != OpAMD64NEGQ || bits != v_1_1.Args[0] {
-                               continue
-                       }
-                       v.reset(OpAMD64SHRDQ)
-                       v.AddArg3(lo, hi, bits)
-                       return true
-               }
-               break
-       }
-       // match: (ORQ (SHLXQ lo bits) (SHRXQ hi (NEGQ bits)))
-       // result: (SHLDQ lo hi bits)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64SHLXQ {
-                               continue
-                       }
-                       bits := v_0.Args[1]
-                       lo := v_0.Args[0]
-                       if v_1.Op != OpAMD64SHRXQ {
-                               continue
-                       }
-                       _ = v_1.Args[1]
-                       hi := v_1.Args[0]
-                       v_1_1 := v_1.Args[1]
-                       if v_1_1.Op != OpAMD64NEGQ || bits != v_1_1.Args[0] {
-                               continue
-                       }
-                       v.reset(OpAMD64SHLDQ)
-                       v.AddArg3(lo, hi, bits)
-                       return true
-               }
-               break
-       }
-       // match: (ORQ (MOVQconst [c]) (MOVQconst [d]))
-       // result: (MOVQconst [c|d])
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64MOVQconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_0.AuxInt)
-                       if v_1.Op != OpAMD64MOVQconst {
-                               continue
-                       }
-                       d := auxIntToInt64(v_1.AuxInt)
-                       v.reset(OpAMD64MOVQconst)
-                       v.AuxInt = int64ToAuxInt(c | d)
-                       return true
-               }
-               break
-       }
-       // match: (ORQ x x)
-       // result: x
-       for {
-               x := v_0
-               if x != v_1 {
-                       break
-               }
-               v.copyOf(x)
+               v.reset(OpAMD64ORLmodify)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg3(base, val, mem)
                return true
        }
-       // match: (ORQ x0:(MOVBload [i0] {s} p mem) sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem)))
-       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpAMD64MOVBload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLQconst || auxIntToInt8(sh.AuxInt) != 8 {
-                               continue
-                       }
-                       x1 := sh.Args[0]
-                       if x1.Op != OpAMD64MOVBload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i0)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p, mem)
-                       return true
-               }
-               break
-       }
-       // match: (ORQ x0:(MOVBload [i] {s} p0 mem) sh:(SHLQconst [8] x1:(MOVBload [i] {s} p1 mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpAMD64MOVBload {
-                               continue
-                       }
-                       i := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p0 := x0.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLQconst || auxIntToInt8(sh.AuxInt) != 8 {
-                               continue
-                       }
-                       x1 := sh.Args[0]
-                       if x1.Op != OpAMD64MOVBload || auxIntToInt32(x1.AuxInt) != i || auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       p1 := x1.Args[0]
-                       if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p0, mem)
-                       return true
-               }
-               break
-       }
-       // match: (ORQ x0:(MOVWload [i0] {s} p mem) sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem)))
-       // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpAMD64MOVWload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLQconst || auxIntToInt8(sh.AuxInt) != 16 {
-                               continue
-                       }
-                       x1 := sh.Args[0]
-                       if x1.Op != OpAMD64MOVWload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpAMD64MOVLload, typ.UInt32)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i0)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p, mem)
-                       return true
-               }
-               break
-       }
-       // match: (ORQ x0:(MOVWload [i] {s} p0 mem) sh:(SHLQconst [16] x1:(MOVWload [i] {s} p1 mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVLload [i] {s} p0 mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpAMD64MOVWload {
-                               continue
-                       }
-                       i := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p0 := x0.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLQconst || auxIntToInt8(sh.AuxInt) != 16 {
-                               continue
-                       }
-                       x1 := sh.Args[0]
-                       if x1.Op != OpAMD64MOVWload || auxIntToInt32(x1.AuxInt) != i || auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       p1 := x1.Args[0]
-                       if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpAMD64MOVLload, typ.UInt32)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p0, mem)
-                       return true
-               }
-               break
-       }
-       // match: (ORQ x0:(MOVLload [i0] {s} p mem) sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)))
-       // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpAMD64MOVLload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLQconst || auxIntToInt8(sh.AuxInt) != 32 {
-                               continue
-                       }
-                       x1 := sh.Args[0]
-                       if x1.Op != OpAMD64MOVLload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpAMD64MOVQload, typ.UInt64)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i0)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p, mem)
-                       return true
-               }
-               break
-       }
-       // match: (ORQ x0:(MOVLload [i] {s} p0 mem) sh:(SHLQconst [32] x1:(MOVLload [i] {s} p1 mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 4) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVQload [i] {s} p0 mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpAMD64MOVLload {
-                               continue
-                       }
-                       i := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p0 := x0.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLQconst || auxIntToInt8(sh.AuxInt) != 32 {
-                               continue
-                       }
-                       x1 := sh.Args[0]
-                       if x1.Op != OpAMD64MOVLload || auxIntToInt32(x1.AuxInt) != i || auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       p1 := x1.Args[0]
-                       if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 4) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpAMD64MOVQload, typ.UInt64)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p0, mem)
-                       return true
-               }
-               break
-       }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) y))
-       // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s1 := v_0
-                       if s1.Op != OpAMD64SHLQconst {
-                               continue
-                       }
-                       j1 := auxIntToInt8(s1.AuxInt)
-                       x1 := s1.Args[0]
-                       if x1.Op != OpAMD64MOVBload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p := x1.Args[0]
-                       or := v_1
-                       if or.Op != OpAMD64ORQ {
-                               continue
-                       }
-                       _ = or.Args[1]
-                       or_0 := or.Args[0]
-                       or_1 := or.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
-                               s0 := or_0
-                               if s0.Op != OpAMD64SHLQconst {
-                                       continue
-                               }
-                               j0 := auxIntToInt8(s0.AuxInt)
-                               x0 := s0.Args[0]
-                               if x0.Op != OpAMD64MOVBload {
-                                       continue
-                               }
-                               i0 := auxIntToInt32(x0.AuxInt)
-                               if auxToSym(x0.Aux) != s {
-                                       continue
-                               }
-                               _ = x0.Args[1]
-                               if p != x0.Args[0] || mem != x0.Args[1] {
-                                       continue
-                               }
-                               y := or_1
-                               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, y)
-                               v0 := b.NewValue0(x0.Pos, OpAMD64ORQ, v.Type)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x0.Pos, OpAMD64SHLQconst, v.Type)
-                               v1.AuxInt = int8ToAuxInt(j0)
-                               v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16)
-                               v2.AuxInt = int32ToAuxInt(i0)
-                               v2.Aux = symToAux(s)
-                               v2.AddArg2(p, mem)
-                               v1.AddArg(v2)
-                               v0.AddArg2(v1, y)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i] {s} p1 mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i] {s} p0 mem)) y))
-       // cond: j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i] {s} p0 mem)) y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s1 := v_0
-                       if s1.Op != OpAMD64SHLQconst {
-                               continue
-                       }
-                       j1 := auxIntToInt8(s1.AuxInt)
-                       x1 := s1.Args[0]
-                       if x1.Op != OpAMD64MOVBload {
-                               continue
-                       }
-                       i := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p1 := x1.Args[0]
-                       or := v_1
-                       if or.Op != OpAMD64ORQ {
-                               continue
-                       }
-                       _ = or.Args[1]
-                       or_0 := or.Args[0]
-                       or_1 := or.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
-                               s0 := or_0
-                               if s0.Op != OpAMD64SHLQconst {
-                                       continue
-                               }
-                               j0 := auxIntToInt8(s0.AuxInt)
-                               x0 := s0.Args[0]
-                               if x0.Op != OpAMD64MOVBload || auxIntToInt32(x0.AuxInt) != i || auxToSym(x0.Aux) != s {
-                                       continue
-                               }
-                               _ = x0.Args[1]
-                               p0 := x0.Args[0]
-                               if mem != x0.Args[1] {
-                                       continue
-                               }
-                               y := or_1
-                               if !(j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, y)
-                               v0 := b.NewValue0(x0.Pos, OpAMD64ORQ, v.Type)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x0.Pos, OpAMD64SHLQconst, v.Type)
-                               v1.AuxInt = int8ToAuxInt(j0)
-                               v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16)
-                               v2.AuxInt = int32ToAuxInt(i)
-                               v2.Aux = symToAux(s)
-                               v2.AddArg2(p0, mem)
-                               v1.AddArg(v2)
-                               v0.AddArg2(v1, y)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem)) y))
-       // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s1 := v_0
-                       if s1.Op != OpAMD64SHLQconst {
-                               continue
-                       }
-                       j1 := auxIntToInt8(s1.AuxInt)
-                       x1 := s1.Args[0]
-                       if x1.Op != OpAMD64MOVWload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p := x1.Args[0]
-                       or := v_1
-                       if or.Op != OpAMD64ORQ {
-                               continue
-                       }
-                       _ = or.Args[1]
-                       or_0 := or.Args[0]
-                       or_1 := or.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
-                               s0 := or_0
-                               if s0.Op != OpAMD64SHLQconst {
-                                       continue
-                               }
-                               j0 := auxIntToInt8(s0.AuxInt)
-                               x0 := s0.Args[0]
-                               if x0.Op != OpAMD64MOVWload {
-                                       continue
-                               }
-                               i0 := auxIntToInt32(x0.AuxInt)
-                               if auxToSym(x0.Aux) != s {
-                                       continue
-                               }
-                               _ = x0.Args[1]
-                               if p != x0.Args[0] || mem != x0.Args[1] {
-                                       continue
-                               }
-                               y := or_1
-                               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, y)
-                               v0 := b.NewValue0(x0.Pos, OpAMD64ORQ, v.Type)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x0.Pos, OpAMD64SHLQconst, v.Type)
-                               v1.AuxInt = int8ToAuxInt(j0)
-                               v2 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32)
-                               v2.AuxInt = int32ToAuxInt(i0)
-                               v2.Aux = symToAux(s)
-                               v2.AddArg2(p, mem)
-                               v1.AddArg(v2)
-                               v0.AddArg2(v1, y)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i] {s} p1 mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i] {s} p0 mem)) y))
-       // cond: j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i] {s} p0 mem)) y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s1 := v_0
-                       if s1.Op != OpAMD64SHLQconst {
-                               continue
-                       }
-                       j1 := auxIntToInt8(s1.AuxInt)
-                       x1 := s1.Args[0]
-                       if x1.Op != OpAMD64MOVWload {
-                               continue
-                       }
-                       i := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p1 := x1.Args[0]
-                       or := v_1
-                       if or.Op != OpAMD64ORQ {
-                               continue
-                       }
-                       _ = or.Args[1]
-                       or_0 := or.Args[0]
-                       or_1 := or.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
-                               s0 := or_0
-                               if s0.Op != OpAMD64SHLQconst {
-                                       continue
-                               }
-                               j0 := auxIntToInt8(s0.AuxInt)
-                               x0 := s0.Args[0]
-                               if x0.Op != OpAMD64MOVWload || auxIntToInt32(x0.AuxInt) != i || auxToSym(x0.Aux) != s {
-                                       continue
-                               }
-                               _ = x0.Args[1]
-                               p0 := x0.Args[0]
-                               if mem != x0.Args[1] {
-                                       continue
-                               }
-                               y := or_1
-                               if !(j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, y)
-                               v0 := b.NewValue0(x0.Pos, OpAMD64ORQ, v.Type)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x0.Pos, OpAMD64SHLQconst, v.Type)
-                               v1.AuxInt = int8ToAuxInt(j0)
-                               v2 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32)
-                               v2.AuxInt = int32ToAuxInt(i)
-                               v2.Aux = symToAux(s)
-                               v2.AddArg2(p0, mem)
-                               v1.AddArg(v2)
-                               v0.AddArg2(v1, y)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (ORQ x1:(MOVBload [i1] {s} p mem) sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem)))
-       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x1 := v_0
-                       if x1.Op != OpAMD64MOVBload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p := x1.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLQconst || auxIntToInt8(sh.AuxInt) != 8 {
-                               continue
-                       }
-                       x0 := sh.Args[0]
-                       if x0.Op != OpAMD64MOVBload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       if auxToSym(x0.Aux) != s {
+       return false
+}
+func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ORQ (SHLQ (MOVQconst [1]) y) x)
+       // result: (BTSQ x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHLQ {
                                continue
                        }
-                       _ = x0.Args[1]
-                       if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+                       y := v_0.Args[1]
+                       v_0_0 := v_0.Args[0]
+                       if v_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0.AuxInt) != 1 {
                                continue
                        }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, v.Type)
-                       v.copyOf(v0)
-                       v0.AuxInt = int8ToAuxInt(8)
-                       v1 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16)
-                       v1.AuxInt = int32ToAuxInt(i0)
-                       v1.Aux = symToAux(s)
-                       v1.AddArg2(p, mem)
-                       v0.AddArg(v1)
+                       x := v_1
+                       v.reset(OpAMD64BTSQ)
+                       v.AddArg2(x, y)
                        return true
                }
                break
        }
-       // match: (ORQ x1:(MOVBload [i] {s} p1 mem) sh:(SHLQconst [8] x0:(MOVBload [i] {s} p0 mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i] {s} p0 mem))
+       // match: (ORQ (MOVQconst [c]) x)
+       // cond: isUint64PowerOfTwo(c) && uint64(c) >= 128
+       // result: (BTSQconst [int8(log64(c))] x)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x1 := v_0
-                       if x1.Op != OpAMD64MOVBload {
-                               continue
-                       }
-                       i := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p1 := x1.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLQconst || auxIntToInt8(sh.AuxInt) != 8 {
-                               continue
-                       }
-                       x0 := sh.Args[0]
-                       if x0.Op != OpAMD64MOVBload || auxIntToInt32(x0.AuxInt) != i || auxToSym(x0.Aux) != s {
+                       if v_0.Op != OpAMD64MOVQconst {
                                continue
                        }
-                       _ = x0.Args[1]
-                       p0 := x0.Args[0]
-                       if mem != x0.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+                       c := auxIntToInt64(v_0.AuxInt)
+                       x := v_1
+                       if !(isUint64PowerOfTwo(c) && uint64(c) >= 128) {
                                continue
                        }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, v.Type)
-                       v.copyOf(v0)
-                       v0.AuxInt = int8ToAuxInt(8)
-                       v1 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16)
-                       v1.AuxInt = int32ToAuxInt(i)
-                       v1.Aux = symToAux(s)
-                       v1.AddArg2(p0, mem)
-                       v0.AddArg(v1)
+                       v.reset(OpAMD64BTSQconst)
+                       v.AuxInt = int8ToAuxInt(int8(log64(c)))
+                       v.AddArg(x)
                        return true
                }
                break
        }
-       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
-       // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
+       // match: (ORQ x (MOVQconst [c]))
+       // cond: is32Bit(c)
+       // result: (ORQconst [int32(c)] x)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       r1 := v_0
-                       if r1.Op != OpAMD64ROLWconst || auxIntToInt8(r1.AuxInt) != 8 {
-                               continue
-                       }
-                       x1 := r1.Args[0]
-                       if x1.Op != OpAMD64MOVWload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p := x1.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLQconst || auxIntToInt8(sh.AuxInt) != 16 {
-                               continue
-                       }
-                       r0 := sh.Args[0]
-                       if r0.Op != OpAMD64ROLWconst || auxIntToInt8(r0.AuxInt) != 8 {
-                               continue
-                       }
-                       x0 := r0.Args[0]
-                       if x0.Op != OpAMD64MOVWload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       if auxToSym(x0.Aux) != s {
+                       x := v_0
+                       if v_1.Op != OpAMD64MOVQconst {
                                continue
                        }
-                       _ = x0.Args[1]
-                       if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
+                       c := auxIntToInt64(v_1.AuxInt)
+                       if !(is32Bit(c)) {
                                continue
                        }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, v.Type)
-                       v.copyOf(v0)
-                       v1 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32)
-                       v1.AuxInt = int32ToAuxInt(i0)
-                       v1.Aux = symToAux(s)
-                       v1.AddArg2(p, mem)
-                       v0.AddArg(v1)
+                       v.reset(OpAMD64ORQconst)
+                       v.AuxInt = int32ToAuxInt(int32(c))
+                       v.AddArg(x)
                        return true
                }
                break
        }
-       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i] {s} p0 mem))
+       // match: (ORQ x (MOVLconst [c]))
+       // result: (ORQconst [c] x)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       r1 := v_0
-                       if r1.Op != OpAMD64ROLWconst || auxIntToInt8(r1.AuxInt) != 8 {
-                               continue
-                       }
-                       x1 := r1.Args[0]
-                       if x1.Op != OpAMD64MOVWload {
-                               continue
-                       }
-                       i := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p1 := x1.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLQconst || auxIntToInt8(sh.AuxInt) != 16 {
-                               continue
-                       }
-                       r0 := sh.Args[0]
-                       if r0.Op != OpAMD64ROLWconst || auxIntToInt8(r0.AuxInt) != 8 {
-                               continue
-                       }
-                       x0 := r0.Args[0]
-                       if x0.Op != OpAMD64MOVWload || auxIntToInt32(x0.AuxInt) != i || auxToSym(x0.Aux) != s {
-                               continue
-                       }
-                       _ = x0.Args[1]
-                       p0 := x0.Args[0]
-                       if mem != x0.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
+                       x := v_0
+                       if v_1.Op != OpAMD64MOVLconst {
                                continue
                        }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, v.Type)
-                       v.copyOf(v0)
-                       v1 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32)
-                       v1.AuxInt = int32ToAuxInt(i)
-                       v1.Aux = symToAux(s)
-                       v1.AddArg2(p0, mem)
-                       v0.AddArg(v1)
+                       c := auxIntToInt32(v_1.AuxInt)
+                       v.reset(OpAMD64ORQconst)
+                       v.AuxInt = int32ToAuxInt(c)
+                       v.AddArg(x)
                        return true
                }
                break
        }
-       // match: (ORQ r1:(BSWAPL x1:(MOVLload [i1] {s} p mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))))
-       // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
+       // match: (ORQ (SHRQ lo bits) (SHLQ hi (NEGQ bits)))
+       // result: (SHRDQ lo hi bits)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       r1 := v_0
-                       if r1.Op != OpAMD64BSWAPL {
-                               continue
-                       }
-                       x1 := r1.Args[0]
-                       if x1.Op != OpAMD64MOVLload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p := x1.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLQconst || auxIntToInt8(sh.AuxInt) != 32 {
-                               continue
-                       }
-                       r0 := sh.Args[0]
-                       if r0.Op != OpAMD64BSWAPL {
-                               continue
-                       }
-                       x0 := r0.Args[0]
-                       if x0.Op != OpAMD64MOVLload {
+                       if v_0.Op != OpAMD64SHRQ {
                                continue
                        }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       if auxToSym(x0.Aux) != s {
+                       bits := v_0.Args[1]
+                       lo := v_0.Args[0]
+                       if v_1.Op != OpAMD64SHLQ {
                                continue
                        }
-                       _ = x0.Args[1]
-                       if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
+                       _ = v_1.Args[1]
+                       hi := v_1.Args[0]
+                       v_1_1 := v_1.Args[1]
+                       if v_1_1.Op != OpAMD64NEGQ || bits != v_1_1.Args[0] {
                                continue
                        }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPQ, v.Type)
-                       v.copyOf(v0)
-                       v1 := b.NewValue0(x0.Pos, OpAMD64MOVQload, typ.UInt64)
-                       v1.AuxInt = int32ToAuxInt(i0)
-                       v1.Aux = symToAux(s)
-                       v1.AddArg2(p, mem)
-                       v0.AddArg(v1)
+                       v.reset(OpAMD64SHRDQ)
+                       v.AddArg3(lo, hi, bits)
                        return true
                }
                break
        }
-       // match: (ORQ r1:(BSWAPL x1:(MOVLload [i] {s} p1 mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i] {s} p0 mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 4) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i] {s} p0 mem))
+       // match: (ORQ (SHLQ lo bits) (SHRQ hi (NEGQ bits)))
+       // result: (SHLDQ lo hi bits)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       r1 := v_0
-                       if r1.Op != OpAMD64BSWAPL {
-                               continue
-                       }
-                       x1 := r1.Args[0]
-                       if x1.Op != OpAMD64MOVLload {
-                               continue
-                       }
-                       i := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p1 := x1.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLQconst || auxIntToInt8(sh.AuxInt) != 32 {
-                               continue
-                       }
-                       r0 := sh.Args[0]
-                       if r0.Op != OpAMD64BSWAPL {
+                       if v_0.Op != OpAMD64SHLQ {
                                continue
                        }
-                       x0 := r0.Args[0]
-                       if x0.Op != OpAMD64MOVLload || auxIntToInt32(x0.AuxInt) != i || auxToSym(x0.Aux) != s {
+                       bits := v_0.Args[1]
+                       lo := v_0.Args[0]
+                       if v_1.Op != OpAMD64SHRQ {
                                continue
                        }
-                       _ = x0.Args[1]
-                       p0 := x0.Args[0]
-                       if mem != x0.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 4) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
+                       _ = v_1.Args[1]
+                       hi := v_1.Args[0]
+                       v_1_1 := v_1.Args[1]
+                       if v_1_1.Op != OpAMD64NEGQ || bits != v_1_1.Args[0] {
                                continue
                        }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPQ, v.Type)
-                       v.copyOf(v0)
-                       v1 := b.NewValue0(x0.Pos, OpAMD64MOVQload, typ.UInt64)
-                       v1.AuxInt = int32ToAuxInt(i)
-                       v1.Aux = symToAux(s)
-                       v1.AddArg2(p0, mem)
-                       v0.AddArg(v1)
+                       v.reset(OpAMD64SHLDQ)
+                       v.AddArg3(lo, hi, bits)
                        return true
                }
                break
        }
-       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) y))
-       // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       // match: (ORQ (SHRXQ lo bits) (SHLXQ hi (NEGQ bits)))
+       // result: (SHRDQ lo hi bits)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s0 := v_0
-                       if s0.Op != OpAMD64SHLQconst {
+                       if v_0.Op != OpAMD64SHRXQ {
                                continue
                        }
-                       j0 := auxIntToInt8(s0.AuxInt)
-                       x0 := s0.Args[0]
-                       if x0.Op != OpAMD64MOVBload {
+                       bits := v_0.Args[1]
+                       lo := v_0.Args[0]
+                       if v_1.Op != OpAMD64SHLXQ {
                                continue
                        }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       or := v_1
-                       if or.Op != OpAMD64ORQ {
+                       _ = v_1.Args[1]
+                       hi := v_1.Args[0]
+                       v_1_1 := v_1.Args[1]
+                       if v_1_1.Op != OpAMD64NEGQ || bits != v_1_1.Args[0] {
                                continue
                        }
-                       _ = or.Args[1]
-                       or_0 := or.Args[0]
-                       or_1 := or.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
-                               s1 := or_0
-                               if s1.Op != OpAMD64SHLQconst {
-                                       continue
-                               }
-                               j1 := auxIntToInt8(s1.AuxInt)
-                               x1 := s1.Args[0]
-                               if x1.Op != OpAMD64MOVBload {
-                                       continue
-                               }
-                               i1 := auxIntToInt32(x1.AuxInt)
-                               if auxToSym(x1.Aux) != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               if p != x1.Args[0] || mem != x1.Args[1] {
-                                       continue
-                               }
-                               y := or_1
-                               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, y)
-                               v0 := b.NewValue0(x1.Pos, OpAMD64ORQ, v.Type)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x1.Pos, OpAMD64SHLQconst, v.Type)
-                               v1.AuxInt = int8ToAuxInt(j1)
-                               v2 := b.NewValue0(x1.Pos, OpAMD64ROLWconst, typ.UInt16)
-                               v2.AuxInt = int8ToAuxInt(8)
-                               v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16)
-                               v3.AuxInt = int32ToAuxInt(i0)
-                               v3.Aux = symToAux(s)
-                               v3.AddArg2(p, mem)
-                               v2.AddArg(v3)
-                               v1.AddArg(v2)
-                               v0.AddArg2(v1, y)
-                               return true
-                       }
+                       v.reset(OpAMD64SHRDQ)
+                       v.AddArg3(lo, hi, bits)
+                       return true
                }
                break
        }
-       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i] {s} p0 mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i] {s} p1 mem)) y))
-       // cond: j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i] {s} p0 mem))) y)
+       // match: (ORQ (SHLXQ lo bits) (SHRXQ hi (NEGQ bits)))
+       // result: (SHLDQ lo hi bits)
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s0 := v_0
-                       if s0.Op != OpAMD64SHLQconst {
+                       if v_0.Op != OpAMD64SHLXQ {
                                continue
                        }
-                       j0 := auxIntToInt8(s0.AuxInt)
-                       x0 := s0.Args[0]
-                       if x0.Op != OpAMD64MOVBload {
+                       bits := v_0.Args[1]
+                       lo := v_0.Args[0]
+                       if v_1.Op != OpAMD64SHRXQ {
                                continue
                        }
-                       i := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p0 := x0.Args[0]
-                       or := v_1
-                       if or.Op != OpAMD64ORQ {
+                       _ = v_1.Args[1]
+                       hi := v_1.Args[0]
+                       v_1_1 := v_1.Args[1]
+                       if v_1_1.Op != OpAMD64NEGQ || bits != v_1_1.Args[0] {
                                continue
                        }
-                       _ = or.Args[1]
-                       or_0 := or.Args[0]
-                       or_1 := or.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
-                               s1 := or_0
-                               if s1.Op != OpAMD64SHLQconst {
-                                       continue
-                               }
-                               j1 := auxIntToInt8(s1.AuxInt)
-                               x1 := s1.Args[0]
-                               if x1.Op != OpAMD64MOVBload || auxIntToInt32(x1.AuxInt) != i || auxToSym(x1.Aux) != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               p1 := x1.Args[0]
-                               if mem != x1.Args[1] {
-                                       continue
-                               }
-                               y := or_1
-                               if !(j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, y)
-                               v0 := b.NewValue0(x1.Pos, OpAMD64ORQ, v.Type)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x1.Pos, OpAMD64SHLQconst, v.Type)
-                               v1.AuxInt = int8ToAuxInt(j1)
-                               v2 := b.NewValue0(x1.Pos, OpAMD64ROLWconst, typ.UInt16)
-                               v2.AuxInt = int8ToAuxInt(8)
-                               v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16)
-                               v3.AuxInt = int32ToAuxInt(i)
-                               v3.Aux = symToAux(s)
-                               v3.AddArg2(p0, mem)
-                               v2.AddArg(v3)
-                               v1.AddArg(v2)
-                               v0.AddArg2(v1, y)
-                               return true
-                       }
+                       v.reset(OpAMD64SHLDQ)
+                       v.AddArg3(lo, hi, bits)
+                       return true
                }
                break
        }
-       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))) y))
-       // cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p mem))) y)
+       // match: (ORQ (MOVQconst [c]) (MOVQconst [d]))
+       // result: (MOVQconst [c|d])
        for {
                for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s0 := v_0
-                       if s0.Op != OpAMD64SHLQconst {
-                               continue
-                       }
-                       j0 := auxIntToInt8(s0.AuxInt)
-                       r0 := s0.Args[0]
-                       if r0.Op != OpAMD64ROLWconst || auxIntToInt8(r0.AuxInt) != 8 {
-                               continue
-                       }
-                       x0 := r0.Args[0]
-                       if x0.Op != OpAMD64MOVWload {
+                       if v_0.Op != OpAMD64MOVQconst {
                                continue
                        }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       or := v_1
-                       if or.Op != OpAMD64ORQ {
+                       c := auxIntToInt64(v_0.AuxInt)
+                       if v_1.Op != OpAMD64MOVQconst {
                                continue
                        }
-                       _ = or.Args[1]
-                       or_0 := or.Args[0]
-                       or_1 := or.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
-                               s1 := or_0
-                               if s1.Op != OpAMD64SHLQconst {
-                                       continue
-                               }
-                               j1 := auxIntToInt8(s1.AuxInt)
-                               r1 := s1.Args[0]
-                               if r1.Op != OpAMD64ROLWconst || auxIntToInt8(r1.AuxInt) != 8 {
-                                       continue
-                               }
-                               x1 := r1.Args[0]
-                               if x1.Op != OpAMD64MOVWload {
-                                       continue
-                               }
-                               i1 := auxIntToInt32(x1.AuxInt)
-                               if auxToSym(x1.Aux) != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               if p != x1.Args[0] || mem != x1.Args[1] {
-                                       continue
-                               }
-                               y := or_1
-                               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, y)
-                               v0 := b.NewValue0(x1.Pos, OpAMD64ORQ, v.Type)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x1.Pos, OpAMD64SHLQconst, v.Type)
-                               v1.AuxInt = int8ToAuxInt(j1)
-                               v2 := b.NewValue0(x1.Pos, OpAMD64BSWAPL, typ.UInt32)
-                               v3 := b.NewValue0(x1.Pos, OpAMD64MOVLload, typ.UInt32)
-                               v3.AuxInt = int32ToAuxInt(i0)
-                               v3.Aux = symToAux(s)
-                               v3.AddArg2(p, mem)
-                               v2.AddArg(v3)
-                               v1.AddArg(v2)
-                               v0.AddArg2(v1, y)
-                               return true
-                       }
+                       d := auxIntToInt64(v_1.AuxInt)
+                       v.reset(OpAMD64MOVQconst)
+                       v.AuxInt = int64ToAuxInt(c | d)
+                       return true
                }
                break
        }
-       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem))) y))
-       // cond: j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i] {s} p0 mem))) y)
+       // match: (ORQ x x)
+       // result: x
        for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s0 := v_0
-                       if s0.Op != OpAMD64SHLQconst {
-                               continue
-                       }
-                       j0 := auxIntToInt8(s0.AuxInt)
-                       r0 := s0.Args[0]
-                       if r0.Op != OpAMD64ROLWconst || auxIntToInt8(r0.AuxInt) != 8 {
-                               continue
-                       }
-                       x0 := r0.Args[0]
-                       if x0.Op != OpAMD64MOVWload {
-                               continue
-                       }
-                       i := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p0 := x0.Args[0]
-                       or := v_1
-                       if or.Op != OpAMD64ORQ {
-                               continue
-                       }
-                       _ = or.Args[1]
-                       or_0 := or.Args[0]
-                       or_1 := or.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
-                               s1 := or_0
-                               if s1.Op != OpAMD64SHLQconst {
-                                       continue
-                               }
-                               j1 := auxIntToInt8(s1.AuxInt)
-                               r1 := s1.Args[0]
-                               if r1.Op != OpAMD64ROLWconst || auxIntToInt8(r1.AuxInt) != 8 {
-                                       continue
-                               }
-                               x1 := r1.Args[0]
-                               if x1.Op != OpAMD64MOVWload || auxIntToInt32(x1.AuxInt) != i || auxToSym(x1.Aux) != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               p1 := x1.Args[0]
-                               if mem != x1.Args[1] {
-                                       continue
-                               }
-                               y := or_1
-                               if !(j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, y)
-                               v0 := b.NewValue0(x1.Pos, OpAMD64ORQ, v.Type)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x1.Pos, OpAMD64SHLQconst, v.Type)
-                               v1.AuxInt = int8ToAuxInt(j1)
-                               v2 := b.NewValue0(x1.Pos, OpAMD64BSWAPL, typ.UInt32)
-                               v3 := b.NewValue0(x1.Pos, OpAMD64MOVLload, typ.UInt32)
-                               v3.AuxInt = int32ToAuxInt(i)
-                               v3.Aux = symToAux(s)
-                               v3.AddArg2(p0, mem)
-                               v2.AddArg(v3)
-                               v1.AddArg(v2)
-                               v0.AddArg2(v1, y)
-                               return true
-                       }
+               x := v_0
+               if x != v_1 {
+                       break
                }
-               break
+               v.copyOf(x)
+               return true
        }
        // match: (ORQ x l:(MOVQload [off] {sym} ptr mem))
        // cond: canMergeLoadClobber(v, l, x) && clobber(l)
@@ -18186,81 +15286,6 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                }
                break
        }
-       // match: (ORQ x0:(MOVBELload [i0] {s} p mem) sh:(SHLQconst [32] x1:(MOVBELload [i1] {s} p mem)))
-       // cond: i0 == i1+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVBEQload [i1] {s} p mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpAMD64MOVBELload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLQconst || auxIntToInt8(sh.AuxInt) != 32 {
-                               continue
-                       }
-                       x1 := sh.Args[0]
-                       if x1.Op != OpAMD64MOVBELload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(i0 == i1+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpAMD64MOVBEQload, typ.UInt64)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i1)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p, mem)
-                       return true
-               }
-               break
-       }
-       // match: (ORQ x0:(MOVBELload [i] {s} p0 mem) sh:(SHLQconst [32] x1:(MOVBELload [i] {s} p1 mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p1, p0, 4) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVBEQload [i] {s} p1 mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpAMD64MOVBELload {
-                               continue
-                       }
-                       i := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p0 := x0.Args[0]
-                       sh := v_1
-                       if sh.Op != OpAMD64SHLQconst || auxIntToInt8(sh.AuxInt) != 32 {
-                               continue
-                       }
-                       x1 := sh.Args[0]
-                       if x1.Op != OpAMD64MOVBELload || auxIntToInt32(x1.AuxInt) != i || auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       p1 := x1.Args[0]
-                       if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p1, p0, 4) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpAMD64MOVBEQload, typ.UInt64)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p1, mem)
-                       return true
-               }
-               break
-       }
        return false
 }
 func rewriteValueAMD64_OpAMD64ORQconst(v *Value) bool {
@@ -27794,6 +24819,18 @@ func rewriteValueAMD64_OpBitLen8(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpBswap16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Bswap16 x)
+       // result: (ROLWconst [8] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64ROLWconst)
+               v.AuxInt = int8ToAuxInt(8)
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValueAMD64_OpCeil(v *Value) bool {
        v_0 := v.Args[0]
        // match: (Ceil x)
index ca1704fe41091ce515ff8c3f5f2f669313698938..b655c62720d7f986587e8deb6dc69027d049894c 100644 (file)
@@ -545,6 +545,9 @@ func rewriteValueARM64(v *Value) bool {
                return true
        case OpBitRev8:
                return rewriteValueARM64_OpBitRev8(v)
+       case OpBswap16:
+               v.Op = OpARM64REV16W
+               return true
        case OpBswap32:
                v.Op = OpARM64REVW
                return true
@@ -8726,7 +8729,6 @@ func rewriteValueARM64_OpARM64MOVBstore(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
        config := b.Func.Config
-       typ := &b.Func.Config.Types
        // match: (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem)
        // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
        // result: (MOVBstore [off1+int32(off2)] {sym} ptr val mem)
@@ -8910,1680 +8912,1489 @@ func rewriteValueARM64_OpARM64MOVBstore(v *Value) bool {
                v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} ptr0 (SRLconst [8] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr0 w mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVBstoreidx(v *Value) bool {
+       v_3 := v.Args[3]
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVBstoreidx ptr (MOVDconst [c]) val mem)
+       // cond: is32Bit(c)
+       // result: (MOVBstore [int32(c)] ptr val mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               ptr0 := v_0
-               if v_1.Op != OpARM64SRLconst || auxIntToInt64(v_1.AuxInt) != 8 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
+               ptr := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] || !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               c := auxIntToInt64(v_1.AuxInt)
+               val := v_2
+               mem := v_3
+               if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(ptr0, w, mem)
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = int32ToAuxInt(int32(c))
+               v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] w) x:(MOVBstoreidx ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr1 idx1 w mem)
+       // match: (MOVBstoreidx (MOVDconst [c]) idx val mem)
+       // cond: is32Bit(c)
+       // result: (MOVBstore [int32(c)] idx val mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 1 {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
+               c := auxIntToInt64(v_0.AuxInt)
+               idx := v_1
+               val := v_2
+               mem := v_3
+               if !(is32Bit(c)) {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr0 := v_0_0
-                       idx0 := v_0_1
-                       if v_1.Op != OpARM64SRLconst || auxIntToInt64(v_1.AuxInt) != 8 {
-                               continue
-                       }
-                       w := v_1.Args[0]
-                       x := v_2
-                       if x.Op != OpARM64MOVBstoreidx {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       ptr1 := x.Args[0]
-                       idx1 := x.Args[1]
-                       if w != x.Args[2] || !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVHstoreidx)
-                       v.AddArg4(ptr1, idx1, w, mem)
-                       return true
-               }
-               break
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = int32ToAuxInt(int32(c))
+               v.AddArg3(idx, val, mem)
+               return true
        }
-       // match: (MOVBstore [i] {s} ptr0 (UBFX [armBFAuxInt(8, 8)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr0 w mem)
+       // match: (MOVBstoreidx ptr idx (MOVDconst [0]) mem)
+       // result: (MOVBstorezeroidx ptr idx mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               ptr0 := v_0
-               if v_1.Op != OpARM64UBFX || auxIntToArm64BitField(v_1.AuxInt) != armBFAuxInt(8, 8) {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVDconst || auxIntToInt64(v_2.AuxInt) != 0 {
                        break
                }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
+               mem := v_3
+               v.reset(OpARM64MOVBstorezeroidx)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       // match: (MOVBstoreidx ptr idx (MOVBreg x) mem)
+       // result: (MOVBstoreidx ptr idx x mem)
+       for {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVBreg {
                        break
                }
-               mem := x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] || !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               x := v_2.Args[0]
+               mem := v_3
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg4(ptr, idx, x, mem)
+               return true
+       }
+       // match: (MOVBstoreidx ptr idx (MOVBUreg x) mem)
+       // result: (MOVBstoreidx ptr idx x mem)
+       for {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVBUreg {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(ptr0, w, mem)
+               x := v_2.Args[0]
+               mem := v_3
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg4(ptr, idx, x, mem)
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(8, 8)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr1 idx1 w mem)
+       // match: (MOVBstoreidx ptr idx (MOVHreg x) mem)
+       // result: (MOVBstoreidx ptr idx x mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 1 {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVHreg {
                        break
                }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
+               x := v_2.Args[0]
+               mem := v_3
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg4(ptr, idx, x, mem)
+               return true
+       }
+       // match: (MOVBstoreidx ptr idx (MOVHUreg x) mem)
+       // result: (MOVBstoreidx ptr idx x mem)
+       for {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVHUreg {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr0 := v_0_0
-                       idx0 := v_0_1
-                       if v_1.Op != OpARM64UBFX || auxIntToArm64BitField(v_1.AuxInt) != armBFAuxInt(8, 8) {
-                               continue
-                       }
-                       w := v_1.Args[0]
-                       x := v_2
-                       if x.Op != OpARM64MOVBstoreidx {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       ptr1 := x.Args[0]
-                       idx1 := x.Args[1]
-                       if w != x.Args[2] || !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVHstoreidx)
-                       v.AddArg4(ptr1, idx1, w, mem)
-                       return true
+               x := v_2.Args[0]
+               mem := v_3
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg4(ptr, idx, x, mem)
+               return true
+       }
+       // match: (MOVBstoreidx ptr idx (MOVWreg x) mem)
+       // result: (MOVBstoreidx ptr idx x mem)
+       for {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVWreg {
+                       break
                }
-               break
+               x := v_2.Args[0]
+               mem := v_3
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg4(ptr, idx, x, mem)
+               return true
        }
-       // match: (MOVBstore [i] {s} ptr0 (UBFX [armBFAuxInt(8, 24)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr0 w mem)
+       // match: (MOVBstoreidx ptr idx (MOVWUreg x) mem)
+       // result: (MOVBstoreidx ptr idx x mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               ptr0 := v_0
-               if v_1.Op != OpARM64UBFX || auxIntToArm64BitField(v_1.AuxInt) != armBFAuxInt(8, 24) {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVWUreg {
                        break
                }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
+               x := v_2.Args[0]
+               mem := v_3
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg4(ptr, idx, x, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVBstorezero(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       config := b.Func.Config
+       // match: (MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+       // result: (MOVBstorezero [off1+int32(off2)] {sym} ptr mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               mem := x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] || !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               off2 := auxIntToInt64(v_0.AuxInt)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(ptr0, w, mem)
+               v.reset(OpARM64MOVBstorezero)
+               v.AuxInt = int32ToAuxInt(off1 + int32(off2))
+               v.Aux = symToAux(sym)
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(8, 24)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr1 idx1 w mem)
+       // match: (MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+       // result: (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 1 {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr0 := v_0_0
-                       idx0 := v_0_1
-                       if v_1.Op != OpARM64UBFX || auxIntToArm64BitField(v_1.AuxInt) != armBFAuxInt(8, 24) {
-                               continue
-                       }
-                       w := v_1.Args[0]
-                       x := v_2
-                       if x.Op != OpARM64MOVBstoreidx {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       ptr1 := x.Args[0]
-                       idx1 := x.Args[1]
-                       if w != x.Args[2] || !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVHstoreidx)
-                       v.AddArg4(ptr1, idx1, w, mem)
-                       return true
-               }
-               break
+               v.reset(OpARM64MOVBstorezero)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg2(ptr, mem)
+               return true
        }
-       // match: (MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr0 w mem)
+       // match: (MOVBstorezero [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVBstorezeroidx ptr idx mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               ptr0 := v_0
-               if v_1.Op != OpARM64SRLconst || auxIntToInt64(v_1.AuxInt) != 8 {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpARM64MOVDreg {
+               idx := v_0.Args[1]
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(off == 0 && sym == nil) {
                        break
                }
-               w := v_1_0.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
+               v.reset(OpARM64MOVBstorezeroidx)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVBstorezeroidx(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVBstorezeroidx ptr (MOVDconst [c]) mem)
+       // cond: is32Bit(c)
+       // result: (MOVBstorezero [int32(c)] ptr mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] || !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               c := auxIntToInt64(v_1.AuxInt)
+               mem := v_2
+               if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(ptr0, w, mem)
+               v.reset(OpARM64MOVBstorezero)
+               v.AuxInt = int32ToAuxInt(int32(c))
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr1 idx1 w mem)
+       // match: (MOVBstorezeroidx (MOVDconst [c]) idx mem)
+       // cond: is32Bit(c)
+       // result: (MOVBstorezero [int32(c)] idx mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 1 {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
+               c := auxIntToInt64(v_0.AuxInt)
+               idx := v_1
+               mem := v_2
+               if !(is32Bit(c)) {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr0 := v_0_0
-                       idx0 := v_0_1
-                       if v_1.Op != OpARM64SRLconst || auxIntToInt64(v_1.AuxInt) != 8 {
-                               continue
-                       }
-                       v_1_0 := v_1.Args[0]
-                       if v_1_0.Op != OpARM64MOVDreg {
-                               continue
-                       }
-                       w := v_1_0.Args[0]
-                       x := v_2
-                       if x.Op != OpARM64MOVBstoreidx {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       ptr1 := x.Args[0]
-                       idx1 := x.Args[1]
-                       if w != x.Args[2] || !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVHstoreidx)
-                       v.AddArg4(ptr1, idx1, w, mem)
-                       return true
-               }
-               break
+               v.reset(OpARM64MOVBstorezero)
+               v.AuxInt = int32ToAuxInt(int32(c))
+               v.AddArg2(idx, mem)
+               return true
        }
-       // match: (MOVBstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] w) mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr0 w0 mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDload(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       config := b.Func.Config
+       // match: (MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr val _))
+       // result: (FMOVDfpgp val)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               ptr0 := v_0
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               j := auxIntToInt64(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpARM64FMOVDstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
                        break
                }
-               mem := x.Args[2]
-               ptr1 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != OpARM64SRLconst || auxIntToInt64(w0.AuxInt) != j-8 || w != w0.Args[0] || !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               val := v_1.Args[1]
+               if ptr != v_1.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(ptr0, w0, mem)
+               v.reset(OpARM64FMOVDfpgp)
+               v.AddArg(val)
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] w) mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr1 idx1 w0 mem)
+       // match: (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+       // result: (MOVDload [off1+int32(off2)] {sym} ptr mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 1 {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
+               off2 := auxIntToInt64(v_0.AuxInt)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr0 := v_0_0
-                       idx0 := v_0_1
-                       if v_1.Op != OpARM64SRLconst {
-                               continue
-                       }
-                       j := auxIntToInt64(v_1.AuxInt)
-                       w := v_1.Args[0]
-                       x := v_2
-                       if x.Op != OpARM64MOVBstoreidx {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       ptr1 := x.Args[0]
-                       idx1 := x.Args[1]
-                       w0 := x.Args[2]
-                       if w0.Op != OpARM64SRLconst || auxIntToInt64(w0.AuxInt) != j-8 || w != w0.Args[0] || !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVHstoreidx)
-                       v.AddArg4(ptr1, idx1, w0, mem)
-                       return true
-               }
-               break
+               v.reset(OpARM64MOVDload)
+               v.AuxInt = int32ToAuxInt(off1 + int32(off2))
+               v.Aux = symToAux(sym)
+               v.AddArg2(ptr, mem)
+               return true
        }
-       // match: (MOVBstore [i] {s} ptr0 (UBFX [bfc] w) x:(MOVBstore [i-1] {s} ptr1 w0:(UBFX [bfc2] w) mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && bfc.getARM64BFwidth() == 32 - bfc.getARM64BFlsb() && bfc2.getARM64BFwidth() == 32 - bfc2.getARM64BFlsb() && bfc2.getARM64BFlsb() == bfc.getARM64BFlsb() - 8 && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr0 w0 mem)
+       // match: (MOVDload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVDloadidx ptr idx mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               ptr0 := v_0
-               if v_1.Op != OpARM64UBFX {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               bfc := auxIntToArm64BitField(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
+               idx := v_0.Args[1]
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(off == 0 && sym == nil) {
                        break
                }
-               mem := x.Args[2]
-               ptr1 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != OpARM64UBFX {
+               v.reset(OpARM64MOVDloadidx)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       // match: (MOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVDloadidx8 ptr idx mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 3 {
                        break
                }
-               bfc2 := auxIntToArm64BitField(w0.AuxInt)
-               if w != w0.Args[0] || !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && bfc.getARM64BFwidth() == 32-bfc.getARM64BFlsb() && bfc2.getARM64BFwidth() == 32-bfc2.getARM64BFlsb() && bfc2.getARM64BFlsb() == bfc.getARM64BFlsb()-8 && clobber(x)) {
+               idx := v_0.Args[1]
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(ptr0, w0, mem)
+               v.reset(OpARM64MOVDloadidx8)
+               v.AddArg3(ptr, idx, mem)
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [bfc] w) x:(MOVBstoreidx ptr1 idx1 w0:(UBFX [bfc2] w) mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && bfc.getARM64BFwidth() == 32 - bfc.getARM64BFlsb() && bfc2.getARM64BFwidth() == 32 - bfc2.getARM64BFlsb() && bfc2.getARM64BFlsb() == bfc.getARM64BFlsb() - 8 && clobber(x)
-       // result: (MOVHstoreidx ptr1 idx1 w0 mem)
+       // match: (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+       // result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 1 {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr0 := v_0_0
-                       idx0 := v_0_1
-                       if v_1.Op != OpARM64UBFX {
-                               continue
-                       }
-                       bfc := auxIntToArm64BitField(v_1.AuxInt)
-                       w := v_1.Args[0]
-                       x := v_2
-                       if x.Op != OpARM64MOVBstoreidx {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       ptr1 := x.Args[0]
-                       idx1 := x.Args[1]
-                       w0 := x.Args[2]
-                       if w0.Op != OpARM64UBFX {
-                               continue
-                       }
-                       bfc2 := auxIntToArm64BitField(w0.AuxInt)
-                       if w != w0.Args[0] || !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && bfc.getARM64BFwidth() == 32-bfc.getARM64BFlsb() && bfc2.getARM64BFwidth() == 32-bfc2.getARM64BFlsb() && bfc2.getARM64BFlsb() == bfc.getARM64BFlsb()-8 && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVHstoreidx)
-                       v.AddArg4(ptr1, idx1, w0, mem)
-                       return true
-               }
-               break
+               v.reset(OpARM64MOVDload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg2(ptr, mem)
+               return true
        }
-       // match: (MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr0 w0 mem)
+       // match: (MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               ptr0 := v_0
-               if v_1.Op != OpARM64SRLconst {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpARM64MOVDstorezero {
                        break
                }
-               j := auxIntToInt64(v_1.AuxInt)
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpARM64MOVDreg {
+               off2 := auxIntToInt32(v_1.AuxInt)
+               sym2 := auxToSym(v_1.Aux)
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               w := v_1_0.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(0)
+               return true
+       }
+       // match: (MOVDload [off] {sym} (SB) _)
+       // cond: symIsRO(sym)
+       // result: (MOVDconst [int64(read64(sym, int64(off), config.ctxt.Arch.ByteOrder))])
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpSB || !(symIsRO(sym)) {
                        break
                }
-               mem := x.Args[2]
-               ptr1 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != OpARM64SRLconst || auxIntToInt64(w0.AuxInt) != j-8 {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(int64(read64(sym, int64(off), config.ctxt.Arch.ByteOrder)))
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDloadidx(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVDloadidx ptr (MOVDconst [c]) mem)
+       // cond: is32Bit(c)
+       // result: (MOVDload [int32(c)] ptr mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               w0_0 := w0.Args[0]
-               if w0_0.Op != OpARM64MOVDreg || w != w0_0.Args[0] || !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               c := auxIntToInt64(v_1.AuxInt)
+               mem := v_2
+               if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(ptr0, w0, mem)
+               v.reset(OpARM64MOVDload)
+               v.AuxInt = int32ToAuxInt(int32(c))
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr1 idx1 w0 mem)
+       // match: (MOVDloadidx (MOVDconst [c]) ptr mem)
+       // cond: is32Bit(c)
+       // result: (MOVDload [int32(c)] ptr mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 1 {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
+               c := auxIntToInt64(v_0.AuxInt)
+               ptr := v_1
+               mem := v_2
+               if !(is32Bit(c)) {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr0 := v_0_0
-                       idx0 := v_0_1
-                       if v_1.Op != OpARM64SRLconst {
-                               continue
-                       }
-                       j := auxIntToInt64(v_1.AuxInt)
-                       v_1_0 := v_1.Args[0]
-                       if v_1_0.Op != OpARM64MOVDreg {
-                               continue
-                       }
-                       w := v_1_0.Args[0]
-                       x := v_2
-                       if x.Op != OpARM64MOVBstoreidx {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       ptr1 := x.Args[0]
-                       idx1 := x.Args[1]
-                       w0 := x.Args[2]
-                       if w0.Op != OpARM64SRLconst || auxIntToInt64(w0.AuxInt) != j-8 {
-                               continue
-                       }
-                       w0_0 := w0.Args[0]
-                       if w0_0.Op != OpARM64MOVDreg || w != w0_0.Args[0] || !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVHstoreidx)
-                       v.AddArg4(ptr1, idx1, w0, mem)
-                       return true
-               }
-               break
+               v.reset(OpARM64MOVDload)
+               v.AuxInt = int32ToAuxInt(int32(c))
+               v.AddArg2(ptr, mem)
+               return true
        }
-       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) x3:(MOVBstore [i-4] {s} ptr (SRLconst [32] w) x4:(MOVBstore [i-5] {s} ptr (SRLconst [40] w) x5:(MOVBstore [i-6] {s} ptr (SRLconst [48] w) x6:(MOVBstore [i-7] {s} ptr (SRLconst [56] w) mem))))))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0, x1, x2, x3, x4, x5, x6)
-       // result: (MOVDstore [i-7] {s} ptr (REV <typ.UInt64> w) mem)
+       // match: (MOVDloadidx ptr (SLLconst [3] idx) mem)
+       // result: (MOVDloadidx8 ptr idx mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
                ptr := v_0
-               w := v_1
-               x0 := v_2
-               if x0.Op != OpARM64MOVBstore || auxIntToInt32(x0.AuxInt) != i-1 || auxToSym(x0.Aux) != s {
-                       break
-               }
-               _ = x0.Args[2]
-               if ptr != x0.Args[0] {
+               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 3 {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64SRLconst || auxIntToInt64(x0_1.AuxInt) != 8 || w != x0_1.Args[0] {
+               idx := v_1.Args[0]
+               mem := v_2
+               v.reset(OpARM64MOVDloadidx8)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       // match: (MOVDloadidx (SLLconst [3] idx) ptr mem)
+       // result: (MOVDloadidx8 ptr idx mem)
+       for {
+               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 3 {
                        break
                }
-               x1 := x0.Args[2]
-               if x1.Op != OpARM64MOVBstore || auxIntToInt32(x1.AuxInt) != i-2 || auxToSym(x1.Aux) != s {
+               idx := v_0.Args[0]
+               ptr := v_1
+               mem := v_2
+               v.reset(OpARM64MOVDloadidx8)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       // match: (MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
+       for {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVDstorezeroidx {
                        break
                }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] {
+               idx2 := v_2.Args[1]
+               ptr2 := v_2.Args[0]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64SRLconst || auxIntToInt64(x1_1.AuxInt) != 16 || w != x1_1.Args[0] {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDloadidx8(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVDloadidx8 ptr (MOVDconst [c]) mem)
+       // cond: is32Bit(c<<3)
+       // result: (MOVDload [int32(c)<<3] ptr mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x2 := x1.Args[2]
-               if x2.Op != OpARM64MOVBstore || auxIntToInt32(x2.AuxInt) != i-3 || auxToSym(x2.Aux) != s {
+               c := auxIntToInt64(v_1.AuxInt)
+               mem := v_2
+               if !(is32Bit(c << 3)) {
                        break
                }
-               _ = x2.Args[2]
-               if ptr != x2.Args[0] {
+               v.reset(OpARM64MOVDload)
+               v.AuxInt = int32ToAuxInt(int32(c) << 3)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVDloadidx8 ptr idx (MOVDstorezeroidx8 ptr2 idx2 _))
+       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
+       // result: (MOVDconst [0])
+       for {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVDstorezeroidx8 {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64SRLconst || auxIntToInt64(x2_1.AuxInt) != 24 || w != x2_1.Args[0] {
+               idx2 := v_2.Args[1]
+               ptr2 := v_2.Args[0]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
                        break
                }
-               x3 := x2.Args[2]
-               if x3.Op != OpARM64MOVBstore || auxIntToInt32(x3.AuxInt) != i-4 || auxToSym(x3.Aux) != s {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDnop(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (MOVDnop (MOVDconst [c]))
+       // result: (MOVDconst [c])
+       for {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x3.Args[2]
-               if ptr != x3.Args[0] {
+               c := auxIntToInt64(v_0.AuxInt)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(c)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDreg(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (MOVDreg x)
+       // cond: x.Uses == 1
+       // result: (MOVDnop x)
+       for {
+               x := v_0
+               if !(x.Uses == 1) {
                        break
                }
-               x3_1 := x3.Args[1]
-               if x3_1.Op != OpARM64SRLconst || auxIntToInt64(x3_1.AuxInt) != 32 || w != x3_1.Args[0] {
+               v.reset(OpARM64MOVDnop)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVDreg (MOVDconst [c]))
+       // result: (MOVDconst [c])
+       for {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x4 := x3.Args[2]
-               if x4.Op != OpARM64MOVBstore || auxIntToInt32(x4.AuxInt) != i-5 || auxToSym(x4.Aux) != s {
+               c := auxIntToInt64(v_0.AuxInt)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(c)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDstore(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       config := b.Func.Config
+       // match: (MOVDstore [off] {sym} ptr (FMOVDfpgp val) mem)
+       // result: (FMOVDstore [off] {sym} ptr val mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpARM64FMOVDfpgp {
                        break
                }
-               _ = x4.Args[2]
-               if ptr != x4.Args[0] {
+               val := v_1.Args[0]
+               mem := v_2
+               v.reset(OpARM64FMOVDstore)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+       // result: (MOVDstore [off1+int32(off2)] {sym} ptr val mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               x4_1 := x4.Args[1]
-               if x4_1.Op != OpARM64SRLconst || auxIntToInt64(x4_1.AuxInt) != 40 || w != x4_1.Args[0] {
+               off2 := auxIntToInt64(v_0.AuxInt)
+               ptr := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               x5 := x4.Args[2]
-               if x5.Op != OpARM64MOVBstore || auxIntToInt32(x5.AuxInt) != i-6 || auxToSym(x5.Aux) != s {
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = int32ToAuxInt(off1 + int32(off2))
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (MOVDstore [off] {sym} (ADD ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVDstoreidx ptr idx val mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               _ = x5.Args[2]
-               if ptr != x5.Args[0] {
+               idx := v_0.Args[1]
+               ptr := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(off == 0 && sym == nil) {
                        break
                }
-               x5_1 := x5.Args[1]
-               if x5_1.Op != OpARM64SRLconst || auxIntToInt64(x5_1.AuxInt) != 48 || w != x5_1.Args[0] {
+               v.reset(OpARM64MOVDstoreidx)
+               v.AddArg4(ptr, idx, val, mem)
+               return true
+       }
+       // match: (MOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVDstoreidx8 ptr idx val mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 3 {
                        break
                }
-               x6 := x5.Args[2]
-               if x6.Op != OpARM64MOVBstore || auxIntToInt32(x6.AuxInt) != i-7 || auxToSym(x6.Aux) != s {
+               idx := v_0.Args[1]
+               ptr := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(off == 0 && sym == nil) {
                        break
                }
-               mem := x6.Args[2]
-               if ptr != x6.Args[0] {
+               v.reset(OpARM64MOVDstoreidx8)
+               v.AddArg4(ptr, idx, val, mem)
+               return true
+       }
+       // match: (MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+       // result: (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               x6_1 := x6.Args[1]
-               if x6_1.Op != OpARM64SRLconst || auxIntToInt64(x6_1.AuxInt) != 56 || w != x6_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0, x1, x2, x3, x4, x5, x6)) {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               ptr := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
                v.reset(OpARM64MOVDstore)
-               v.AuxInt = int32ToAuxInt(i - 7)
-               v.Aux = symToAux(s)
-               v0 := b.NewValue0(x6.Pos, OpARM64REV, typ.UInt64)
-               v0.AddArg(w)
-               v.AddArg3(ptr, v0, mem)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVBstore [7] {s} p w x0:(MOVBstore [6] {s} p (SRLconst [8] w) x1:(MOVBstore [5] {s} p (SRLconst [16] w) x2:(MOVBstore [4] {s} p (SRLconst [24] w) x3:(MOVBstore [3] {s} p (SRLconst [32] w) x4:(MOVBstore [2] {s} p (SRLconst [40] w) x5:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [48] w) x6:(MOVBstoreidx ptr0 idx0 (SRLconst [56] w) mem))))))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2, x3, x4, x5, x6)
-       // result: (MOVDstoreidx ptr0 idx0 (REV <typ.UInt64> w) mem)
+       // match: (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem)
+       // result: (MOVDstorezero [off] {sym} ptr mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 7 {
-                       break
-               }
-               s := auxToSym(v.Aux)
-               p := v_0
-               w := v_1
-               x0 := v_2
-               if x0.Op != OpARM64MOVBstore || auxIntToInt32(x0.AuxInt) != 6 || auxToSym(x0.Aux) != s {
-                       break
-               }
-               _ = x0.Args[2]
-               if p != x0.Args[0] {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpARM64MOVDconst || auxIntToInt64(v_1.AuxInt) != 0 {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64SRLconst || auxIntToInt64(x0_1.AuxInt) != 8 || w != x0_1.Args[0] {
+               mem := v_2
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDstoreidx(v *Value) bool {
+       v_3 := v.Args[3]
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVDstoreidx ptr (MOVDconst [c]) val mem)
+       // cond: is32Bit(c)
+       // result: (MOVDstore [int32(c)] ptr val mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x1 := x0.Args[2]
-               if x1.Op != OpARM64MOVBstore || auxIntToInt32(x1.AuxInt) != 5 || auxToSym(x1.Aux) != s {
+               c := auxIntToInt64(v_1.AuxInt)
+               val := v_2
+               mem := v_3
+               if !(is32Bit(c)) {
                        break
                }
-               _ = x1.Args[2]
-               if p != x1.Args[0] {
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = int32ToAuxInt(int32(c))
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (MOVDstoreidx (MOVDconst [c]) idx val mem)
+       // cond: is32Bit(c)
+       // result: (MOVDstore [int32(c)] idx val mem)
+       for {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64SRLconst || auxIntToInt64(x1_1.AuxInt) != 16 || w != x1_1.Args[0] {
+               c := auxIntToInt64(v_0.AuxInt)
+               idx := v_1
+               val := v_2
+               mem := v_3
+               if !(is32Bit(c)) {
                        break
                }
-               x2 := x1.Args[2]
-               if x2.Op != OpARM64MOVBstore || auxIntToInt32(x2.AuxInt) != 4 || auxToSym(x2.Aux) != s {
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = int32ToAuxInt(int32(c))
+               v.AddArg3(idx, val, mem)
+               return true
+       }
+       // match: (MOVDstoreidx ptr (SLLconst [3] idx) val mem)
+       // result: (MOVDstoreidx8 ptr idx val mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 3 {
                        break
                }
-               _ = x2.Args[2]
-               if p != x2.Args[0] {
+               idx := v_1.Args[0]
+               val := v_2
+               mem := v_3
+               v.reset(OpARM64MOVDstoreidx8)
+               v.AddArg4(ptr, idx, val, mem)
+               return true
+       }
+       // match: (MOVDstoreidx (SLLconst [3] idx) ptr val mem)
+       // result: (MOVDstoreidx8 ptr idx val mem)
+       for {
+               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 3 {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64SRLconst || auxIntToInt64(x2_1.AuxInt) != 24 || w != x2_1.Args[0] {
+               idx := v_0.Args[0]
+               ptr := v_1
+               val := v_2
+               mem := v_3
+               v.reset(OpARM64MOVDstoreidx8)
+               v.AddArg4(ptr, idx, val, mem)
+               return true
+       }
+       // match: (MOVDstoreidx ptr idx (MOVDconst [0]) mem)
+       // result: (MOVDstorezeroidx ptr idx mem)
+       for {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVDconst || auxIntToInt64(v_2.AuxInt) != 0 {
                        break
                }
-               x3 := x2.Args[2]
-               if x3.Op != OpARM64MOVBstore || auxIntToInt32(x3.AuxInt) != 3 || auxToSym(x3.Aux) != s {
+               mem := v_3
+               v.reset(OpARM64MOVDstorezeroidx)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDstoreidx8(v *Value) bool {
+       v_3 := v.Args[3]
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVDstoreidx8 ptr (MOVDconst [c]) val mem)
+       // cond: is32Bit(c<<3)
+       // result: (MOVDstore [int32(c)<<3] ptr val mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x3.Args[2]
-               if p != x3.Args[0] {
+               c := auxIntToInt64(v_1.AuxInt)
+               val := v_2
+               mem := v_3
+               if !(is32Bit(c << 3)) {
                        break
                }
-               x3_1 := x3.Args[1]
-               if x3_1.Op != OpARM64SRLconst || auxIntToInt64(x3_1.AuxInt) != 32 || w != x3_1.Args[0] {
-                       break
-               }
-               x4 := x3.Args[2]
-               if x4.Op != OpARM64MOVBstore || auxIntToInt32(x4.AuxInt) != 2 || auxToSym(x4.Aux) != s {
-                       break
-               }
-               _ = x4.Args[2]
-               if p != x4.Args[0] {
-                       break
-               }
-               x4_1 := x4.Args[1]
-               if x4_1.Op != OpARM64SRLconst || auxIntToInt64(x4_1.AuxInt) != 40 || w != x4_1.Args[0] {
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = int32ToAuxInt(int32(c) << 3)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (MOVDstoreidx8 ptr idx (MOVDconst [0]) mem)
+       // result: (MOVDstorezeroidx8 ptr idx mem)
+       for {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVDconst || auxIntToInt64(v_2.AuxInt) != 0 {
                        break
                }
-               x5 := x4.Args[2]
-               if x5.Op != OpARM64MOVBstore || auxIntToInt32(x5.AuxInt) != 1 || auxToSym(x5.Aux) != s {
+               mem := v_3
+               v.reset(OpARM64MOVDstorezeroidx8)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDstorezero(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       config := b.Func.Config
+       // match: (MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i+8] ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVQstorezero {s} [i] ptr mem)
+       for {
+               i := auxIntToInt32(v.AuxInt)
+               s := auxToSym(v.Aux)
+               ptr := v_0
+               x := v_1
+               if x.Op != OpARM64MOVDstorezero || auxIntToInt32(x.AuxInt) != i+8 || auxToSym(x.Aux) != s {
                        break
                }
-               _ = x5.Args[2]
-               p1 := x5.Args[0]
-               if p1.Op != OpARM64ADD {
+               mem := x.Args[1]
+               if ptr != x.Args[0] || !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               _ = p1.Args[1]
-               p1_0 := p1.Args[0]
-               p1_1 := p1.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, p1_0, p1_1 = _i0+1, p1_1, p1_0 {
-                       ptr1 := p1_0
-                       idx1 := p1_1
-                       x5_1 := x5.Args[1]
-                       if x5_1.Op != OpARM64SRLconst || auxIntToInt64(x5_1.AuxInt) != 48 || w != x5_1.Args[0] {
-                               continue
-                       }
-                       x6 := x5.Args[2]
-                       if x6.Op != OpARM64MOVBstoreidx {
-                               continue
-                       }
-                       mem := x6.Args[3]
-                       ptr0 := x6.Args[0]
-                       idx0 := x6.Args[1]
-                       x6_2 := x6.Args[2]
-                       if x6_2.Op != OpARM64SRLconst || auxIntToInt64(x6_2.AuxInt) != 56 || w != x6_2.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2, x3, x4, x5, x6)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVDstoreidx)
-                       v0 := b.NewValue0(x5.Pos, OpARM64REV, typ.UInt64)
-                       v0.AddArg(w)
-                       v.AddArg4(ptr0, idx0, v0, mem)
-                       return true
-               }
-               break
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = int32ToAuxInt(i)
+               v.Aux = symToAux(s)
+               v.AddArg2(ptr, mem)
+               return true
        }
-       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 24)] w) x1:(MOVBstore [i-2] {s} ptr (UBFX [armBFAuxInt(16, 16)] w) x2:(MOVBstore [i-3] {s} ptr (UBFX [armBFAuxInt(24, 8)] w) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)
-       // result: (MOVWstore [i-3] {s} ptr (REVW <typ.UInt32> w) mem)
+       // match: (MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i-8] ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVQstorezero {s} [i-8] ptr mem)
        for {
                i := auxIntToInt32(v.AuxInt)
                s := auxToSym(v.Aux)
                ptr := v_0
-               w := v_1
-               x0 := v_2
-               if x0.Op != OpARM64MOVBstore || auxIntToInt32(x0.AuxInt) != i-1 || auxToSym(x0.Aux) != s {
-                       break
-               }
-               _ = x0.Args[2]
-               if ptr != x0.Args[0] {
+               x := v_1
+               if x.Op != OpARM64MOVDstorezero || auxIntToInt32(x.AuxInt) != i-8 || auxToSym(x.Aux) != s {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64UBFX || auxIntToArm64BitField(x0_1.AuxInt) != armBFAuxInt(8, 24) || w != x0_1.Args[0] {
+               mem := x.Args[1]
+               if ptr != x.Args[0] || !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               x1 := x0.Args[2]
-               if x1.Op != OpARM64MOVBstore || auxIntToInt32(x1.AuxInt) != i-2 || auxToSym(x1.Aux) != s {
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = int32ToAuxInt(i - 8)
+               v.Aux = symToAux(s)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+       // result: (MOVDstorezero [off1+int32(off2)] {sym} ptr mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] {
+               off2 := auxIntToInt64(v_0.AuxInt)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64UBFX || auxIntToArm64BitField(x1_1.AuxInt) != armBFAuxInt(16, 16) || w != x1_1.Args[0] {
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = int32ToAuxInt(off1 + int32(off2))
+               v.Aux = symToAux(sym)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+       // result: (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               x2 := x1.Args[2]
-               if x2.Op != OpARM64MOVBstore || auxIntToInt32(x2.AuxInt) != i-3 || auxToSym(x2.Aux) != s {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               mem := x2.Args[2]
-               if ptr != x2.Args[0] {
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVDstorezero [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVDstorezeroidx ptr idx mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64UBFX || auxIntToArm64BitField(x2_1.AuxInt) != armBFAuxInt(24, 8) || w != x2_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)) {
+               idx := v_0.Args[1]
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i - 3)
-               v.Aux = symToAux(s)
-               v0 := b.NewValue0(x2.Pos, OpARM64REVW, typ.UInt32)
-               v0.AddArg(w)
-               v.AddArg3(ptr, v0, mem)
+               v.reset(OpARM64MOVDstorezeroidx)
+               v.AddArg3(ptr, idx, mem)
                return true
        }
-       // match: (MOVBstore [3] {s} p w x0:(MOVBstore [2] {s} p (UBFX [armBFAuxInt(8, 24)] w) x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (UBFX [armBFAuxInt(16, 16)] w) x2:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(24, 8)] w) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2)
-       // result: (MOVWstoreidx ptr0 idx0 (REVW <typ.UInt32> w) mem)
+       // match: (MOVDstorezero [off] {sym} (ADDshiftLL [3] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVDstorezeroidx8 ptr idx mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 3 {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 3 {
                        break
                }
-               s := auxToSym(v.Aux)
-               p := v_0
-               w := v_1
-               x0 := v_2
-               if x0.Op != OpARM64MOVBstore || auxIntToInt32(x0.AuxInt) != 2 || auxToSym(x0.Aux) != s {
+               idx := v_0.Args[1]
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(off == 0 && sym == nil) {
                        break
                }
-               _ = x0.Args[2]
-               if p != x0.Args[0] {
+               v.reset(OpARM64MOVDstorezeroidx8)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDstorezeroidx(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVDstorezeroidx ptr (MOVDconst [c]) mem)
+       // cond: is32Bit(c)
+       // result: (MOVDstorezero [int32(c)] ptr mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64UBFX || auxIntToArm64BitField(x0_1.AuxInt) != armBFAuxInt(8, 24) || w != x0_1.Args[0] {
+               c := auxIntToInt64(v_1.AuxInt)
+               mem := v_2
+               if !(is32Bit(c)) {
                        break
                }
-               x1 := x0.Args[2]
-               if x1.Op != OpARM64MOVBstore || auxIntToInt32(x1.AuxInt) != 1 || auxToSym(x1.Aux) != s {
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = int32ToAuxInt(int32(c))
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVDstorezeroidx (MOVDconst [c]) idx mem)
+       // cond: is32Bit(c)
+       // result: (MOVDstorezero [int32(c)] idx mem)
+       for {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x1.Args[2]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
+               c := auxIntToInt64(v_0.AuxInt)
+               idx := v_1
+               mem := v_2
+               if !(is32Bit(c)) {
                        break
                }
-               _ = p1.Args[1]
-               p1_0 := p1.Args[0]
-               p1_1 := p1.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, p1_0, p1_1 = _i0+1, p1_1, p1_0 {
-                       ptr1 := p1_0
-                       idx1 := p1_1
-                       x1_1 := x1.Args[1]
-                       if x1_1.Op != OpARM64UBFX || auxIntToArm64BitField(x1_1.AuxInt) != armBFAuxInt(16, 16) || w != x1_1.Args[0] {
-                               continue
-                       }
-                       x2 := x1.Args[2]
-                       if x2.Op != OpARM64MOVBstoreidx {
-                               continue
-                       }
-                       mem := x2.Args[3]
-                       ptr0 := x2.Args[0]
-                       idx0 := x2.Args[1]
-                       x2_2 := x2.Args[2]
-                       if x2_2.Op != OpARM64UBFX || auxIntToArm64BitField(x2_2.AuxInt) != armBFAuxInt(24, 8) || w != x2_2.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVWstoreidx)
-                       v0 := b.NewValue0(x1.Pos, OpARM64REVW, typ.UInt32)
-                       v0.AddArg(w)
-                       v.AddArg4(ptr0, idx0, v0, mem)
-                       return true
-               }
-               break
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = int32ToAuxInt(int32(c))
+               v.AddArg2(idx, mem)
+               return true
        }
-       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w)) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVDreg w)) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)
-       // result: (MOVWstore [i-3] {s} ptr (REVW <typ.UInt32> w) mem)
+       // match: (MOVDstorezeroidx ptr (SLLconst [3] idx) mem)
+       // result: (MOVDstorezeroidx8 ptr idx mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
                ptr := v_0
-               w := v_1
-               x0 := v_2
-               if x0.Op != OpARM64MOVBstore || auxIntToInt32(x0.AuxInt) != i-1 || auxToSym(x0.Aux) != s {
+               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 3 {
                        break
                }
-               _ = x0.Args[2]
-               if ptr != x0.Args[0] {
+               idx := v_1.Args[0]
+               mem := v_2
+               v.reset(OpARM64MOVDstorezeroidx8)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       // match: (MOVDstorezeroidx (SLLconst [3] idx) ptr mem)
+       // result: (MOVDstorezeroidx8 ptr idx mem)
+       for {
+               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 3 {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64SRLconst || auxIntToInt64(x0_1.AuxInt) != 8 {
+               idx := v_0.Args[0]
+               ptr := v_1
+               mem := v_2
+               v.reset(OpARM64MOVDstorezeroidx8)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDstorezeroidx8(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVDstorezeroidx8 ptr (MOVDconst [c]) mem)
+       // cond: is32Bit(c<<3)
+       // result: (MOVDstorezero [int32(c<<3)] ptr mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x0_1_0 := x0_1.Args[0]
-               if x0_1_0.Op != OpARM64MOVDreg || w != x0_1_0.Args[0] {
+               c := auxIntToInt64(v_1.AuxInt)
+               mem := v_2
+               if !(is32Bit(c << 3)) {
                        break
                }
-               x1 := x0.Args[2]
-               if x1.Op != OpARM64MOVBstore || auxIntToInt32(x1.AuxInt) != i-2 || auxToSym(x1.Aux) != s {
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = int32ToAuxInt(int32(c << 3))
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHUload(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       config := b.Func.Config
+       // match: (MOVHUload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+       // result: (MOVHUload [off1+int32(off2)] {sym} ptr mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] {
+               off2 := auxIntToInt64(v_0.AuxInt)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64SRLconst || auxIntToInt64(x1_1.AuxInt) != 16 {
+               v.reset(OpARM64MOVHUload)
+               v.AuxInt = int32ToAuxInt(off1 + int32(off2))
+               v.Aux = symToAux(sym)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVHUload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVHUloadidx ptr idx mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               x1_1_0 := x1_1.Args[0]
-               if x1_1_0.Op != OpARM64MOVDreg || w != x1_1_0.Args[0] {
+               idx := v_0.Args[1]
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(off == 0 && sym == nil) {
                        break
                }
-               x2 := x1.Args[2]
-               if x2.Op != OpARM64MOVBstore || auxIntToInt32(x2.AuxInt) != i-3 || auxToSym(x2.Aux) != s {
+               v.reset(OpARM64MOVHUloadidx)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       // match: (MOVHUload [off] {sym} (ADDshiftLL [1] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVHUloadidx2 ptr idx mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 1 {
                        break
                }
-               mem := x2.Args[2]
-               if ptr != x2.Args[0] {
-                       break
-               }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64SRLconst || auxIntToInt64(x2_1.AuxInt) != 24 {
-                       break
-               }
-               x2_1_0 := x2_1.Args[0]
-               if x2_1_0.Op != OpARM64MOVDreg || w != x2_1_0.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)) {
+               idx := v_0.Args[1]
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i - 3)
-               v.Aux = symToAux(s)
-               v0 := b.NewValue0(x2.Pos, OpARM64REVW, typ.UInt32)
-               v0.AddArg(w)
-               v.AddArg3(ptr, v0, mem)
+               v.reset(OpARM64MOVHUloadidx2)
+               v.AddArg3(ptr, idx, mem)
                return true
        }
-       // match: (MOVBstore [3] {s} p w x0:(MOVBstore [2] {s} p (SRLconst [8] (MOVDreg w)) x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] (MOVDreg w)) x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] (MOVDreg w)) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2)
-       // result: (MOVWstoreidx ptr0 idx0 (REVW <typ.UInt32> w) mem)
+       // match: (MOVHUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+       // result: (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 3 {
-                       break
-               }
-               s := auxToSym(v.Aux)
-               p := v_0
-               w := v_1
-               x0 := v_2
-               if x0.Op != OpARM64MOVBstore || auxIntToInt32(x0.AuxInt) != 2 || auxToSym(x0.Aux) != s {
-                       break
-               }
-               _ = x0.Args[2]
-               if p != x0.Args[0] {
-                       break
-               }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64SRLconst || auxIntToInt64(x0_1.AuxInt) != 8 {
-                       break
-               }
-               x0_1_0 := x0_1.Args[0]
-               if x0_1_0.Op != OpARM64MOVDreg || w != x0_1_0.Args[0] {
-                       break
-               }
-               x1 := x0.Args[2]
-               if x1.Op != OpARM64MOVBstore || auxIntToInt32(x1.AuxInt) != 1 || auxToSym(x1.Aux) != s {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               _ = x1.Args[2]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               _ = p1.Args[1]
-               p1_0 := p1.Args[0]
-               p1_1 := p1.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, p1_0, p1_1 = _i0+1, p1_1, p1_0 {
-                       ptr1 := p1_0
-                       idx1 := p1_1
-                       x1_1 := x1.Args[1]
-                       if x1_1.Op != OpARM64SRLconst || auxIntToInt64(x1_1.AuxInt) != 16 {
-                               continue
-                       }
-                       x1_1_0 := x1_1.Args[0]
-                       if x1_1_0.Op != OpARM64MOVDreg || w != x1_1_0.Args[0] {
-                               continue
-                       }
-                       x2 := x1.Args[2]
-                       if x2.Op != OpARM64MOVBstoreidx {
-                               continue
-                       }
-                       mem := x2.Args[3]
-                       ptr0 := x2.Args[0]
-                       idx0 := x2.Args[1]
-                       x2_2 := x2.Args[2]
-                       if x2_2.Op != OpARM64SRLconst || auxIntToInt64(x2_2.AuxInt) != 24 {
-                               continue
-                       }
-                       x2_2_0 := x2_2.Args[0]
-                       if x2_2_0.Op != OpARM64MOVDreg || w != x2_2_0.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVWstoreidx)
-                       v0 := b.NewValue0(x1.Pos, OpARM64REVW, typ.UInt32)
-                       v0.AddArg(w)
-                       v.AddArg4(ptr0, idx0, v0, mem)
-                       return true
-               }
-               break
+               v.reset(OpARM64MOVHUload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg2(ptr, mem)
+               return true
        }
-       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)
-       // result: (MOVWstore [i-3] {s} ptr (REVW <typ.UInt32> w) mem)
+       // match: (MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
                ptr := v_0
-               w := v_1
-               x0 := v_2
-               if x0.Op != OpARM64MOVBstore || auxIntToInt32(x0.AuxInt) != i-1 || auxToSym(x0.Aux) != s {
-                       break
-               }
-               _ = x0.Args[2]
-               if ptr != x0.Args[0] {
-                       break
-               }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64SRLconst || auxIntToInt64(x0_1.AuxInt) != 8 || w != x0_1.Args[0] {
-                       break
-               }
-               x1 := x0.Args[2]
-               if x1.Op != OpARM64MOVBstore || auxIntToInt32(x1.AuxInt) != i-2 || auxToSym(x1.Aux) != s {
-                       break
-               }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] {
-                       break
-               }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64SRLconst || auxIntToInt64(x1_1.AuxInt) != 16 || w != x1_1.Args[0] {
-                       break
-               }
-               x2 := x1.Args[2]
-               if x2.Op != OpARM64MOVBstore || auxIntToInt32(x2.AuxInt) != i-3 || auxToSym(x2.Aux) != s {
-                       break
-               }
-               mem := x2.Args[2]
-               if ptr != x2.Args[0] {
+               if v_1.Op != OpARM64MOVHstorezero {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64SRLconst || auxIntToInt64(x2_1.AuxInt) != 24 || w != x2_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)) {
+               off2 := auxIntToInt32(v_1.AuxInt)
+               sym2 := auxToSym(v_1.Aux)
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i - 3)
-               v.Aux = symToAux(s)
-               v0 := b.NewValue0(x2.Pos, OpARM64REVW, typ.UInt32)
-               v0.AddArg(w)
-               v.AddArg3(ptr, v0, mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(0)
                return true
        }
-       // match: (MOVBstore [3] {s} p w x0:(MOVBstore [2] {s} p (SRLconst [8] w) x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] w) x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] w) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2)
-       // result: (MOVWstoreidx ptr0 idx0 (REVW <typ.UInt32> w) mem)
+       // match: (MOVHUload [off] {sym} (SB) _)
+       // cond: symIsRO(sym)
+       // result: (MOVDconst [int64(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))])
        for {
-               if auxIntToInt32(v.AuxInt) != 3 {
-                       break
-               }
-               s := auxToSym(v.Aux)
-               p := v_0
-               w := v_1
-               x0 := v_2
-               if x0.Op != OpARM64MOVBstore || auxIntToInt32(x0.AuxInt) != 2 || auxToSym(x0.Aux) != s {
-                       break
-               }
-               _ = x0.Args[2]
-               if p != x0.Args[0] {
-                       break
-               }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64SRLconst || auxIntToInt64(x0_1.AuxInt) != 8 || w != x0_1.Args[0] {
-                       break
-               }
-               x1 := x0.Args[2]
-               if x1.Op != OpARM64MOVBstore || auxIntToInt32(x1.AuxInt) != 1 || auxToSym(x1.Aux) != s {
-                       break
-               }
-               _ = x1.Args[2]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpSB || !(symIsRO(sym)) {
                        break
                }
-               _ = p1.Args[1]
-               p1_0 := p1.Args[0]
-               p1_1 := p1.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, p1_0, p1_1 = _i0+1, p1_1, p1_0 {
-                       ptr1 := p1_0
-                       idx1 := p1_1
-                       x1_1 := x1.Args[1]
-                       if x1_1.Op != OpARM64SRLconst || auxIntToInt64(x1_1.AuxInt) != 16 || w != x1_1.Args[0] {
-                               continue
-                       }
-                       x2 := x1.Args[2]
-                       if x2.Op != OpARM64MOVBstoreidx {
-                               continue
-                       }
-                       mem := x2.Args[3]
-                       ptr0 := x2.Args[0]
-                       idx0 := x2.Args[1]
-                       x2_2 := x2.Args[2]
-                       if x2_2.Op != OpARM64SRLconst || auxIntToInt64(x2_2.AuxInt) != 24 || w != x2_2.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVWstoreidx)
-                       v0 := b.NewValue0(x1.Pos, OpARM64REVW, typ.UInt32)
-                       v0.AddArg(w)
-                       v.AddArg4(ptr0, idx0, v0, mem)
-                       return true
-               }
-               break
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(int64(read16(sym, int64(off), config.ctxt.Arch.ByteOrder)))
+               return true
        }
-       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr (REV16W <typ.UInt16> w) mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHUloadidx(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVHUloadidx ptr (MOVDconst [c]) mem)
+       // cond: is32Bit(c)
+       // result: (MOVHUload [int32(c)] ptr mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
                ptr := v_0
-               w := v_1
-               x := v_2
-               if x.Op != OpARM64MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if ptr != x.Args[0] {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpARM64SRLconst || auxIntToInt64(x_1.AuxInt) != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               c := auxIntToInt64(v_1.AuxInt)
+               mem := v_2
+               if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v0 := b.NewValue0(x.Pos, OpARM64REV16W, typ.UInt16)
-               v0.AddArg(w)
-               v.AddArg3(ptr, v0, mem)
+               v.reset(OpARM64MOVHUload)
+               v.AuxInt = int32ToAuxInt(int32(c))
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] w) mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr0 idx0 (REV16W <typ.UInt16> w) mem)
+       // match: (MOVHUloadidx (MOVDconst [c]) ptr mem)
+       // cond: is32Bit(c)
+       // result: (MOVHUload [int32(c)] ptr mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 1 {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
+               c := auxIntToInt64(v_0.AuxInt)
+               ptr := v_1
+               mem := v_2
+               if !(is32Bit(c)) {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr1 := v_0_0
-                       idx1 := v_0_1
-                       w := v_1
-                       x := v_2
-                       if x.Op != OpARM64MOVBstoreidx {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       ptr0 := x.Args[0]
-                       idx0 := x.Args[1]
-                       x_2 := x.Args[2]
-                       if x_2.Op != OpARM64SRLconst || auxIntToInt64(x_2.AuxInt) != 8 || w != x_2.Args[0] || !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVHstoreidx)
-                       v0 := b.NewValue0(v.Pos, OpARM64REV16W, typ.UInt16)
-                       v0.AddArg(w)
-                       v.AddArg4(ptr0, idx0, v0, mem)
-                       return true
-               }
-               break
+               v.reset(OpARM64MOVHUload)
+               v.AuxInt = int32ToAuxInt(int32(c))
+               v.AddArg2(ptr, mem)
+               return true
        }
-       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 8)] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr (REV16W <typ.UInt16> w) mem)
+       // match: (MOVHUloadidx ptr (SLLconst [1] idx) mem)
+       // result: (MOVHUloadidx2 ptr idx mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
                ptr := v_0
-               w := v_1
-               x := v_2
-               if x.Op != OpARM64MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
+               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 1 {
                        break
                }
-               mem := x.Args[2]
-               if ptr != x.Args[0] {
+               idx := v_1.Args[0]
+               mem := v_2
+               v.reset(OpARM64MOVHUloadidx2)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       // match: (MOVHUloadidx ptr (ADD idx idx) mem)
+       // result: (MOVHUloadidx2 ptr idx mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64ADD {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpARM64UBFX || auxIntToArm64BitField(x_1.AuxInt) != armBFAuxInt(8, 8) || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               idx := v_1.Args[1]
+               if idx != v_1.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v0 := b.NewValue0(x.Pos, OpARM64REV16W, typ.UInt16)
-               v0.AddArg(w)
-               v.AddArg3(ptr, v0, mem)
+               mem := v_2
+               v.reset(OpARM64MOVHUloadidx2)
+               v.AddArg3(ptr, idx, mem)
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(8, 8)] w) mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr0 idx0 (REV16W <typ.UInt16> w) mem)
+       // match: (MOVHUloadidx (ADD idx idx) ptr mem)
+       // result: (MOVHUloadidx2 ptr idx mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 1 {
-                       break
-               }
-               s := auxToSym(v.Aux)
                if v_0.Op != OpARM64ADD {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr1 := v_0_0
-                       idx1 := v_0_1
-                       w := v_1
-                       x := v_2
-                       if x.Op != OpARM64MOVBstoreidx {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       ptr0 := x.Args[0]
-                       idx0 := x.Args[1]
-                       x_2 := x.Args[2]
-                       if x_2.Op != OpARM64UBFX || auxIntToArm64BitField(x_2.AuxInt) != armBFAuxInt(8, 8) || w != x_2.Args[0] || !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVHstoreidx)
-                       v0 := b.NewValue0(v.Pos, OpARM64REV16W, typ.UInt16)
-                       v0.AddArg(w)
-                       v.AddArg4(ptr0, idx0, v0, mem)
-                       return true
+               idx := v_0.Args[1]
+               if idx != v_0.Args[0] {
+                       break
                }
-               break
+               ptr := v_1
+               mem := v_2
+               v.reset(OpARM64MOVHUloadidx2)
+               v.AddArg3(ptr, idx, mem)
+               return true
        }
-       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr (REV16W <typ.UInt16> w) mem)
+       // match: (MOVHUloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
                ptr := v_0
-               w := v_1
-               x := v_2
-               if x.Op != OpARM64MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
+               idx := v_1
+               if v_2.Op != OpARM64MOVHstorezeroidx {
                        break
                }
-               mem := x.Args[2]
-               if ptr != x.Args[0] {
+               idx2 := v_2.Args[1]
+               ptr2 := v_2.Args[0]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpARM64SRLconst || auxIntToInt64(x_1.AuxInt) != 8 {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHUloadidx2(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVHUloadidx2 ptr (MOVDconst [c]) mem)
+       // cond: is32Bit(c<<1)
+       // result: (MOVHUload [int32(c)<<1] ptr mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x_1_0 := x_1.Args[0]
-               if x_1_0.Op != OpARM64MOVDreg || w != x_1_0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               c := auxIntToInt64(v_1.AuxInt)
+               mem := v_2
+               if !(is32Bit(c << 1)) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v0 := b.NewValue0(x.Pos, OpARM64REV16W, typ.UInt16)
-               v0.AddArg(w)
-               v.AddArg3(ptr, v0, mem)
+               v.reset(OpARM64MOVHUload)
+               v.AuxInt = int32ToAuxInt(int32(c) << 1)
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr0 idx0 (REV16W <typ.UInt16> w) mem)
+       // match: (MOVHUloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _))
+       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
+       // result: (MOVDconst [0])
        for {
-               if auxIntToInt32(v.AuxInt) != 1 {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVHstorezeroidx2 {
                        break
                }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
+               idx2 := v_2.Args[1]
+               ptr2 := v_2.Args[0]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr1 := v_0_0
-                       idx1 := v_0_1
-                       w := v_1
-                       x := v_2
-                       if x.Op != OpARM64MOVBstoreidx {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       ptr0 := x.Args[0]
-                       idx0 := x.Args[1]
-                       x_2 := x.Args[2]
-                       if x_2.Op != OpARM64SRLconst || auxIntToInt64(x_2.AuxInt) != 8 {
-                               continue
-                       }
-                       x_2_0 := x_2.Args[0]
-                       if x_2_0.Op != OpARM64MOVDreg || w != x_2_0.Args[0] || !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVHstoreidx)
-                       v0 := b.NewValue0(v.Pos, OpARM64REV16W, typ.UInt16)
-                       v0.AddArg(w)
-                       v.AddArg4(ptr0, idx0, v0, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 24)] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr (REV16W <typ.UInt16> w) mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               ptr := v_0
-               w := v_1
-               x := v_2
-               if x.Op != OpARM64MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if ptr != x.Args[0] {
-                       break
-               }
-               x_1 := x.Args[1]
-               if x_1.Op != OpARM64UBFX || auxIntToArm64BitField(x_1.AuxInt) != armBFAuxInt(8, 24) || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v0 := b.NewValue0(x.Pos, OpARM64REV16W, typ.UInt16)
-               v0.AddArg(w)
-               v.AddArg3(ptr, v0, mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(0)
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(8, 24)] w) mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr0 idx0 (REV16W <typ.UInt16> w) mem)
-       for {
-               if auxIntToInt32(v.AuxInt) != 1 {
-                       break
-               }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr1 := v_0_0
-                       idx1 := v_0_1
-                       w := v_1
-                       x := v_2
-                       if x.Op != OpARM64MOVBstoreidx {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       ptr0 := x.Args[0]
-                       idx0 := x.Args[1]
-                       x_2 := x.Args[2]
-                       if x_2.Op != OpARM64UBFX || auxIntToArm64BitField(x_2.AuxInt) != armBFAuxInt(8, 24) || w != x_2.Args[0] || !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVHstoreidx)
-                       v0 := b.NewValue0(v.Pos, OpARM64REV16W, typ.UInt16)
-                       v0.AddArg(w)
-                       v.AddArg4(ptr0, idx0, v0, mem)
-                       return true
-               }
-               break
-       }
        return false
 }
-func rewriteValueARM64_OpARM64MOVBstoreidx(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueARM64_OpARM64MOVHUreg(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (MOVBstoreidx ptr (MOVDconst [c]) val mem)
-       // cond: is32Bit(c)
-       // result: (MOVBstore [int32(c)] ptr val mem)
+       // match: (MOVHUreg x:(MOVBUload _ _))
+       // result: (MOVDreg x)
        for {
-               ptr := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c)) {
+               x := v_0
+               if x.Op != OpARM64MOVBUload {
                        break
                }
-               v.reset(OpARM64MOVBstore)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               v.AddArg3(ptr, val, mem)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBstoreidx (MOVDconst [c]) idx val mem)
-       // cond: is32Bit(c)
-       // result: (MOVBstore [int32(c)] idx val mem)
+       // match: (MOVHUreg x:(MOVHUload _ _))
+       // result: (MOVDreg x)
        for {
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               idx := v_1
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c)) {
+               x := v_0
+               if x.Op != OpARM64MOVHUload {
                        break
                }
-               v.reset(OpARM64MOVBstore)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               v.AddArg3(idx, val, mem)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBstoreidx ptr idx (MOVDconst [0]) mem)
-       // result: (MOVBstorezeroidx ptr idx mem)
+       // match: (MOVHUreg x:(MOVBUloadidx _ _ _))
+       // result: (MOVDreg x)
        for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVDconst || auxIntToInt64(v_2.AuxInt) != 0 {
+               x := v_0
+               if x.Op != OpARM64MOVBUloadidx {
                        break
                }
-               mem := v_3
-               v.reset(OpARM64MOVBstorezeroidx)
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBstoreidx ptr idx (MOVBreg x) mem)
-       // result: (MOVBstoreidx ptr idx x mem)
+       // match: (MOVHUreg x:(MOVHUloadidx _ _ _))
+       // result: (MOVDreg x)
        for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVBreg {
+               x := v_0
+               if x.Op != OpARM64MOVHUloadidx {
                        break
                }
-               x := v_2.Args[0]
-               mem := v_3
-               v.reset(OpARM64MOVBstoreidx)
-               v.AddArg4(ptr, idx, x, mem)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBstoreidx ptr idx (MOVBUreg x) mem)
-       // result: (MOVBstoreidx ptr idx x mem)
+       // match: (MOVHUreg x:(MOVHUloadidx2 _ _ _))
+       // result: (MOVDreg x)
        for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVBUreg {
+               x := v_0
+               if x.Op != OpARM64MOVHUloadidx2 {
                        break
                }
-               x := v_2.Args[0]
-               mem := v_3
-               v.reset(OpARM64MOVBstoreidx)
-               v.AddArg4(ptr, idx, x, mem)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBstoreidx ptr idx (MOVHreg x) mem)
-       // result: (MOVBstoreidx ptr idx x mem)
+       // match: (MOVHUreg x:(MOVBUreg _))
+       // result: (MOVDreg x)
        for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVHreg {
+               x := v_0
+               if x.Op != OpARM64MOVBUreg {
                        break
                }
-               x := v_2.Args[0]
-               mem := v_3
-               v.reset(OpARM64MOVBstoreidx)
-               v.AddArg4(ptr, idx, x, mem)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBstoreidx ptr idx (MOVHUreg x) mem)
-       // result: (MOVBstoreidx ptr idx x mem)
+       // match: (MOVHUreg x:(MOVHUreg _))
+       // result: (MOVDreg x)
        for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVHUreg {
+               x := v_0
+               if x.Op != OpARM64MOVHUreg {
                        break
                }
-               x := v_2.Args[0]
-               mem := v_3
-               v.reset(OpARM64MOVBstoreidx)
-               v.AddArg4(ptr, idx, x, mem)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBstoreidx ptr idx (MOVWreg x) mem)
-       // result: (MOVBstoreidx ptr idx x mem)
+       // match: (MOVHUreg (ANDconst [c] x))
+       // result: (ANDconst [c&(1<<16-1)] x)
        for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVWreg {
+               if v_0.Op != OpARM64ANDconst {
                        break
                }
-               x := v_2.Args[0]
-               mem := v_3
-               v.reset(OpARM64MOVBstoreidx)
-               v.AddArg4(ptr, idx, x, mem)
+               c := auxIntToInt64(v_0.AuxInt)
+               x := v_0.Args[0]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = int64ToAuxInt(c & (1<<16 - 1))
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBstoreidx ptr idx (MOVWUreg x) mem)
-       // result: (MOVBstoreidx ptr idx x mem)
+       // match: (MOVHUreg (MOVDconst [c]))
+       // result: (MOVDconst [int64(uint16(c))])
        for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVWUreg {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_2.Args[0]
-               mem := v_3
-               v.reset(OpARM64MOVBstoreidx)
-               v.AddArg4(ptr, idx, x, mem)
+               c := auxIntToInt64(v_0.AuxInt)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(int64(uint16(c)))
                return true
        }
-       // match: (MOVBstoreidx ptr (ADDconst [1] idx) (SRLconst [8] w) x:(MOVBstoreidx ptr idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstoreidx ptr idx w mem)
+       // match: (MOVHUreg (SLLconst [lc] x))
+       // cond: lc >= 16
+       // result: (MOVDconst [0])
        for {
-               ptr := v_0
-               if v_1.Op != OpARM64ADDconst || auxIntToInt64(v_1.AuxInt) != 1 {
-                       break
-               }
-               idx := v_1.Args[0]
-               if v_2.Op != OpARM64SRLconst || auxIntToInt64(v_2.AuxInt) != 8 {
-                       break
-               }
-               w := v_2.Args[0]
-               x := v_3
-               if x.Op != OpARM64MOVBstoreidx {
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               mem := x.Args[3]
-               if ptr != x.Args[0] || idx != x.Args[1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
+               lc := auxIntToInt64(v_0.AuxInt)
+               if !(lc >= 16) {
                        break
                }
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg4(ptr, idx, w, mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(0)
                return true
        }
-       // match: (MOVBstoreidx ptr (ADDconst [3] idx) w x0:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(8, 24)] w) x1:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(16, 16)] w) x2:(MOVBstoreidx ptr idx (UBFX [armBFAuxInt(24, 8)] w) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)
-       // result: (MOVWstoreidx ptr idx (REVW <typ.UInt32> w) mem)
+       // match: (MOVHUreg (SLLconst [lc] x))
+       // cond: lc < 16
+       // result: (UBFIZ [armBFAuxInt(lc, 16-lc)] x)
        for {
-               ptr := v_0
-               if v_1.Op != OpARM64ADDconst || auxIntToInt64(v_1.AuxInt) != 3 {
-                       break
-               }
-               idx := v_1.Args[0]
-               w := v_2
-               x0 := v_3
-               if x0.Op != OpARM64MOVBstoreidx {
-                       break
-               }
-               _ = x0.Args[3]
-               if ptr != x0.Args[0] {
-                       break
-               }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64ADDconst || auxIntToInt64(x0_1.AuxInt) != 2 || idx != x0_1.Args[0] {
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               x0_2 := x0.Args[2]
-               if x0_2.Op != OpARM64UBFX || auxIntToArm64BitField(x0_2.AuxInt) != armBFAuxInt(8, 24) || w != x0_2.Args[0] {
+               lc := auxIntToInt64(v_0.AuxInt)
+               x := v_0.Args[0]
+               if !(lc < 16) {
                        break
                }
-               x1 := x0.Args[3]
-               if x1.Op != OpARM64MOVBstoreidx {
+               v.reset(OpARM64UBFIZ)
+               v.AuxInt = arm64BitFieldToAuxInt(armBFAuxInt(lc, 16-lc))
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHUreg (SRLconst [rc] x))
+       // cond: rc < 16
+       // result: (UBFX [armBFAuxInt(rc, 16)] x)
+       for {
+               if v_0.Op != OpARM64SRLconst {
                        break
                }
-               _ = x1.Args[3]
-               if ptr != x1.Args[0] {
+               rc := auxIntToInt64(v_0.AuxInt)
+               x := v_0.Args[0]
+               if !(rc < 16) {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64ADDconst || auxIntToInt64(x1_1.AuxInt) != 1 || idx != x1_1.Args[0] {
+               v.reset(OpARM64UBFX)
+               v.AuxInt = arm64BitFieldToAuxInt(armBFAuxInt(rc, 16))
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHUreg (UBFX [bfc] x))
+       // cond: bfc.getARM64BFwidth() <= 16
+       // result: (UBFX [bfc] x)
+       for {
+               if v_0.Op != OpARM64UBFX {
                        break
                }
-               x1_2 := x1.Args[2]
-               if x1_2.Op != OpARM64UBFX || auxIntToArm64BitField(x1_2.AuxInt) != armBFAuxInt(16, 16) || w != x1_2.Args[0] {
+               bfc := auxIntToArm64BitField(v_0.AuxInt)
+               x := v_0.Args[0]
+               if !(bfc.getARM64BFwidth() <= 16) {
                        break
                }
-               x2 := x1.Args[3]
-               if x2.Op != OpARM64MOVBstoreidx {
-                       break
-               }
-               mem := x2.Args[3]
-               if ptr != x2.Args[0] || idx != x2.Args[1] {
-                       break
-               }
-               x2_2 := x2.Args[2]
-               if x2_2.Op != OpARM64UBFX || auxIntToArm64BitField(x2_2.AuxInt) != armBFAuxInt(24, 8) || w != x2_2.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)) {
-                       break
-               }
-               v.reset(OpARM64MOVWstoreidx)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, typ.UInt32)
-               v0.AddArg(w)
-               v.AddArg4(ptr, idx, v0, mem)
-               return true
-       }
-       // match: (MOVBstoreidx ptr idx w x0:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(8, 24)] w) x1:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(16, 16)] w) x2:(MOVBstoreidx ptr (ADDconst [3] idx) (UBFX [armBFAuxInt(24, 8)] w) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)
-       // result: (MOVWstoreidx ptr idx w mem)
-       for {
-               ptr := v_0
-               idx := v_1
-               w := v_2
-               x0 := v_3
-               if x0.Op != OpARM64MOVBstoreidx {
-                       break
-               }
-               _ = x0.Args[3]
-               if ptr != x0.Args[0] {
-                       break
-               }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64ADDconst || auxIntToInt64(x0_1.AuxInt) != 1 || idx != x0_1.Args[0] {
-                       break
-               }
-               x0_2 := x0.Args[2]
-               if x0_2.Op != OpARM64UBFX || auxIntToArm64BitField(x0_2.AuxInt) != armBFAuxInt(8, 24) || w != x0_2.Args[0] {
-                       break
-               }
-               x1 := x0.Args[3]
-               if x1.Op != OpARM64MOVBstoreidx {
-                       break
-               }
-               _ = x1.Args[3]
-               if ptr != x1.Args[0] {
-                       break
-               }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64ADDconst || auxIntToInt64(x1_1.AuxInt) != 2 || idx != x1_1.Args[0] {
-                       break
-               }
-               x1_2 := x1.Args[2]
-               if x1_2.Op != OpARM64UBFX || auxIntToArm64BitField(x1_2.AuxInt) != armBFAuxInt(16, 16) || w != x1_2.Args[0] {
-                       break
-               }
-               x2 := x1.Args[3]
-               if x2.Op != OpARM64MOVBstoreidx {
-                       break
-               }
-               mem := x2.Args[3]
-               if ptr != x2.Args[0] {
-                       break
-               }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64ADDconst || auxIntToInt64(x2_1.AuxInt) != 3 || idx != x2_1.Args[0] {
-                       break
-               }
-               x2_2 := x2.Args[2]
-               if x2_2.Op != OpARM64UBFX || auxIntToArm64BitField(x2_2.AuxInt) != armBFAuxInt(24, 8) || w != x2_2.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)) {
-                       break
-               }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg4(ptr, idx, w, mem)
-               return true
-       }
-       // match: (MOVBstoreidx ptr (ADDconst [1] idx) w x:(MOVBstoreidx ptr idx (UBFX [armBFAuxInt(8, 8)] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstoreidx ptr idx (REV16W <typ.UInt16> w) mem)
-       for {
-               ptr := v_0
-               if v_1.Op != OpARM64ADDconst || auxIntToInt64(v_1.AuxInt) != 1 {
-                       break
-               }
-               idx := v_1.Args[0]
-               w := v_2
-               x := v_3
-               if x.Op != OpARM64MOVBstoreidx {
-                       break
-               }
-               mem := x.Args[3]
-               if ptr != x.Args[0] || idx != x.Args[1] {
-                       break
-               }
-               x_2 := x.Args[2]
-               if x_2.Op != OpARM64UBFX || auxIntToArm64BitField(x_2.AuxInt) != armBFAuxInt(8, 8) || w != x_2.Args[0] || !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVHstoreidx)
-               v0 := b.NewValue0(v.Pos, OpARM64REV16W, typ.UInt16)
-               v0.AddArg(w)
-               v.AddArg4(ptr, idx, v0, mem)
-               return true
-       }
-       // match: (MOVBstoreidx ptr idx w x:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(8, 8)] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstoreidx ptr idx w mem)
-       for {
-               ptr := v_0
-               idx := v_1
-               w := v_2
-               x := v_3
-               if x.Op != OpARM64MOVBstoreidx {
-                       break
-               }
-               mem := x.Args[3]
-               if ptr != x.Args[0] {
-                       break
-               }
-               x_1 := x.Args[1]
-               if x_1.Op != OpARM64ADDconst || auxIntToInt64(x_1.AuxInt) != 1 || idx != x_1.Args[0] {
-                       break
-               }
-               x_2 := x.Args[2]
-               if x_2.Op != OpARM64UBFX || auxIntToArm64BitField(x_2.AuxInt) != armBFAuxInt(8, 8) || w != x_2.Args[0] || !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg4(ptr, idx, w, mem)
+               v.reset(OpARM64UBFX)
+               v.AuxInt = arm64BitFieldToAuxInt(bfc)
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVBstorezero(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVHload(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        config := b.Func.Config
-       // match: (MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVBstorezero [off1+int32(off2)] {sym} ptr mem)
+       // result: (MOVHload [off1+int32(off2)] {sym} ptr mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -10596,41 +10407,38 @@ func rewriteValueARM64_OpARM64MOVBstorezero(v *Value) bool {
                if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               v.reset(OpARM64MOVBstorezero)
+               v.reset(OpARM64MOVHload)
                v.AuxInt = int32ToAuxInt(off1 + int32(off2))
                v.Aux = symToAux(sym)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // match: (MOVHload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVHloadidx ptr idx mem)
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               if v_0.Op != OpARM64MOVDaddr {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               sym2 := auxToSym(v_0.Aux)
+               idx := v_0.Args[1]
                ptr := v_0.Args[0]
                mem := v_1
-               if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVBstorezero)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg2(ptr, mem)
+               v.reset(OpARM64MOVHloadidx)
+               v.AddArg3(ptr, idx, mem)
                return true
        }
-       // match: (MOVBstorezero [off] {sym} (ADD ptr idx) mem)
+       // match: (MOVHload [off] {sym} (ADDshiftLL [1] ptr idx) mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVBstorezeroidx ptr idx mem)
+       // result: (MOVHloadidx2 ptr idx mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
+               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 1 {
                        break
                }
                idx := v_0.Args[1]
@@ -10639,78 +10447,61 @@ func rewriteValueARM64_OpARM64MOVBstorezero(v *Value) bool {
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVBstorezeroidx)
+               v.reset(OpARM64MOVHloadidx2)
                v.AddArg3(ptr, idx, mem)
                return true
        }
-       // match: (MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
-       // cond: x.Uses == 1 && areAdjacentOffsets(int64(i),int64(j),1) && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVHstorezero [int32(min(int64(i),int64(j)))] {s} ptr0 mem)
+       // match: (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+       // result: (MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               ptr0 := v_0
-               x := v_1
-               if x.Op != OpARM64MOVBstorezero {
-                       break
-               }
-               j := auxIntToInt32(x.AuxInt)
-               if auxToSym(x.Aux) != s {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               mem := x.Args[1]
-               ptr1 := x.Args[0]
-               if !(x.Uses == 1 && areAdjacentOffsets(int64(i), int64(j), 1) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = int32ToAuxInt(int32(min(int64(i), int64(j))))
-               v.Aux = symToAux(s)
-               v.AddArg2(ptr0, mem)
+               v.reset(OpARM64MOVHload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVBstorezero [1] {s} (ADD ptr0 idx0) x:(MOVBstorezeroidx ptr1 idx1 mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstorezeroidx ptr1 idx1 mem)
+       // match: (MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
        for {
-               if auxIntToInt32(v.AuxInt) != 1 {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpARM64MOVHstorezero {
                        break
                }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
+               off2 := auxIntToInt32(v_1.AuxInt)
+               sym2 := auxToSym(v_1.Aux)
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr0 := v_0_0
-                       idx0 := v_0_1
-                       x := v_1
-                       if x.Op != OpARM64MOVBstorezeroidx {
-                               continue
-                       }
-                       mem := x.Args[2]
-                       ptr1 := x.Args[0]
-                       idx1 := x.Args[1]
-                       if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVHstorezeroidx)
-                       v.AddArg3(ptr1, idx1, mem)
-                       return true
-               }
-               break
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(0)
+               return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVBstorezeroidx(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVHloadidx(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVBstorezeroidx ptr (MOVDconst [c]) mem)
+       // match: (MOVHloadidx ptr (MOVDconst [c]) mem)
        // cond: is32Bit(c)
-       // result: (MOVBstorezero [int32(c)] ptr mem)
+       // result: (MOVHload [int32(c)] ptr mem)
        for {
                ptr := v_0
                if v_1.Op != OpARM64MOVDconst {
@@ -10721,386 +10512,323 @@ func rewriteValueARM64_OpARM64MOVBstorezeroidx(v *Value) bool {
                if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVBstorezero)
+               v.reset(OpARM64MOVHload)
                v.AuxInt = int32ToAuxInt(int32(c))
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVBstorezeroidx (MOVDconst [c]) idx mem)
+       // match: (MOVHloadidx (MOVDconst [c]) ptr mem)
        // cond: is32Bit(c)
-       // result: (MOVBstorezero [int32(c)] idx mem)
+       // result: (MOVHload [int32(c)] ptr mem)
        for {
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := auxIntToInt64(v_0.AuxInt)
-               idx := v_1
+               ptr := v_1
                mem := v_2
                if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVBstorezero)
+               v.reset(OpARM64MOVHload)
                v.AuxInt = int32ToAuxInt(int32(c))
-               v.AddArg2(idx, mem)
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVBstorezeroidx ptr (ADDconst [1] idx) x:(MOVBstorezeroidx ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstorezeroidx ptr idx mem)
+       // match: (MOVHloadidx ptr (SLLconst [1] idx) mem)
+       // result: (MOVHloadidx2 ptr idx mem)
        for {
                ptr := v_0
-               if v_1.Op != OpARM64ADDconst || auxIntToInt64(v_1.AuxInt) != 1 {
+               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 1 {
                        break
                }
                idx := v_1.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVBstorezeroidx {
-                       break
-               }
-               mem := x.Args[2]
-               if ptr != x.Args[0] || idx != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVHstorezeroidx)
+               mem := v_2
+               v.reset(OpARM64MOVHloadidx2)
                v.AddArg3(ptr, idx, mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVDload(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       config := b.Func.Config
-       // match: (MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr val _))
-       // result: (FMOVDfpgp val)
+       // match: (MOVHloadidx ptr (ADD idx idx) mem)
+       // result: (MOVHloadidx2 ptr idx mem)
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
                ptr := v_0
-               if v_1.Op != OpARM64FMOVDstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
+               if v_1.Op != OpARM64ADD {
                        break
                }
-               val := v_1.Args[1]
-               if ptr != v_1.Args[0] {
+               idx := v_1.Args[1]
+               if idx != v_1.Args[0] {
                        break
                }
-               v.reset(OpARM64FMOVDfpgp)
-               v.AddArg(val)
+               mem := v_2
+               v.reset(OpARM64MOVHloadidx2)
+               v.AddArg3(ptr, idx, mem)
                return true
        }
-       // match: (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVDload [off1+int32(off2)] {sym} ptr mem)
+       // match: (MOVHloadidx (ADD idx idx) ptr mem)
+       // result: (MOVHloadidx2 ptr idx mem)
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDconst {
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               off2 := auxIntToInt64(v_0.AuxInt)
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+               idx := v_0.Args[1]
+               if idx != v_0.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVDload)
-               v.AuxInt = int32ToAuxInt(off1 + int32(off2))
-               v.Aux = symToAux(sym)
-               v.AddArg2(ptr, mem)
+               ptr := v_1
+               mem := v_2
+               v.reset(OpARM64MOVHloadidx2)
+               v.AddArg3(ptr, idx, mem)
                return true
        }
-       // match: (MOVDload [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVDloadidx ptr idx mem)
+       // match: (MOVHloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVHstorezeroidx {
                        break
                }
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(off == 0 && sym == nil) {
+               idx2 := v_2.Args[1]
+               ptr2 := v_2.Args[0]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
                        break
                }
-               v.reset(OpARM64MOVDloadidx)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVDloadidx8 ptr idx mem)
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 3 {
-                       break
-               }
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(off == 0 && sym == nil) {
-                       break
-               }
-               v.reset(OpARM64MOVDloadidx8)
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(0)
                return true
        }
-       // match: (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHloadidx2(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVHloadidx2 ptr (MOVDconst [c]) mem)
+       // cond: is32Bit(c<<1)
+       // result: (MOVHload [int32(c)<<1] ptr mem)
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               if v_0.Op != OpARM64MOVDaddr {
+               ptr := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               sym2 := auxToSym(v_0.Aux)
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+               c := auxIntToInt64(v_1.AuxInt)
+               mem := v_2
+               if !(is32Bit(c << 1)) {
                        break
                }
-               v.reset(OpARM64MOVDload)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.reset(OpARM64MOVHload)
+               v.AuxInt = int32ToAuxInt(int32(c) << 1)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // match: (MOVHloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _))
+       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
        // result: (MOVDconst [0])
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
                ptr := v_0
-               if v_1.Op != OpARM64MOVDstorezero {
+               idx := v_1
+               if v_2.Op != OpARM64MOVHstorezeroidx2 {
                        break
                }
-               off2 := auxIntToInt32(v_1.AuxInt)
-               sym2 := auxToSym(v_1.Aux)
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               idx2 := v_2.Args[1]
+               ptr2 := v_2.Args[0]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
                        break
                }
                v.reset(OpARM64MOVDconst)
                v.AuxInt = int64ToAuxInt(0)
                return true
        }
-       // match: (MOVDload [off] {sym} (SB) _)
-       // cond: symIsRO(sym)
-       // result: (MOVDconst [int64(read64(sym, int64(off), config.ctxt.Arch.ByteOrder))])
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpSB || !(symIsRO(sym)) {
-                       break
-               }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(int64(read64(sym, int64(off), config.ctxt.Arch.ByteOrder)))
-               return true
-       }
        return false
 }
-func rewriteValueARM64_OpARM64MOVDloadidx(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueARM64_OpARM64MOVHreg(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (MOVDloadidx ptr (MOVDconst [c]) mem)
-       // cond: is32Bit(c)
-       // result: (MOVDload [int32(c)] ptr mem)
+       // match: (MOVHreg x:(MOVBload _ _))
+       // result: (MOVDreg x)
        for {
-               ptr := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mem := v_2
-               if !(is32Bit(c)) {
+               x := v_0
+               if x.Op != OpARM64MOVBload {
                        break
                }
-               v.reset(OpARM64MOVDload)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               v.AddArg2(ptr, mem)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVDloadidx (MOVDconst [c]) ptr mem)
-       // cond: is32Bit(c)
-       // result: (MOVDload [int32(c)] ptr mem)
+       // match: (MOVHreg x:(MOVBUload _ _))
+       // result: (MOVDreg x)
        for {
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               ptr := v_1
-               mem := v_2
-               if !(is32Bit(c)) {
+               x := v_0
+               if x.Op != OpARM64MOVBUload {
                        break
                }
-               v.reset(OpARM64MOVDload)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               v.AddArg2(ptr, mem)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVDloadidx ptr (SLLconst [3] idx) mem)
-       // result: (MOVDloadidx8 ptr idx mem)
+       // match: (MOVHreg x:(MOVHload _ _))
+       // result: (MOVDreg x)
        for {
-               ptr := v_0
-               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 3 {
+               x := v_0
+               if x.Op != OpARM64MOVHload {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v_2
-               v.reset(OpARM64MOVDloadidx8)
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVDloadidx (SLLconst [3] idx) ptr mem)
-       // result: (MOVDloadidx8 ptr idx mem)
+       // match: (MOVHreg x:(MOVBloadidx _ _ _))
+       // result: (MOVDreg x)
        for {
-               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 3 {
+               x := v_0
+               if x.Op != OpARM64MOVBloadidx {
                        break
                }
-               idx := v_0.Args[0]
-               ptr := v_1
-               mem := v_2
-               v.reset(OpARM64MOVDloadidx8)
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _))
-       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
-       // result: (MOVDconst [0])
+       // match: (MOVHreg x:(MOVBUloadidx _ _ _))
+       // result: (MOVDreg x)
        for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVDstorezeroidx {
-                       break
-               }
-               idx2 := v_2.Args[1]
-               ptr2 := v_2.Args[0]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+               x := v_0
+               if x.Op != OpARM64MOVBUloadidx {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(0)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVDloadidx8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVDloadidx8 ptr (MOVDconst [c]) mem)
-       // cond: is32Bit(c<<3)
-       // result: (MOVDload [int32(c)<<3] ptr mem)
+       // match: (MOVHreg x:(MOVHloadidx _ _ _))
+       // result: (MOVDreg x)
        for {
-               ptr := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mem := v_2
-               if !(is32Bit(c << 3)) {
+               x := v_0
+               if x.Op != OpARM64MOVHloadidx {
                        break
                }
-               v.reset(OpARM64MOVDload)
-               v.AuxInt = int32ToAuxInt(int32(c) << 3)
-               v.AddArg2(ptr, mem)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVDloadidx8 ptr idx (MOVDstorezeroidx8 ptr2 idx2 _))
-       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
-       // result: (MOVDconst [0])
+       // match: (MOVHreg x:(MOVHloadidx2 _ _ _))
+       // result: (MOVDreg x)
        for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVDstorezeroidx8 {
+               x := v_0
+               if x.Op != OpARM64MOVHloadidx2 {
                        break
                }
-               idx2 := v_2.Args[1]
-               ptr2 := v_2.Args[0]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHreg x:(MOVBreg _))
+       // result: (MOVDreg x)
+       for {
+               x := v_0
+               if x.Op != OpARM64MOVBreg {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(0)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVDnop(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (MOVDnop (MOVDconst [c]))
-       // result: (MOVDconst [c])
+       // match: (MOVHreg x:(MOVBUreg _))
+       // result: (MOVDreg x)
        for {
-               if v_0.Op != OpARM64MOVDconst {
+               x := v_0
+               if x.Op != OpARM64MOVBUreg {
                        break
                }
-               c := auxIntToInt64(v_0.AuxInt)
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(c)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVDreg(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (MOVDreg x)
-       // cond: x.Uses == 1
-       // result: (MOVDnop x)
+       // match: (MOVHreg x:(MOVHreg _))
+       // result: (MOVDreg x)
        for {
                x := v_0
-               if !(x.Uses == 1) {
+               if x.Op != OpARM64MOVHreg {
                        break
                }
-               v.reset(OpARM64MOVDnop)
+               v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVDreg (MOVDconst [c]))
-       // result: (MOVDconst [c])
+       // match: (MOVHreg (MOVDconst [c]))
+       // result: (MOVDconst [int64(int16(c))])
        for {
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := auxIntToInt64(v_0.AuxInt)
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(c)
+               v.AuxInt = int64ToAuxInt(int64(int16(c)))
+               return true
+       }
+       // match: (MOVHreg <t> (ANDconst x [c]))
+       // cond: uint64(c) & uint64(0xffffffffffff8000) == 0
+       // result: (ANDconst <t> x [c])
+       for {
+               t := v.Type
+               if v_0.Op != OpARM64ANDconst {
+                       break
+               }
+               c := auxIntToInt64(v_0.AuxInt)
+               x := v_0.Args[0]
+               if !(uint64(c)&uint64(0xffffffffffff8000) == 0) {
+                       break
+               }
+               v.reset(OpARM64ANDconst)
+               v.Type = t
+               v.AuxInt = int64ToAuxInt(c)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHreg (SLLconst [lc] x))
+       // cond: lc < 16
+       // result: (SBFIZ [armBFAuxInt(lc, 16-lc)] x)
+       for {
+               if v_0.Op != OpARM64SLLconst {
+                       break
+               }
+               lc := auxIntToInt64(v_0.AuxInt)
+               x := v_0.Args[0]
+               if !(lc < 16) {
+                       break
+               }
+               v.reset(OpARM64SBFIZ)
+               v.AuxInt = arm64BitFieldToAuxInt(armBFAuxInt(lc, 16-lc))
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHreg (SBFX [bfc] x))
+       // cond: bfc.getARM64BFwidth() <= 16
+       // result: (SBFX [bfc] x)
+       for {
+               if v_0.Op != OpARM64SBFX {
+                       break
+               }
+               bfc := auxIntToArm64BitField(v_0.AuxInt)
+               x := v_0.Args[0]
+               if !(bfc.getARM64BFwidth() <= 16) {
+                       break
+               }
+               v.reset(OpARM64SBFX)
+               v.AuxInt = arm64BitFieldToAuxInt(bfc)
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVDstore(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVHstore(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        config := b.Func.Config
-       // match: (MOVDstore [off] {sym} ptr (FMOVDfpgp val) mem)
-       // result: (FMOVDstore [off] {sym} ptr val mem)
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpARM64FMOVDfpgp {
-                       break
-               }
-               val := v_1.Args[0]
-               mem := v_2
-               v.reset(OpARM64FMOVDstore)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       // match: (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // match: (MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem)
        // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVDstore [off1+int32(off2)] {sym} ptr val mem)
+       // result: (MOVHstore [off1+int32(off2)] {sym} ptr val mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -11114,15 +10842,15 @@ func rewriteValueARM64_OpARM64MOVDstore(v *Value) bool {
                if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               v.reset(OpARM64MOVDstore)
+               v.reset(OpARM64MOVHstore)
                v.AuxInt = int32ToAuxInt(off1 + int32(off2))
                v.Aux = symToAux(sym)
                v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVDstore [off] {sym} (ADD ptr idx) val mem)
+       // match: (MOVHstore [off] {sym} (ADD ptr idx) val mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVDstoreidx ptr idx val mem)
+       // result: (MOVHstoreidx ptr idx val mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -11136,17 +10864,17 @@ func rewriteValueARM64_OpARM64MOVDstore(v *Value) bool {
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVDstoreidx)
+               v.reset(OpARM64MOVHstoreidx)
                v.AddArg4(ptr, idx, val, mem)
                return true
        }
-       // match: (MOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem)
+       // match: (MOVHstore [off] {sym} (ADDshiftLL [1] ptr idx) val mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVDstoreidx8 ptr idx val mem)
+       // result: (MOVHstoreidx2 ptr idx val mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 3 {
+               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 1 {
                        break
                }
                idx := v_0.Args[1]
@@ -11156,13 +10884,13 @@ func rewriteValueARM64_OpARM64MOVDstore(v *Value) bool {
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVDstoreidx8)
+               v.reset(OpARM64MOVHstoreidx2)
                v.AddArg4(ptr, idx, val, mem)
                return true
        }
-       // match: (MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
+       // match: (MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       // result: (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym1 := auxToSym(v.Aux)
@@ -11177,14 +10905,14 @@ func rewriteValueARM64_OpARM64MOVDstore(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               v.reset(OpARM64MOVDstore)
+               v.reset(OpARM64MOVHstore)
                v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(mergeSym(sym1, sym2))
                v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem)
-       // result: (MOVDstorezero [off] {sym} ptr mem)
+       // match: (MOVHstore [off] {sym} ptr (MOVDconst [0]) mem)
+       // result: (MOVHstorezero [off] {sym} ptr mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -11193,22 +10921,90 @@ func rewriteValueARM64_OpARM64MOVDstore(v *Value) bool {
                        break
                }
                mem := v_2
-               v.reset(OpARM64MOVDstorezero)
+               v.reset(OpARM64MOVHstorezero)
                v.AuxInt = int32ToAuxInt(off)
                v.Aux = symToAux(sym)
                v.AddArg2(ptr, mem)
                return true
        }
+       // match: (MOVHstore [off] {sym} ptr (MOVHreg x) mem)
+       // result: (MOVHstore [off] {sym} ptr x mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpARM64MOVHreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v_2
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, x, mem)
+               return true
+       }
+       // match: (MOVHstore [off] {sym} ptr (MOVHUreg x) mem)
+       // result: (MOVHstore [off] {sym} ptr x mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpARM64MOVHUreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v_2
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, x, mem)
+               return true
+       }
+       // match: (MOVHstore [off] {sym} ptr (MOVWreg x) mem)
+       // result: (MOVHstore [off] {sym} ptr x mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpARM64MOVWreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v_2
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, x, mem)
+               return true
+       }
+       // match: (MOVHstore [off] {sym} ptr (MOVWUreg x) mem)
+       // result: (MOVHstore [off] {sym} ptr x mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpARM64MOVWUreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v_2
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, x, mem)
+               return true
+       }
        return false
 }
-func rewriteValueARM64_OpARM64MOVDstoreidx(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVHstoreidx(v *Value) bool {
        v_3 := v.Args[3]
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVDstoreidx ptr (MOVDconst [c]) val mem)
+       // match: (MOVHstoreidx ptr (MOVDconst [c]) val mem)
        // cond: is32Bit(c)
-       // result: (MOVDstore [int32(c)] ptr val mem)
+       // result: (MOVHstore [int32(c)] ptr val mem)
        for {
                ptr := v_0
                if v_1.Op != OpARM64MOVDconst {
@@ -11220,14 +11016,14 @@ func rewriteValueARM64_OpARM64MOVDstoreidx(v *Value) bool {
                if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVDstore)
+               v.reset(OpARM64MOVHstore)
                v.AuxInt = int32ToAuxInt(int32(c))
                v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVDstoreidx (MOVDconst [c]) idx val mem)
+       // match: (MOVHstoreidx (MOVDconst [c]) idx val mem)
        // cond: is32Bit(c)
-       // result: (MOVDstore [int32(c)] idx val mem)
+       // result: (MOVHstore [int32(c)] idx val mem)
        for {
                if v_0.Op != OpARM64MOVDconst {
                        break
@@ -11239,41 +11035,75 @@ func rewriteValueARM64_OpARM64MOVDstoreidx(v *Value) bool {
                if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVDstore)
+               v.reset(OpARM64MOVHstore)
                v.AuxInt = int32ToAuxInt(int32(c))
                v.AddArg3(idx, val, mem)
                return true
        }
-       // match: (MOVDstoreidx ptr (SLLconst [3] idx) val mem)
-       // result: (MOVDstoreidx8 ptr idx val mem)
+       // match: (MOVHstoreidx ptr (SLLconst [1] idx) val mem)
+       // result: (MOVHstoreidx2 ptr idx val mem)
        for {
                ptr := v_0
-               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 3 {
+               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 1 {
                        break
                }
                idx := v_1.Args[0]
                val := v_2
                mem := v_3
-               v.reset(OpARM64MOVDstoreidx8)
+               v.reset(OpARM64MOVHstoreidx2)
                v.AddArg4(ptr, idx, val, mem)
                return true
        }
-       // match: (MOVDstoreidx (SLLconst [3] idx) ptr val mem)
-       // result: (MOVDstoreidx8 ptr idx val mem)
+       // match: (MOVHstoreidx ptr (ADD idx idx) val mem)
+       // result: (MOVHstoreidx2 ptr idx val mem)
        for {
-               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 3 {
+               ptr := v_0
+               if v_1.Op != OpARM64ADD {
+                       break
+               }
+               idx := v_1.Args[1]
+               if idx != v_1.Args[0] {
+                       break
+               }
+               val := v_2
+               mem := v_3
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg4(ptr, idx, val, mem)
+               return true
+       }
+       // match: (MOVHstoreidx (SLLconst [1] idx) ptr val mem)
+       // result: (MOVHstoreidx2 ptr idx val mem)
+       for {
+               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 1 {
                        break
                }
                idx := v_0.Args[0]
                ptr := v_1
                val := v_2
                mem := v_3
-               v.reset(OpARM64MOVDstoreidx8)
+               v.reset(OpARM64MOVHstoreidx2)
                v.AddArg4(ptr, idx, val, mem)
                return true
        }
-       // match: (MOVDstoreidx ptr idx (MOVDconst [0]) mem)
-       // result: (MOVDstorezeroidx ptr idx mem)
+       // match: (MOVHstoreidx (ADD idx idx) ptr val mem)
+       // result: (MOVHstoreidx2 ptr idx val mem)
+       for {
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               idx := v_0.Args[1]
+               if idx != v_0.Args[0] {
+                       break
+               }
+               ptr := v_1
+               val := v_2
+               mem := v_3
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg4(ptr, idx, val, mem)
+               return true
+       }
+       // match: (MOVHstoreidx ptr idx (MOVDconst [0]) mem)
+       // result: (MOVHstorezeroidx ptr idx mem)
        for {
                ptr := v_0
                idx := v_1
@@ -11281,38 +11111,94 @@ func rewriteValueARM64_OpARM64MOVDstoreidx(v *Value) bool {
                        break
                }
                mem := v_3
-               v.reset(OpARM64MOVDstorezeroidx)
+               v.reset(OpARM64MOVHstorezeroidx)
                v.AddArg3(ptr, idx, mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVDstoreidx8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVDstoreidx8 ptr (MOVDconst [c]) val mem)
-       // cond: is32Bit(c<<3)
-       // result: (MOVDstore [int32(c)<<3] ptr val mem)
+       // match: (MOVHstoreidx ptr idx (MOVHreg x) mem)
+       // result: (MOVHstoreidx ptr idx x mem)
        for {
                ptr := v_0
-               if v_1.Op != OpARM64MOVDconst {
+               idx := v_1
+               if v_2.Op != OpARM64MOVHreg {
+                       break
+               }
+               x := v_2.Args[0]
+               mem := v_3
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg4(ptr, idx, x, mem)
+               return true
+       }
+       // match: (MOVHstoreidx ptr idx (MOVHUreg x) mem)
+       // result: (MOVHstoreidx ptr idx x mem)
+       for {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVHUreg {
+                       break
+               }
+               x := v_2.Args[0]
+               mem := v_3
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg4(ptr, idx, x, mem)
+               return true
+       }
+       // match: (MOVHstoreidx ptr idx (MOVWreg x) mem)
+       // result: (MOVHstoreidx ptr idx x mem)
+       for {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVWreg {
+                       break
+               }
+               x := v_2.Args[0]
+               mem := v_3
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg4(ptr, idx, x, mem)
+               return true
+       }
+       // match: (MOVHstoreidx ptr idx (MOVWUreg x) mem)
+       // result: (MOVHstoreidx ptr idx x mem)
+       for {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVWUreg {
+                       break
+               }
+               x := v_2.Args[0]
+               mem := v_3
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg4(ptr, idx, x, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHstoreidx2(v *Value) bool {
+       v_3 := v.Args[3]
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVHstoreidx2 ptr (MOVDconst [c]) val mem)
+       // cond: is32Bit(c<<1)
+       // result: (MOVHstore [int32(c)<<1] ptr val mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := auxIntToInt64(v_1.AuxInt)
                val := v_2
                mem := v_3
-               if !(is32Bit(c << 3)) {
+               if !(is32Bit(c << 1)) {
                        break
                }
-               v.reset(OpARM64MOVDstore)
-               v.AuxInt = int32ToAuxInt(int32(c) << 3)
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = int32ToAuxInt(int32(c) << 1)
                v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVDstoreidx8 ptr idx (MOVDconst [0]) mem)
-       // result: (MOVDstorezeroidx8 ptr idx mem)
+       // match: (MOVHstoreidx2 ptr idx (MOVDconst [0]) mem)
+       // result: (MOVHstorezeroidx2 ptr idx mem)
        for {
                ptr := v_0
                idx := v_1
@@ -11320,20 +11206,76 @@ func rewriteValueARM64_OpARM64MOVDstoreidx8(v *Value) bool {
                        break
                }
                mem := v_3
-               v.reset(OpARM64MOVDstorezeroidx8)
+               v.reset(OpARM64MOVHstorezeroidx2)
                v.AddArg3(ptr, idx, mem)
                return true
        }
+       // match: (MOVHstoreidx2 ptr idx (MOVHreg x) mem)
+       // result: (MOVHstoreidx2 ptr idx x mem)
+       for {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVHreg {
+                       break
+               }
+               x := v_2.Args[0]
+               mem := v_3
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg4(ptr, idx, x, mem)
+               return true
+       }
+       // match: (MOVHstoreidx2 ptr idx (MOVHUreg x) mem)
+       // result: (MOVHstoreidx2 ptr idx x mem)
+       for {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVHUreg {
+                       break
+               }
+               x := v_2.Args[0]
+               mem := v_3
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg4(ptr, idx, x, mem)
+               return true
+       }
+       // match: (MOVHstoreidx2 ptr idx (MOVWreg x) mem)
+       // result: (MOVHstoreidx2 ptr idx x mem)
+       for {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVWreg {
+                       break
+               }
+               x := v_2.Args[0]
+               mem := v_3
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg4(ptr, idx, x, mem)
+               return true
+       }
+       // match: (MOVHstoreidx2 ptr idx (MOVWUreg x) mem)
+       // result: (MOVHstoreidx2 ptr idx x mem)
+       for {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVWUreg {
+                       break
+               }
+               x := v_2.Args[0]
+               mem := v_3
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg4(ptr, idx, x, mem)
+               return true
+       }
        return false
 }
-func rewriteValueARM64_OpARM64MOVDstorezero(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVHstorezero(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        config := b.Func.Config
-       // match: (MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVDstorezero [off1+int32(off2)] {sym} ptr mem)
+       // result: (MOVHstorezero [off1+int32(off2)] {sym} ptr mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -11346,15 +11288,15 @@ func rewriteValueARM64_OpARM64MOVDstorezero(v *Value) bool {
                if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               v.reset(OpARM64MOVDstorezero)
+               v.reset(OpARM64MOVHstorezero)
                v.AuxInt = int32ToAuxInt(off1 + int32(off2))
                v.Aux = symToAux(sym)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // match: (MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // result: (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym1 := auxToSym(v.Aux)
@@ -11368,15 +11310,15 @@ func rewriteValueARM64_OpARM64MOVDstorezero(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               v.reset(OpARM64MOVDstorezero)
+               v.reset(OpARM64MOVHstorezero)
                v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(mergeSym(sym1, sym2))
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVDstorezero [off] {sym} (ADD ptr idx) mem)
+       // match: (MOVHstorezero [off] {sym} (ADD ptr idx) mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVDstorezeroidx ptr idx mem)
+       // result: (MOVHstorezeroidx ptr idx mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -11389,17 +11331,17 @@ func rewriteValueARM64_OpARM64MOVDstorezero(v *Value) bool {
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVDstorezeroidx)
+               v.reset(OpARM64MOVHstorezeroidx)
                v.AddArg3(ptr, idx, mem)
                return true
        }
-       // match: (MOVDstorezero [off] {sym} (ADDshiftLL [3] ptr idx) mem)
+       // match: (MOVHstorezero [off] {sym} (ADDshiftLL [1] ptr idx) mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVDstorezeroidx8 ptr idx mem)
+       // result: (MOVHstorezeroidx2 ptr idx mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 3 {
+               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 1 {
                        break
                }
                idx := v_0.Args[1]
@@ -11408,111 +11350,19 @@ func rewriteValueARM64_OpARM64MOVDstorezero(v *Value) bool {
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVDstorezeroidx8)
+               v.reset(OpARM64MOVHstorezeroidx2)
                v.AddArg3(ptr, idx, mem)
                return true
        }
-       // match: (MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
-       // cond: x.Uses == 1 && areAdjacentOffsets(int64(i),int64(j),8) && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVQstorezero [int32(min(int64(i),int64(j)))] {s} ptr0 mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               ptr0 := v_0
-               x := v_1
-               if x.Op != OpARM64MOVDstorezero {
-                       break
-               }
-               j := auxIntToInt32(x.AuxInt)
-               if auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[1]
-               ptr1 := x.Args[0]
-               if !(x.Uses == 1 && areAdjacentOffsets(int64(i), int64(j), 8) && isSamePtr(ptr0, ptr1) && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVQstorezero)
-               v.AuxInt = int32ToAuxInt(int32(min(int64(i), int64(j))))
-               v.Aux = symToAux(s)
-               v.AddArg2(ptr0, mem)
-               return true
-       }
-       // match: (MOVDstorezero [8] {s} p0:(ADD ptr0 idx0) x:(MOVDstorezeroidx ptr1 idx1 mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVQstorezero [0] {s} p0 mem)
-       for {
-               if auxIntToInt32(v.AuxInt) != 8 {
-                       break
-               }
-               s := auxToSym(v.Aux)
-               p0 := v_0
-               if p0.Op != OpARM64ADD {
-                       break
-               }
-               _ = p0.Args[1]
-               p0_0 := p0.Args[0]
-               p0_1 := p0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, p0_0, p0_1 = _i0+1, p0_1, p0_0 {
-                       ptr0 := p0_0
-                       idx0 := p0_1
-                       x := v_1
-                       if x.Op != OpARM64MOVDstorezeroidx {
-                               continue
-                       }
-                       mem := x.Args[2]
-                       ptr1 := x.Args[0]
-                       idx1 := x.Args[1]
-                       if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVQstorezero)
-                       v.AuxInt = int32ToAuxInt(0)
-                       v.Aux = symToAux(s)
-                       v.AddArg2(p0, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVDstorezero [8] {s} p0:(ADDshiftLL [3] ptr0 idx0) x:(MOVDstorezeroidx8 ptr1 idx1 mem))
-       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVQstorezero [0] {s} p0 mem)
-       for {
-               if auxIntToInt32(v.AuxInt) != 8 {
-                       break
-               }
-               s := auxToSym(v.Aux)
-               p0 := v_0
-               if p0.Op != OpARM64ADDshiftLL || auxIntToInt64(p0.AuxInt) != 3 {
-                       break
-               }
-               idx0 := p0.Args[1]
-               ptr0 := p0.Args[0]
-               x := v_1
-               if x.Op != OpARM64MOVDstorezeroidx8 {
-                       break
-               }
-               mem := x.Args[2]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVQstorezero)
-               v.AuxInt = int32ToAuxInt(0)
-               v.Aux = symToAux(s)
-               v.AddArg2(p0, mem)
-               return true
-       }
        return false
 }
-func rewriteValueARM64_OpARM64MOVDstorezeroidx(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVHstorezeroidx(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVDstorezeroidx ptr (MOVDconst [c]) mem)
+       // match: (MOVHstorezeroidx ptr (MOVDconst [c]) mem)
        // cond: is32Bit(c)
-       // result: (MOVDstorezero [int32(c)] ptr mem)
+       // result: (MOVHstorezero [int32(c)] ptr mem)
        for {
                ptr := v_0
                if v_1.Op != OpARM64MOVDconst {
@@ -11523,14 +11373,14 @@ func rewriteValueARM64_OpARM64MOVDstorezeroidx(v *Value) bool {
                if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVDstorezero)
+               v.reset(OpARM64MOVHstorezero)
                v.AuxInt = int32ToAuxInt(int32(c))
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVDstorezeroidx (MOVDconst [c]) idx mem)
+       // match: (MOVHstorezeroidx (MOVDconst [c]) idx mem)
        // cond: is32Bit(c)
-       // result: (MOVDstorezero [int32(c)] idx mem)
+       // result: (MOVHstorezero [int32(c)] idx mem)
        for {
                if v_0.Op != OpARM64MOVDconst {
                        break
@@ -11541,71 +11391,103 @@ func rewriteValueARM64_OpARM64MOVDstorezeroidx(v *Value) bool {
                if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVDstorezero)
+               v.reset(OpARM64MOVHstorezero)
                v.AuxInt = int32ToAuxInt(int32(c))
                v.AddArg2(idx, mem)
                return true
        }
-       // match: (MOVDstorezeroidx ptr (SLLconst [3] idx) mem)
-       // result: (MOVDstorezeroidx8 ptr idx mem)
+       // match: (MOVHstorezeroidx ptr (SLLconst [1] idx) mem)
+       // result: (MOVHstorezeroidx2 ptr idx mem)
        for {
                ptr := v_0
-               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 3 {
+               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 1 {
                        break
                }
                idx := v_1.Args[0]
                mem := v_2
-               v.reset(OpARM64MOVDstorezeroidx8)
+               v.reset(OpARM64MOVHstorezeroidx2)
                v.AddArg3(ptr, idx, mem)
                return true
        }
-       // match: (MOVDstorezeroidx (SLLconst [3] idx) ptr mem)
-       // result: (MOVDstorezeroidx8 ptr idx mem)
+       // match: (MOVHstorezeroidx ptr (ADD idx idx) mem)
+       // result: (MOVHstorezeroidx2 ptr idx mem)
        for {
-               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 3 {
+               ptr := v_0
+               if v_1.Op != OpARM64ADD {
+                       break
+               }
+               idx := v_1.Args[1]
+               if idx != v_1.Args[0] {
                        break
                }
-               idx := v_0.Args[0]
-               ptr := v_1
                mem := v_2
-               v.reset(OpARM64MOVDstorezeroidx8)
+               v.reset(OpARM64MOVHstorezeroidx2)
                v.AddArg3(ptr, idx, mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVDstorezeroidx8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVDstorezeroidx8 ptr (MOVDconst [c]) mem)
-       // cond: is32Bit(c<<3)
-       // result: (MOVDstorezero [int32(c<<3)] ptr mem)
+       // match: (MOVHstorezeroidx (SLLconst [1] idx) ptr mem)
+       // result: (MOVHstorezeroidx2 ptr idx mem)
        for {
-               ptr := v_0
-               if v_1.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 1 {
                        break
                }
-               c := auxIntToInt64(v_1.AuxInt)
+               idx := v_0.Args[0]
+               ptr := v_1
                mem := v_2
-               if !(is32Bit(c << 3)) {
+               v.reset(OpARM64MOVHstorezeroidx2)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       // match: (MOVHstorezeroidx (ADD idx idx) ptr mem)
+       // result: (MOVHstorezeroidx2 ptr idx mem)
+       for {
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               v.reset(OpARM64MOVDstorezero)
-               v.AuxInt = int32ToAuxInt(int32(c << 3))
+               idx := v_0.Args[1]
+               if idx != v_0.Args[0] {
+                       break
+               }
+               ptr := v_1
+               mem := v_2
+               v.reset(OpARM64MOVHstorezeroidx2)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHstorezeroidx2(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVHstorezeroidx2 ptr (MOVDconst [c]) mem)
+       // cond: is32Bit(c<<1)
+       // result: (MOVHstorezero [int32(c<<1)] ptr mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mem := v_2
+               if !(is32Bit(c << 1)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = int32ToAuxInt(int32(c << 1))
                v.AddArg2(ptr, mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHUload(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVQstorezero(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        config := b.Func.Config
-       // match: (MOVHUload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVHUload [off1+int32(off2)] {sym} ptr mem)
+       // result: (MOVQstorezero [off1+int32(off2)] {sym} ptr mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -11618,15 +11500,82 @@ func rewriteValueARM64_OpARM64MOVHUload(v *Value) bool {
                if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               v.reset(OpARM64MOVHUload)
+               v.reset(OpARM64MOVQstorezero)
                v.AuxInt = int32ToAuxInt(off1 + int32(off2))
                v.Aux = symToAux(sym)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVHUload [off] {sym} (ADD ptr idx) mem)
+       // match: (MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+       // result: (MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+                       break
+               }
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWUload(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       config := b.Func.Config
+       // match: (MOVWUload [off] {sym} ptr (FMOVSstore [off] {sym} ptr val _))
+       // result: (FMOVSfpgp val)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpARM64FMOVSstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
+                       break
+               }
+               val := v_1.Args[1]
+               if ptr != v_1.Args[0] {
+                       break
+               }
+               v.reset(OpARM64FMOVSfpgp)
+               v.AddArg(val)
+               return true
+       }
+       // match: (MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+       // result: (MOVWUload [off1+int32(off2)] {sym} ptr mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               off2 := auxIntToInt64(v_0.AuxInt)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+                       break
+               }
+               v.reset(OpARM64MOVWUload)
+               v.AuxInt = int32ToAuxInt(off1 + int32(off2))
+               v.Aux = symToAux(sym)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVWUload [off] {sym} (ADD ptr idx) mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVHUloadidx ptr idx mem)
+       // result: (MOVWUloadidx ptr idx mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -11639,17 +11588,17 @@ func rewriteValueARM64_OpARM64MOVHUload(v *Value) bool {
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVHUloadidx)
+               v.reset(OpARM64MOVWUloadidx)
                v.AddArg3(ptr, idx, mem)
                return true
        }
-       // match: (MOVHUload [off] {sym} (ADDshiftLL [1] ptr idx) mem)
+       // match: (MOVWUload [off] {sym} (ADDshiftLL [2] ptr idx) mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVHUloadidx2 ptr idx mem)
+       // result: (MOVWUloadidx4 ptr idx mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 1 {
+               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 2 {
                        break
                }
                idx := v_0.Args[1]
@@ -11658,13 +11607,13 @@ func rewriteValueARM64_OpARM64MOVHUload(v *Value) bool {
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVHUloadidx2)
+               v.reset(OpARM64MOVWUloadidx4)
                v.AddArg3(ptr, idx, mem)
                return true
        }
-       // match: (MOVHUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // match: (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym1 := auxToSym(v.Aux)
@@ -11678,20 +11627,20 @@ func rewriteValueARM64_OpARM64MOVHUload(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               v.reset(OpARM64MOVHUload)
+               v.reset(OpARM64MOVWUload)
                v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(mergeSym(sym1, sym2))
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
+       // match: (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
        // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
        // result: (MOVDconst [0])
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
-               if v_1.Op != OpARM64MOVHstorezero {
+               if v_1.Op != OpARM64MOVWstorezero {
                        break
                }
                off2 := auxIntToInt32(v_1.AuxInt)
@@ -11704,9 +11653,9 @@ func rewriteValueARM64_OpARM64MOVHUload(v *Value) bool {
                v.AuxInt = int64ToAuxInt(0)
                return true
        }
-       // match: (MOVHUload [off] {sym} (SB) _)
+       // match: (MOVWUload [off] {sym} (SB) _)
        // cond: symIsRO(sym)
-       // result: (MOVDconst [int64(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))])
+       // result: (MOVDconst [int64(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))])
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -11714,18 +11663,18 @@ func rewriteValueARM64_OpARM64MOVHUload(v *Value) bool {
                        break
                }
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(int64(read16(sym, int64(off), config.ctxt.Arch.ByteOrder)))
+               v.AuxInt = int64ToAuxInt(int64(read32(sym, int64(off), config.ctxt.Arch.ByteOrder)))
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHUloadidx(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVWUloadidx(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVHUloadidx ptr (MOVDconst [c]) mem)
+       // match: (MOVWUloadidx ptr (MOVDconst [c]) mem)
        // cond: is32Bit(c)
-       // result: (MOVHUload [int32(c)] ptr mem)
+       // result: (MOVWUload [int32(c)] ptr mem)
        for {
                ptr := v_0
                if v_1.Op != OpARM64MOVDconst {
@@ -11736,14 +11685,14 @@ func rewriteValueARM64_OpARM64MOVHUloadidx(v *Value) bool {
                if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVHUload)
+               v.reset(OpARM64MOVWUload)
                v.AuxInt = int32ToAuxInt(int32(c))
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVHUloadidx (MOVDconst [c]) ptr mem)
+       // match: (MOVWUloadidx (MOVDconst [c]) ptr mem)
        // cond: is32Bit(c)
-       // result: (MOVHUload [int32(c)] ptr mem)
+       // result: (MOVWUload [int32(c)] ptr mem)
        for {
                if v_0.Op != OpARM64MOVDconst {
                        break
@@ -11754,63 +11703,44 @@ func rewriteValueARM64_OpARM64MOVHUloadidx(v *Value) bool {
                if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVHUload)
+               v.reset(OpARM64MOVWUload)
                v.AuxInt = int32ToAuxInt(int32(c))
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVHUloadidx ptr (SLLconst [1] idx) mem)
-       // result: (MOVHUloadidx2 ptr idx mem)
+       // match: (MOVWUloadidx ptr (SLLconst [2] idx) mem)
+       // result: (MOVWUloadidx4 ptr idx mem)
        for {
                ptr := v_0
-               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 1 {
+               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 2 {
                        break
                }
                idx := v_1.Args[0]
                mem := v_2
-               v.reset(OpARM64MOVHUloadidx2)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVHUloadidx ptr (ADD idx idx) mem)
-       // result: (MOVHUloadidx2 ptr idx mem)
-       for {
-               ptr := v_0
-               if v_1.Op != OpARM64ADD {
-                       break
-               }
-               idx := v_1.Args[1]
-               if idx != v_1.Args[0] {
-                       break
-               }
-               mem := v_2
-               v.reset(OpARM64MOVHUloadidx2)
+               v.reset(OpARM64MOVWUloadidx4)
                v.AddArg3(ptr, idx, mem)
                return true
        }
-       // match: (MOVHUloadidx (ADD idx idx) ptr mem)
-       // result: (MOVHUloadidx2 ptr idx mem)
+       // match: (MOVWUloadidx (SLLconst [2] idx) ptr mem)
+       // result: (MOVWUloadidx4 ptr idx mem)
        for {
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               idx := v_0.Args[1]
-               if idx != v_0.Args[0] {
+               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 2 {
                        break
                }
+               idx := v_0.Args[0]
                ptr := v_1
                mem := v_2
-               v.reset(OpARM64MOVHUloadidx2)
+               v.reset(OpARM64MOVWUloadidx4)
                v.AddArg3(ptr, idx, mem)
                return true
        }
-       // match: (MOVHUloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
+       // match: (MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
        // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
        // result: (MOVDconst [0])
        for {
                ptr := v_0
                idx := v_1
-               if v_2.Op != OpARM64MOVHstorezeroidx {
+               if v_2.Op != OpARM64MOVWstorezeroidx {
                        break
                }
                idx2 := v_2.Args[1]
@@ -11824,13 +11754,13 @@ func rewriteValueARM64_OpARM64MOVHUloadidx(v *Value) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHUloadidx2(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVWUloadidx4(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVHUloadidx2 ptr (MOVDconst [c]) mem)
-       // cond: is32Bit(c<<1)
-       // result: (MOVHUload [int32(c)<<1] ptr mem)
+       // match: (MOVWUloadidx4 ptr (MOVDconst [c]) mem)
+       // cond: is32Bit(c<<2)
+       // result: (MOVWUload [int32(c)<<2] ptr mem)
        for {
                ptr := v_0
                if v_1.Op != OpARM64MOVDconst {
@@ -11838,21 +11768,21 @@ func rewriteValueARM64_OpARM64MOVHUloadidx2(v *Value) bool {
                }
                c := auxIntToInt64(v_1.AuxInt)
                mem := v_2
-               if !(is32Bit(c << 1)) {
+               if !(is32Bit(c << 2)) {
                        break
                }
-               v.reset(OpARM64MOVHUload)
-               v.AuxInt = int32ToAuxInt(int32(c) << 1)
+               v.reset(OpARM64MOVWUload)
+               v.AuxInt = int32ToAuxInt(int32(c) << 2)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVHUloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _))
+       // match: (MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _))
        // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
        // result: (MOVDconst [0])
        for {
                ptr := v_0
                idx := v_1
-               if v_2.Op != OpARM64MOVHstorezeroidx2 {
+               if v_2.Op != OpARM64MOVWstorezeroidx4 {
                        break
                }
                idx2 := v_2.Args[1]
@@ -11866,9 +11796,9 @@ func rewriteValueARM64_OpARM64MOVHUloadidx2(v *Value) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHUreg(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVWUreg(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (MOVHUreg x:(MOVBUload _ _))
+       // match: (MOVWUreg x:(MOVBUload _ _))
        // result: (MOVDreg x)
        for {
                x := v_0
@@ -11879,7 +11809,7 @@ func rewriteValueARM64_OpARM64MOVHUreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVHUreg x:(MOVHUload _ _))
+       // match: (MOVWUreg x:(MOVHUload _ _))
        // result: (MOVDreg x)
        for {
                x := v_0
@@ -11890,136 +11820,191 @@ func rewriteValueARM64_OpARM64MOVHUreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVHUreg x:(MOVBUloadidx _ _ _))
+       // match: (MOVWUreg x:(MOVWUload _ _))
        // result: (MOVDreg x)
        for {
                x := v_0
-               if x.Op != OpARM64MOVBUloadidx {
+               if x.Op != OpARM64MOVWUload {
                        break
                }
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVHUreg x:(MOVHUloadidx _ _ _))
+       // match: (MOVWUreg x:(MOVBUloadidx _ _ _))
        // result: (MOVDreg x)
        for {
                x := v_0
-               if x.Op != OpARM64MOVHUloadidx {
+               if x.Op != OpARM64MOVBUloadidx {
                        break
                }
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVHUreg x:(MOVHUloadidx2 _ _ _))
+       // match: (MOVWUreg x:(MOVHUloadidx _ _ _))
        // result: (MOVDreg x)
        for {
                x := v_0
-               if x.Op != OpARM64MOVHUloadidx2 {
+               if x.Op != OpARM64MOVHUloadidx {
                        break
                }
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVHUreg x:(MOVBUreg _))
+       // match: (MOVWUreg x:(MOVWUloadidx _ _ _))
        // result: (MOVDreg x)
        for {
                x := v_0
-               if x.Op != OpARM64MOVBUreg {
+               if x.Op != OpARM64MOVWUloadidx {
                        break
                }
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVHUreg x:(MOVHUreg _))
+       // match: (MOVWUreg x:(MOVHUloadidx2 _ _ _))
        // result: (MOVDreg x)
        for {
                x := v_0
-               if x.Op != OpARM64MOVHUreg {
+               if x.Op != OpARM64MOVHUloadidx2 {
                        break
                }
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVHUreg (ANDconst [c] x))
-       // result: (ANDconst [c&(1<<16-1)] x)
+       // match: (MOVWUreg x:(MOVWUloadidx4 _ _ _))
+       // result: (MOVDreg x)
        for {
-               if v_0.Op != OpARM64ANDconst {
+               x := v_0
+               if x.Op != OpARM64MOVWUloadidx4 {
                        break
                }
-               c := auxIntToInt64(v_0.AuxInt)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVBUreg _))
+       // result: (MOVDreg x)
+       for {
+               x := v_0
+               if x.Op != OpARM64MOVBUreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVHUreg _))
+       // result: (MOVDreg x)
+       for {
+               x := v_0
+               if x.Op != OpARM64MOVHUreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVWUreg _))
+       // result: (MOVDreg x)
+       for {
+               x := v_0
+               if x.Op != OpARM64MOVWUreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg (ANDconst [c] x))
+       // result: (ANDconst [c&(1<<32-1)] x)
+       for {
+               if v_0.Op != OpARM64ANDconst {
+                       break
+               }
+               c := auxIntToInt64(v_0.AuxInt)
                x := v_0.Args[0]
                v.reset(OpARM64ANDconst)
-               v.AuxInt = int64ToAuxInt(c & (1<<16 - 1))
+               v.AuxInt = int64ToAuxInt(c & (1<<32 - 1))
                v.AddArg(x)
                return true
        }
-       // match: (MOVHUreg (MOVDconst [c]))
-       // result: (MOVDconst [int64(uint16(c))])
+       // match: (MOVWUreg (MOVDconst [c]))
+       // result: (MOVDconst [int64(uint32(c))])
        for {
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := auxIntToInt64(v_0.AuxInt)
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(int64(uint16(c)))
+               v.AuxInt = int64ToAuxInt(int64(uint32(c)))
                return true
        }
-       // match: (MOVHUreg (SLLconst [lc] x))
-       // cond: lc >= 16
+       // match: (MOVWUreg x)
+       // cond: zeroUpper32Bits(x, 3)
+       // result: x
+       for {
+               x := v_0
+               if !(zeroUpper32Bits(x, 3)) {
+                       break
+               }
+               v.copyOf(x)
+               return true
+       }
+       // match: (MOVWUreg (SLLconst [lc] x))
+       // cond: lc >= 32
        // result: (MOVDconst [0])
        for {
                if v_0.Op != OpARM64SLLconst {
                        break
                }
                lc := auxIntToInt64(v_0.AuxInt)
-               if !(lc >= 16) {
+               if !(lc >= 32) {
                        break
                }
                v.reset(OpARM64MOVDconst)
                v.AuxInt = int64ToAuxInt(0)
                return true
        }
-       // match: (MOVHUreg (SLLconst [lc] x))
-       // cond: lc < 16
-       // result: (UBFIZ [armBFAuxInt(lc, 16-lc)] x)
+       // match: (MOVWUreg (SLLconst [lc] x))
+       // cond: lc < 32
+       // result: (UBFIZ [armBFAuxInt(lc, 32-lc)] x)
        for {
                if v_0.Op != OpARM64SLLconst {
                        break
                }
                lc := auxIntToInt64(v_0.AuxInt)
                x := v_0.Args[0]
-               if !(lc < 16) {
+               if !(lc < 32) {
                        break
                }
                v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BitFieldToAuxInt(armBFAuxInt(lc, 16-lc))
+               v.AuxInt = arm64BitFieldToAuxInt(armBFAuxInt(lc, 32-lc))
                v.AddArg(x)
                return true
        }
-       // match: (MOVHUreg (SRLconst [rc] x))
-       // cond: rc < 16
-       // result: (UBFX [armBFAuxInt(rc, 16)] x)
+       // match: (MOVWUreg (SRLconst [rc] x))
+       // cond: rc < 32
+       // result: (UBFX [armBFAuxInt(rc, 32)] x)
        for {
                if v_0.Op != OpARM64SRLconst {
                        break
                }
                rc := auxIntToInt64(v_0.AuxInt)
                x := v_0.Args[0]
-               if !(rc < 16) {
+               if !(rc < 32) {
                        break
                }
                v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BitFieldToAuxInt(armBFAuxInt(rc, 16))
+               v.AuxInt = arm64BitFieldToAuxInt(armBFAuxInt(rc, 32))
                v.AddArg(x)
                return true
        }
-       // match: (MOVHUreg (UBFX [bfc] x))
-       // cond: bfc.getARM64BFwidth() <= 16
+       // match: (MOVWUreg (UBFX [bfc] x))
+       // cond: bfc.getARM64BFwidth() <= 32
        // result: (UBFX [bfc] x)
        for {
                if v_0.Op != OpARM64UBFX {
@@ -12027,7 +12012,7 @@ func rewriteValueARM64_OpARM64MOVHUreg(v *Value) bool {
                }
                bfc := auxIntToArm64BitField(v_0.AuxInt)
                x := v_0.Args[0]
-               if !(bfc.getARM64BFwidth() <= 16) {
+               if !(bfc.getARM64BFwidth() <= 32) {
                        break
                }
                v.reset(OpARM64UBFX)
@@ -12037,14 +12022,14 @@ func rewriteValueARM64_OpARM64MOVHUreg(v *Value) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHload(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVWload(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        config := b.Func.Config
-       // match: (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVHload [off1+int32(off2)] {sym} ptr mem)
+       // result: (MOVWload [off1+int32(off2)] {sym} ptr mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -12057,15 +12042,15 @@ func rewriteValueARM64_OpARM64MOVHload(v *Value) bool {
                if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               v.reset(OpARM64MOVHload)
+               v.reset(OpARM64MOVWload)
                v.AuxInt = int32ToAuxInt(off1 + int32(off2))
                v.Aux = symToAux(sym)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVHload [off] {sym} (ADD ptr idx) mem)
+       // match: (MOVWload [off] {sym} (ADD ptr idx) mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVHloadidx ptr idx mem)
+       // result: (MOVWloadidx ptr idx mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -12078,17 +12063,17 @@ func rewriteValueARM64_OpARM64MOVHload(v *Value) bool {
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVHloadidx)
+               v.reset(OpARM64MOVWloadidx)
                v.AddArg3(ptr, idx, mem)
                return true
        }
-       // match: (MOVHload [off] {sym} (ADDshiftLL [1] ptr idx) mem)
+       // match: (MOVWload [off] {sym} (ADDshiftLL [2] ptr idx) mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVHloadidx2 ptr idx mem)
+       // result: (MOVWloadidx4 ptr idx mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 1 {
+               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 2 {
                        break
                }
                idx := v_0.Args[1]
@@ -12097,13 +12082,13 @@ func rewriteValueARM64_OpARM64MOVHload(v *Value) bool {
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVHloadidx2)
+               v.reset(OpARM64MOVWloadidx4)
                v.AddArg3(ptr, idx, mem)
                return true
        }
-       // match: (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // match: (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym1 := auxToSym(v.Aux)
@@ -12117,20 +12102,20 @@ func rewriteValueARM64_OpARM64MOVHload(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               v.reset(OpARM64MOVHload)
+               v.reset(OpARM64MOVWload)
                v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(mergeSym(sym1, sym2))
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
+       // match: (MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
        // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
        // result: (MOVDconst [0])
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
-               if v_1.Op != OpARM64MOVHstorezero {
+               if v_1.Op != OpARM64MOVWstorezero {
                        break
                }
                off2 := auxIntToInt32(v_1.AuxInt)
@@ -12145,13 +12130,13 @@ func rewriteValueARM64_OpARM64MOVHload(v *Value) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHloadidx(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVWloadidx(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVHloadidx ptr (MOVDconst [c]) mem)
+       // match: (MOVWloadidx ptr (MOVDconst [c]) mem)
        // cond: is32Bit(c)
-       // result: (MOVHload [int32(c)] ptr mem)
+       // result: (MOVWload [int32(c)] ptr mem)
        for {
                ptr := v_0
                if v_1.Op != OpARM64MOVDconst {
@@ -12162,14 +12147,14 @@ func rewriteValueARM64_OpARM64MOVHloadidx(v *Value) bool {
                if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVHload)
+               v.reset(OpARM64MOVWload)
                v.AuxInt = int32ToAuxInt(int32(c))
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVHloadidx (MOVDconst [c]) ptr mem)
+       // match: (MOVWloadidx (MOVDconst [c]) ptr mem)
        // cond: is32Bit(c)
-       // result: (MOVHload [int32(c)] ptr mem)
+       // result: (MOVWload [int32(c)] ptr mem)
        for {
                if v_0.Op != OpARM64MOVDconst {
                        break
@@ -12180,63 +12165,44 @@ func rewriteValueARM64_OpARM64MOVHloadidx(v *Value) bool {
                if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVHload)
+               v.reset(OpARM64MOVWload)
                v.AuxInt = int32ToAuxInt(int32(c))
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVHloadidx ptr (SLLconst [1] idx) mem)
-       // result: (MOVHloadidx2 ptr idx mem)
+       // match: (MOVWloadidx ptr (SLLconst [2] idx) mem)
+       // result: (MOVWloadidx4 ptr idx mem)
        for {
                ptr := v_0
-               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 1 {
+               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 2 {
                        break
                }
                idx := v_1.Args[0]
                mem := v_2
-               v.reset(OpARM64MOVHloadidx2)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVHloadidx ptr (ADD idx idx) mem)
-       // result: (MOVHloadidx2 ptr idx mem)
-       for {
-               ptr := v_0
-               if v_1.Op != OpARM64ADD {
-                       break
-               }
-               idx := v_1.Args[1]
-               if idx != v_1.Args[0] {
-                       break
-               }
-               mem := v_2
-               v.reset(OpARM64MOVHloadidx2)
+               v.reset(OpARM64MOVWloadidx4)
                v.AddArg3(ptr, idx, mem)
                return true
        }
-       // match: (MOVHloadidx (ADD idx idx) ptr mem)
-       // result: (MOVHloadidx2 ptr idx mem)
+       // match: (MOVWloadidx (SLLconst [2] idx) ptr mem)
+       // result: (MOVWloadidx4 ptr idx mem)
        for {
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               idx := v_0.Args[1]
-               if idx != v_0.Args[0] {
+               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 2 {
                        break
                }
+               idx := v_0.Args[0]
                ptr := v_1
                mem := v_2
-               v.reset(OpARM64MOVHloadidx2)
+               v.reset(OpARM64MOVWloadidx4)
                v.AddArg3(ptr, idx, mem)
                return true
        }
-       // match: (MOVHloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
+       // match: (MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
        // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
        // result: (MOVDconst [0])
        for {
                ptr := v_0
                idx := v_1
-               if v_2.Op != OpARM64MOVHstorezeroidx {
+               if v_2.Op != OpARM64MOVWstorezeroidx {
                        break
                }
                idx2 := v_2.Args[1]
@@ -12250,13 +12216,13 @@ func rewriteValueARM64_OpARM64MOVHloadidx(v *Value) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHloadidx2(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVWloadidx4(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVHloadidx2 ptr (MOVDconst [c]) mem)
-       // cond: is32Bit(c<<1)
-       // result: (MOVHload [int32(c)<<1] ptr mem)
+       // match: (MOVWloadidx4 ptr (MOVDconst [c]) mem)
+       // cond: is32Bit(c<<2)
+       // result: (MOVWload [int32(c)<<2] ptr mem)
        for {
                ptr := v_0
                if v_1.Op != OpARM64MOVDconst {
@@ -12264,21 +12230,21 @@ func rewriteValueARM64_OpARM64MOVHloadidx2(v *Value) bool {
                }
                c := auxIntToInt64(v_1.AuxInt)
                mem := v_2
-               if !(is32Bit(c << 1)) {
+               if !(is32Bit(c << 2)) {
                        break
                }
-               v.reset(OpARM64MOVHload)
-               v.AuxInt = int32ToAuxInt(int32(c) << 1)
+               v.reset(OpARM64MOVWload)
+               v.AuxInt = int32ToAuxInt(int32(c) << 2)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVHloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _))
+       // match: (MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _))
        // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
        // result: (MOVDconst [0])
        for {
                ptr := v_0
                idx := v_1
-               if v_2.Op != OpARM64MOVHstorezeroidx2 {
+               if v_2.Op != OpARM64MOVWstorezeroidx4 {
                        break
                }
                idx2 := v_2.Args[1]
@@ -12292,9 +12258,9 @@ func rewriteValueARM64_OpARM64MOVHloadidx2(v *Value) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHreg(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVWreg(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (MOVHreg x:(MOVBload _ _))
+       // match: (MOVWreg x:(MOVBload _ _))
        // result: (MOVDreg x)
        for {
                x := v_0
@@ -12305,7 +12271,7 @@ func rewriteValueARM64_OpARM64MOVHreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVHreg x:(MOVBUload _ _))
+       // match: (MOVWreg x:(MOVBUload _ _))
        // result: (MOVDreg x)
        for {
                x := v_0
@@ -12316,7 +12282,7 @@ func rewriteValueARM64_OpARM64MOVHreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVHreg x:(MOVHload _ _))
+       // match: (MOVWreg x:(MOVHload _ _))
        // result: (MOVDreg x)
        for {
                x := v_0
@@ -12327,105 +12293,182 @@ func rewriteValueARM64_OpARM64MOVHreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVHreg x:(MOVBloadidx _ _ _))
+       // match: (MOVWreg x:(MOVHUload _ _))
        // result: (MOVDreg x)
        for {
                x := v_0
-               if x.Op != OpARM64MOVBloadidx {
+               if x.Op != OpARM64MOVHUload {
                        break
                }
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVHreg x:(MOVBUloadidx _ _ _))
+       // match: (MOVWreg x:(MOVWload _ _))
        // result: (MOVDreg x)
        for {
                x := v_0
-               if x.Op != OpARM64MOVBUloadidx {
+               if x.Op != OpARM64MOVWload {
                        break
                }
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVHreg x:(MOVHloadidx _ _ _))
+       // match: (MOVWreg x:(MOVBloadidx _ _ _))
        // result: (MOVDreg x)
        for {
                x := v_0
-               if x.Op != OpARM64MOVHloadidx {
+               if x.Op != OpARM64MOVBloadidx {
                        break
                }
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVHreg x:(MOVHloadidx2 _ _ _))
+       // match: (MOVWreg x:(MOVBUloadidx _ _ _))
        // result: (MOVDreg x)
        for {
                x := v_0
-               if x.Op != OpARM64MOVHloadidx2 {
+               if x.Op != OpARM64MOVBUloadidx {
                        break
                }
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVHreg x:(MOVBreg _))
+       // match: (MOVWreg x:(MOVHloadidx _ _ _))
        // result: (MOVDreg x)
        for {
                x := v_0
-               if x.Op != OpARM64MOVBreg {
+               if x.Op != OpARM64MOVHloadidx {
                        break
                }
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVHreg x:(MOVBUreg _))
+       // match: (MOVWreg x:(MOVHUloadidx _ _ _))
        // result: (MOVDreg x)
        for {
                x := v_0
-               if x.Op != OpARM64MOVBUreg {
+               if x.Op != OpARM64MOVHUloadidx {
                        break
                }
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVHreg x:(MOVHreg _))
+       // match: (MOVWreg x:(MOVWloadidx _ _ _))
        // result: (MOVDreg x)
        for {
                x := v_0
-               if x.Op != OpARM64MOVHreg {
+               if x.Op != OpARM64MOVWloadidx {
                        break
                }
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVHreg (MOVDconst [c]))
-       // result: (MOVDconst [int64(int16(c))])
+       // match: (MOVWreg x:(MOVHloadidx2 _ _ _))
+       // result: (MOVDreg x)
        for {
-               if v_0.Op != OpARM64MOVDconst {
+               x := v_0
+               if x.Op != OpARM64MOVHloadidx2 {
                        break
                }
-               c := auxIntToInt64(v_0.AuxInt)
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(int64(int16(c)))
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVHreg <t> (ANDconst x [c]))
-       // cond: uint64(c) & uint64(0xffffffffffff8000) == 0
-       // result: (ANDconst <t> x [c])
+       // match: (MOVWreg x:(MOVHUloadidx2 _ _ _))
+       // result: (MOVDreg x)
        for {
-               t := v.Type
-               if v_0.Op != OpARM64ANDconst {
-                       break
+               x := v_0
+               if x.Op != OpARM64MOVHUloadidx2 {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVWloadidx4 _ _ _))
+       // result: (MOVDreg x)
+       for {
+               x := v_0
+               if x.Op != OpARM64MOVWloadidx4 {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBreg _))
+       // result: (MOVDreg x)
+       for {
+               x := v_0
+               if x.Op != OpARM64MOVBreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBUreg _))
+       // result: (MOVDreg x)
+       for {
+               x := v_0
+               if x.Op != OpARM64MOVBUreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHreg _))
+       // result: (MOVDreg x)
+       for {
+               x := v_0
+               if x.Op != OpARM64MOVHreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVWreg _))
+       // result: (MOVDreg x)
+       for {
+               x := v_0
+               if x.Op != OpARM64MOVWreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg (MOVDconst [c]))
+       // result: (MOVDconst [int64(int32(c))])
+       for {
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := auxIntToInt64(v_0.AuxInt)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(int64(int32(c)))
+               return true
+       }
+       // match: (MOVWreg <t> (ANDconst x [c]))
+       // cond: uint64(c) & uint64(0xffffffff80000000) == 0
+       // result: (ANDconst <t> x [c])
+       for {
+               t := v.Type
+               if v_0.Op != OpARM64ANDconst {
+                       break
                }
                c := auxIntToInt64(v_0.AuxInt)
                x := v_0.Args[0]
-               if !(uint64(c)&uint64(0xffffffffffff8000) == 0) {
+               if !(uint64(c)&uint64(0xffffffff80000000) == 0) {
                        break
                }
                v.reset(OpARM64ANDconst)
@@ -12434,25 +12477,25 @@ func rewriteValueARM64_OpARM64MOVHreg(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVHreg (SLLconst [lc] x))
-       // cond: lc < 16
-       // result: (SBFIZ [armBFAuxInt(lc, 16-lc)] x)
+       // match: (MOVWreg (SLLconst [lc] x))
+       // cond: lc < 32
+       // result: (SBFIZ [armBFAuxInt(lc, 32-lc)] x)
        for {
                if v_0.Op != OpARM64SLLconst {
                        break
                }
                lc := auxIntToInt64(v_0.AuxInt)
                x := v_0.Args[0]
-               if !(lc < 16) {
+               if !(lc < 32) {
                        break
                }
                v.reset(OpARM64SBFIZ)
-               v.AuxInt = arm64BitFieldToAuxInt(armBFAuxInt(lc, 16-lc))
+               v.AuxInt = arm64BitFieldToAuxInt(armBFAuxInt(lc, 32-lc))
                v.AddArg(x)
                return true
        }
-       // match: (MOVHreg (SBFX [bfc] x))
-       // cond: bfc.getARM64BFwidth() <= 16
+       // match: (MOVWreg (SBFX [bfc] x))
+       // cond: bfc.getARM64BFwidth() <= 32
        // result: (SBFX [bfc] x)
        for {
                if v_0.Op != OpARM64SBFX {
@@ -12460,7 +12503,7 @@ func rewriteValueARM64_OpARM64MOVHreg(v *Value) bool {
                }
                bfc := auxIntToArm64BitField(v_0.AuxInt)
                x := v_0.Args[0]
-               if !(bfc.getARM64BFwidth() <= 16) {
+               if !(bfc.getARM64BFwidth() <= 32) {
                        break
                }
                v.reset(OpARM64SBFX)
@@ -12470,15 +12513,32 @@ func rewriteValueARM64_OpARM64MOVHreg(v *Value) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHstore(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVWstore(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        config := b.Func.Config
-       // match: (MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // match: (MOVWstore [off] {sym} ptr (FMOVSfpgp val) mem)
+       // result: (FMOVSstore [off] {sym} ptr val mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpARM64FMOVSfpgp {
+                       break
+               }
+               val := v_1.Args[0]
+               mem := v_2
+               v.reset(OpARM64FMOVSstore)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem)
        // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVHstore [off1+int32(off2)] {sym} ptr val mem)
+       // result: (MOVWstore [off1+int32(off2)] {sym} ptr val mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -12492,15 +12552,15 @@ func rewriteValueARM64_OpARM64MOVHstore(v *Value) bool {
                if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
+               v.reset(OpARM64MOVWstore)
                v.AuxInt = int32ToAuxInt(off1 + int32(off2))
                v.Aux = symToAux(sym)
                v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVHstore [off] {sym} (ADD ptr idx) val mem)
+       // match: (MOVWstore [off] {sym} (ADD ptr idx) val mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVHstoreidx ptr idx val mem)
+       // result: (MOVWstoreidx ptr idx val mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -12514,17 +12574,17 @@ func rewriteValueARM64_OpARM64MOVHstore(v *Value) bool {
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVHstoreidx)
+               v.reset(OpARM64MOVWstoreidx)
                v.AddArg4(ptr, idx, val, mem)
                return true
        }
-       // match: (MOVHstore [off] {sym} (ADDshiftLL [1] ptr idx) val mem)
+       // match: (MOVWstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVHstoreidx2 ptr idx val mem)
+       // result: (MOVWstoreidx4 ptr idx val mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 1 {
+               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 2 {
                        break
                }
                idx := v_0.Args[1]
@@ -12534,13 +12594,13 @@ func rewriteValueARM64_OpARM64MOVHstore(v *Value) bool {
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVHstoreidx2)
+               v.reset(OpARM64MOVWstoreidx4)
                v.AddArg4(ptr, idx, val, mem)
                return true
        }
-       // match: (MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
+       // match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
        for {
                off1 := auxIntToInt32(v.AuxInt)
                sym1 := auxToSym(v.Aux)
@@ -12555,14 +12615,14 @@ func rewriteValueARM64_OpARM64MOVHstore(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
+               v.reset(OpARM64MOVWstore)
                v.AuxInt = int32ToAuxInt(off1 + off2)
                v.Aux = symToAux(mergeSym(sym1, sym2))
                v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVHstore [off] {sym} ptr (MOVDconst [0]) mem)
-       // result: (MOVHstorezero [off] {sym} ptr mem)
+       // match: (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem)
+       // result: (MOVWstorezero [off] {sym} ptr mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
@@ -12571,8647 +12631,3062 @@ func rewriteValueARM64_OpARM64MOVHstore(v *Value) bool {
                        break
                }
                mem := v_2
-               v.reset(OpARM64MOVHstorezero)
+               v.reset(OpARM64MOVWstorezero)
                v.AuxInt = int32ToAuxInt(off)
                v.Aux = symToAux(sym)
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVHstore [off] {sym} ptr (MOVHreg x) mem)
-       // result: (MOVHstore [off] {sym} ptr x mem)
+       // match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem)
+       // result: (MOVWstore [off] {sym} ptr x mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
-               if v_1.Op != OpARM64MOVHreg {
+               if v_1.Op != OpARM64MOVWreg {
                        break
                }
                x := v_1.Args[0]
                mem := v_2
-               v.reset(OpARM64MOVHstore)
+               v.reset(OpARM64MOVWstore)
                v.AuxInt = int32ToAuxInt(off)
                v.Aux = symToAux(sym)
                v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVHstore [off] {sym} ptr (MOVHUreg x) mem)
-       // result: (MOVHstore [off] {sym} ptr x mem)
+       // match: (MOVWstore [off] {sym} ptr (MOVWUreg x) mem)
+       // result: (MOVWstore [off] {sym} ptr x mem)
        for {
                off := auxIntToInt32(v.AuxInt)
                sym := auxToSym(v.Aux)
                ptr := v_0
-               if v_1.Op != OpARM64MOVHUreg {
+               if v_1.Op != OpARM64MOVWUreg {
                        break
                }
                x := v_1.Args[0]
                mem := v_2
-               v.reset(OpARM64MOVHstore)
+               v.reset(OpARM64MOVWstore)
                v.AuxInt = int32ToAuxInt(off)
                v.Aux = symToAux(sym)
                v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVHstore [off] {sym} ptr (MOVWreg x) mem)
-       // result: (MOVHstore [off] {sym} ptr x mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWstoreidx(v *Value) bool {
+       v_3 := v.Args[3]
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVWstoreidx ptr (MOVDconst [c]) val mem)
+       // cond: is32Bit(c)
+       // result: (MOVWstore [int32(c)] ptr val mem)
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
                ptr := v_0
-               if v_1.Op != OpARM64MOVWreg {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v_2
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, x, mem)
-               return true
-       }
-       // match: (MOVHstore [off] {sym} ptr (MOVWUreg x) mem)
-       // result: (MOVHstore [off] {sym} ptr x mem)
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpARM64MOVWUreg {
+               c := auxIntToInt64(v_1.AuxInt)
+               val := v_2
+               mem := v_3
+               if !(is32Bit(c)) {
                        break
                }
-               x := v_1.Args[0]
-               mem := v_2
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, x, mem)
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = int32ToAuxInt(int32(c))
+               v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVHstore [i] {s} ptr0 (SRLconst [16] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVWstore [i-2] {s} ptr0 w mem)
+       // match: (MOVWstoreidx (MOVDconst [c]) idx val mem)
+       // cond: is32Bit(c)
+       // result: (MOVWstore [int32(c)] idx val mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               ptr0 := v_0
-               if v_1.Op != OpARM64SRLconst || auxIntToInt64(v_1.AuxInt) != 16 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVHstore || auxIntToInt32(x.AuxInt) != i-2 || auxToSym(x.Aux) != s {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] || !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               c := auxIntToInt64(v_0.AuxInt)
+               idx := v_1
+               val := v_2
+               mem := v_3
+               if !(is32Bit(c)) {
                        break
                }
                v.reset(OpARM64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i - 2)
-               v.Aux = symToAux(s)
-               v.AddArg3(ptr0, w, mem)
+               v.AuxInt = int32ToAuxInt(int32(c))
+               v.AddArg3(idx, val, mem)
                return true
        }
-       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVWstoreidx ptr1 idx1 w mem)
+       // match: (MOVWstoreidx ptr (SLLconst [2] idx) val mem)
+       // result: (MOVWstoreidx4 ptr idx val mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 2 {
-                       break
-               }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
+               ptr := v_0
+               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 2 {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr0 := v_0_0
-                       idx0 := v_0_1
-                       if v_1.Op != OpARM64SRLconst || auxIntToInt64(v_1.AuxInt) != 16 {
-                               continue
-                       }
-                       w := v_1.Args[0]
-                       x := v_2
-                       if x.Op != OpARM64MOVHstoreidx {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       ptr1 := x.Args[0]
-                       idx1 := x.Args[1]
-                       if w != x.Args[2] || !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVWstoreidx)
-                       v.AddArg4(ptr1, idx1, w, mem)
-                       return true
-               }
-               break
+               idx := v_1.Args[0]
+               val := v_2
+               mem := v_3
+               v.reset(OpARM64MOVWstoreidx4)
+               v.AddArg4(ptr, idx, val, mem)
+               return true
        }
-       // match: (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
+       // match: (MOVWstoreidx (SLLconst [2] idx) ptr val mem)
+       // result: (MOVWstoreidx4 ptr idx val mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 2 {
+               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 2 {
                        break
                }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 1 {
-                       break
-               }
-               idx0 := v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               if v_1.Op != OpARM64SRLconst || auxIntToInt64(v_1.AuxInt) != 16 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVHstoreidx2 {
+               idx := v_0.Args[0]
+               ptr := v_1
+               val := v_2
+               mem := v_3
+               v.reset(OpARM64MOVWstoreidx4)
+               v.AddArg4(ptr, idx, val, mem)
+               return true
+       }
+       // match: (MOVWstoreidx ptr idx (MOVDconst [0]) mem)
+       // result: (MOVWstorezeroidx ptr idx mem)
+       for {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVDconst || auxIntToInt64(v_2.AuxInt) != 0 {
                        break
                }
-               mem := x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if w != x.Args[2] || !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+               mem := v_3
+               v.reset(OpARM64MOVWstorezeroidx)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       // match: (MOVWstoreidx ptr idx (MOVWreg x) mem)
+       // result: (MOVWstoreidx ptr idx x mem)
+       for {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVWreg {
                        break
                }
+               x := v_2.Args[0]
+               mem := v_3
                v.reset(OpARM64MOVWstoreidx)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
-               v0.AuxInt = int64ToAuxInt(1)
-               v0.AddArg(idx1)
-               v.AddArg4(ptr1, v0, w, mem)
+               v.AddArg4(ptr, idx, x, mem)
                return true
        }
-       // match: (MOVHstore [i] {s} ptr0 (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVWstore [i-2] {s} ptr0 w mem)
+       // match: (MOVWstoreidx ptr idx (MOVWUreg x) mem)
+       // result: (MOVWstoreidx ptr idx x mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               ptr0 := v_0
-               if v_1.Op != OpARM64UBFX || auxIntToArm64BitField(v_1.AuxInt) != armBFAuxInt(16, 16) {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVWUreg {
                        break
                }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVHstore || auxIntToInt32(x.AuxInt) != i-2 || auxToSym(x.Aux) != s {
+               x := v_2.Args[0]
+               mem := v_3
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg4(ptr, idx, x, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWstoreidx4(v *Value) bool {
+       v_3 := v.Args[3]
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVWstoreidx4 ptr (MOVDconst [c]) val mem)
+       // cond: is32Bit(c<<2)
+       // result: (MOVWstore [int32(c)<<2] ptr val mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] || !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               c := auxIntToInt64(v_1.AuxInt)
+               val := v_2
+               mem := v_3
+               if !(is32Bit(c << 2)) {
                        break
                }
                v.reset(OpARM64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i - 2)
-               v.Aux = symToAux(s)
-               v.AddArg3(ptr0, w, mem)
+               v.AuxInt = int32ToAuxInt(int32(c) << 2)
+               v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstoreidx ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVWstoreidx ptr1 idx1 w mem)
+       // match: (MOVWstoreidx4 ptr idx (MOVDconst [0]) mem)
+       // result: (MOVWstorezeroidx4 ptr idx mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 2 {
-                       break
-               }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVDconst || auxIntToInt64(v_2.AuxInt) != 0 {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr0 := v_0_0
-                       idx0 := v_0_1
-                       if v_1.Op != OpARM64UBFX || auxIntToArm64BitField(v_1.AuxInt) != armBFAuxInt(16, 16) {
-                               continue
-                       }
-                       w := v_1.Args[0]
-                       x := v_2
-                       if x.Op != OpARM64MOVHstoreidx {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       ptr1 := x.Args[0]
-                       idx1 := x.Args[1]
-                       if w != x.Args[2] || !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVWstoreidx)
-                       v.AddArg4(ptr1, idx1, w, mem)
-                       return true
-               }
-               break
+               mem := v_3
+               v.reset(OpARM64MOVWstorezeroidx4)
+               v.AddArg3(ptr, idx, mem)
+               return true
        }
-       // match: (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
+       // match: (MOVWstoreidx4 ptr idx (MOVWreg x) mem)
+       // result: (MOVWstoreidx4 ptr idx x mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 2 {
-                       break
-               }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 1 {
-                       break
-               }
-               idx0 := v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               if v_1.Op != OpARM64UBFX || auxIntToArm64BitField(v_1.AuxInt) != armBFAuxInt(16, 16) {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVHstoreidx2 {
-                       break
-               }
-               mem := x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if w != x.Args[2] || !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVWreg {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
-               v0.AuxInt = int64ToAuxInt(1)
-               v0.AddArg(idx1)
-               v.AddArg4(ptr1, v0, w, mem)
+               x := v_2.Args[0]
+               mem := v_3
+               v.reset(OpARM64MOVWstoreidx4)
+               v.AddArg4(ptr, idx, x, mem)
                return true
        }
-       // match: (MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVWstore [i-2] {s} ptr0 w mem)
+       // match: (MOVWstoreidx4 ptr idx (MOVWUreg x) mem)
+       // result: (MOVWstoreidx4 ptr idx x mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               ptr0 := v_0
-               if v_1.Op != OpARM64SRLconst || auxIntToInt64(v_1.AuxInt) != 16 {
-                       break
-               }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpARM64MOVDreg {
-                       break
-               }
-               w := v_1_0.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVHstore || auxIntToInt32(x.AuxInt) != i-2 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] || !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpARM64MOVWUreg {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i - 2)
-               v.Aux = symToAux(s)
-               v.AddArg3(ptr0, w, mem)
+               x := v_2.Args[0]
+               mem := v_3
+               v.reset(OpARM64MOVWstoreidx4)
+               v.AddArg4(ptr, idx, x, mem)
                return true
        }
-       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVWstoreidx ptr1 idx1 w mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWstorezero(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       config := b.Func.Config
+       // match: (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+       // result: (MOVWstorezero [off1+int32(off2)] {sym} ptr mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 2 {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
+               off2 := auxIntToInt64(v_0.AuxInt)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr0 := v_0_0
-                       idx0 := v_0_1
-                       if v_1.Op != OpARM64SRLconst || auxIntToInt64(v_1.AuxInt) != 16 {
-                               continue
-                       }
-                       v_1_0 := v_1.Args[0]
-                       if v_1_0.Op != OpARM64MOVDreg {
-                               continue
-                       }
-                       w := v_1_0.Args[0]
-                       x := v_2
-                       if x.Op != OpARM64MOVHstoreidx {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       ptr1 := x.Args[0]
-                       idx1 := x.Args[1]
-                       if w != x.Args[2] || !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVWstoreidx)
-                       v.AddArg4(ptr1, idx1, w, mem)
-                       return true
-               }
-               break
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = int32ToAuxInt(off1 + int32(off2))
+               v.Aux = symToAux(sym)
+               v.AddArg2(ptr, mem)
+               return true
        }
-       // match: (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx2 ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
+       // match: (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+       // result: (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 2 {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 1 {
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
                        break
                }
-               idx0 := v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               if v_1.Op != OpARM64SRLconst || auxIntToInt64(v_1.AuxInt) != 16 {
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVWstorezero [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWstorezeroidx ptr idx mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpARM64MOVDreg {
+               idx := v_0.Args[1]
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(off == 0 && sym == nil) {
                        break
                }
-               w := v_1_0.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVHstoreidx2 {
+               v.reset(OpARM64MOVWstorezeroidx)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       // match: (MOVWstorezero [off] {sym} (ADDshiftLL [2] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWstorezeroidx4 ptr idx mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 2 {
                        break
                }
-               mem := x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if w != x.Args[2] || !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+               idx := v_0.Args[1]
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
-               v0.AuxInt = int64ToAuxInt(1)
-               v0.AddArg(idx1)
-               v.AddArg4(ptr1, v0, w, mem)
+               v.reset(OpARM64MOVWstorezeroidx4)
+               v.AddArg3(ptr, idx, mem)
                return true
        }
-       // match: (MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVWstore [i-2] {s} ptr0 w0 mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWstorezeroidx(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVWstorezeroidx ptr (MOVDconst [c]) mem)
+       // cond: is32Bit(c)
+       // result: (MOVWstorezero [int32(c)] ptr mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               ptr0 := v_0
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               j := auxIntToInt64(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVHstore || auxIntToInt32(x.AuxInt) != i-2 || auxToSym(x.Aux) != s {
+               ptr := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[2]
-               ptr1 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != OpARM64SRLconst || auxIntToInt64(w0.AuxInt) != j-16 || w != w0.Args[0] || !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               c := auxIntToInt64(v_1.AuxInt)
+               mem := v_2
+               if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i - 2)
-               v.Aux = symToAux(s)
-               v.AddArg3(ptr0, w0, mem)
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = int32ToAuxInt(int32(c))
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx ptr1 idx1 w0:(SRLconst [j-16] w) mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVWstoreidx ptr1 idx1 w0 mem)
+       // match: (MOVWstorezeroidx (MOVDconst [c]) idx mem)
+       // cond: is32Bit(c)
+       // result: (MOVWstorezero [int32(c)] idx mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 2 {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
+               c := auxIntToInt64(v_0.AuxInt)
+               idx := v_1
+               mem := v_2
+               if !(is32Bit(c)) {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr0 := v_0_0
-                       idx0 := v_0_1
-                       if v_1.Op != OpARM64SRLconst {
-                               continue
-                       }
-                       j := auxIntToInt64(v_1.AuxInt)
-                       w := v_1.Args[0]
-                       x := v_2
-                       if x.Op != OpARM64MOVHstoreidx {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       ptr1 := x.Args[0]
-                       idx1 := x.Args[1]
-                       w0 := x.Args[2]
-                       if w0.Op != OpARM64SRLconst || auxIntToInt64(w0.AuxInt) != j-16 || w != w0.Args[0] || !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVWstoreidx)
-                       v.AddArg4(ptr1, idx1, w0, mem)
-                       return true
-               }
-               break
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = int32ToAuxInt(int32(c))
+               v.AddArg2(idx, mem)
+               return true
        }
-       // match: (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx2 ptr1 idx1 w0:(SRLconst [j-16] w) mem))
-       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w0 mem)
+       // match: (MOVWstorezeroidx ptr (SLLconst [2] idx) mem)
+       // result: (MOVWstorezeroidx4 ptr idx mem)
        for {
-               if auxIntToInt32(v.AuxInt) != 2 {
-                       break
-               }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 1 {
-                       break
-               }
-               idx0 := v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               j := auxIntToInt64(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVHstoreidx2 {
+               ptr := v_0
+               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 2 {
                        break
                }
-               mem := x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               w0 := x.Args[2]
-               if w0.Op != OpARM64SRLconst || auxIntToInt64(w0.AuxInt) != j-16 || w != w0.Args[0] || !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+               idx := v_1.Args[0]
+               mem := v_2
+               v.reset(OpARM64MOVWstorezeroidx4)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       // match: (MOVWstorezeroidx (SLLconst [2] idx) ptr mem)
+       // result: (MOVWstorezeroidx4 ptr idx mem)
+       for {
+               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 2 {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
-               v0.AuxInt = int64ToAuxInt(1)
-               v0.AddArg(idx1)
-               v.AddArg4(ptr1, v0, w0, mem)
+               idx := v_0.Args[0]
+               ptr := v_1
+               mem := v_2
+               v.reset(OpARM64MOVWstorezeroidx4)
+               v.AddArg3(ptr, idx, mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHstoreidx(v *Value) bool {
-       v_3 := v.Args[3]
+func rewriteValueARM64_OpARM64MOVWstorezeroidx4(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVHstoreidx ptr (MOVDconst [c]) val mem)
-       // cond: is32Bit(c)
-       // result: (MOVHstore [int32(c)] ptr val mem)
+       // match: (MOVWstorezeroidx4 ptr (MOVDconst [c]) mem)
+       // cond: is32Bit(c<<2)
+       // result: (MOVWstorezero [int32(c<<2)] ptr mem)
        for {
                ptr := v_0
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := auxIntToInt64(v_1.AuxInt)
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c)) {
+               mem := v_2
+               if !(is32Bit(c << 2)) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               v.AddArg3(ptr, val, mem)
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = int32ToAuxInt(int32(c << 2))
+               v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVHstoreidx (MOVDconst [c]) idx val mem)
-       // cond: is32Bit(c)
-       // result: (MOVHstore [int32(c)] idx val mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MSUB(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MSUB a x (MOVDconst [-1]))
+       // result: (ADD a x)
        for {
-               if v_0.Op != OpARM64MOVDconst {
+               a := v_0
+               x := v_1
+               if v_2.Op != OpARM64MOVDconst || auxIntToInt64(v_2.AuxInt) != -1 {
                        break
                }
-               c := auxIntToInt64(v_0.AuxInt)
-               idx := v_1
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c)) {
+               v.reset(OpARM64ADD)
+               v.AddArg2(a, x)
+               return true
+       }
+       // match: (MSUB a _ (MOVDconst [0]))
+       // result: a
+       for {
+               a := v_0
+               if v_2.Op != OpARM64MOVDconst || auxIntToInt64(v_2.AuxInt) != 0 {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               v.AddArg3(idx, val, mem)
+               v.copyOf(a)
                return true
        }
-       // match: (MOVHstoreidx ptr (SLLconst [1] idx) val mem)
-       // result: (MOVHstoreidx2 ptr idx val mem)
+       // match: (MSUB a x (MOVDconst [1]))
+       // result: (SUB a x)
        for {
-               ptr := v_0
-               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 1 {
+               a := v_0
+               x := v_1
+               if v_2.Op != OpARM64MOVDconst || auxIntToInt64(v_2.AuxInt) != 1 {
                        break
                }
-               idx := v_1.Args[0]
-               val := v_2
-               mem := v_3
-               v.reset(OpARM64MOVHstoreidx2)
-               v.AddArg4(ptr, idx, val, mem)
+               v.reset(OpARM64SUB)
+               v.AddArg2(a, x)
                return true
        }
-       // match: (MOVHstoreidx ptr (ADD idx idx) val mem)
-       // result: (MOVHstoreidx2 ptr idx val mem)
+       // match: (MSUB a x (MOVDconst [c]))
+       // cond: isPowerOfTwo64(c)
+       // result: (SUBshiftLL a x [log64(c)])
        for {
-               ptr := v_0
-               if v_1.Op != OpARM64ADD {
+               a := v_0
+               x := v_1
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               idx := v_1.Args[1]
-               if idx != v_1.Args[0] {
+               c := auxIntToInt64(v_2.AuxInt)
+               if !(isPowerOfTwo64(c)) {
                        break
                }
-               val := v_2
-               mem := v_3
-               v.reset(OpARM64MOVHstoreidx2)
-               v.AddArg4(ptr, idx, val, mem)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = int64ToAuxInt(log64(c))
+               v.AddArg2(a, x)
                return true
        }
-       // match: (MOVHstoreidx (SLLconst [1] idx) ptr val mem)
-       // result: (MOVHstoreidx2 ptr idx val mem)
+       // match: (MSUB a x (MOVDconst [c]))
+       // cond: isPowerOfTwo64(c-1) && c>=3
+       // result: (SUB a (ADDshiftLL <x.Type> x x [log64(c-1)]))
        for {
-               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 1 {
+               a := v_0
+               x := v_1
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               idx := v_0.Args[0]
-               ptr := v_1
-               val := v_2
-               mem := v_3
-               v.reset(OpARM64MOVHstoreidx2)
-               v.AddArg4(ptr, idx, val, mem)
+               c := auxIntToInt64(v_2.AuxInt)
+               if !(isPowerOfTwo64(c-1) && c >= 3) {
+                       break
+               }
+               v.reset(OpARM64SUB)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(log64(c - 1))
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       // match: (MOVHstoreidx (ADD idx idx) ptr val mem)
-       // result: (MOVHstoreidx2 ptr idx val mem)
+       // match: (MSUB a x (MOVDconst [c]))
+       // cond: isPowerOfTwo64(c+1) && c>=7
+       // result: (ADD a (SUBshiftLL <x.Type> x x [log64(c+1)]))
        for {
-               if v_0.Op != OpARM64ADD {
+               a := v_0
+               x := v_1
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               idx := v_0.Args[1]
-               if idx != v_0.Args[0] {
+               c := auxIntToInt64(v_2.AuxInt)
+               if !(isPowerOfTwo64(c+1) && c >= 7) {
                        break
                }
-               ptr := v_1
-               val := v_2
-               mem := v_3
-               v.reset(OpARM64MOVHstoreidx2)
-               v.AddArg4(ptr, idx, val, mem)
+               v.reset(OpARM64ADD)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(log64(c + 1))
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       // match: (MOVHstoreidx ptr idx (MOVDconst [0]) mem)
-       // result: (MOVHstorezeroidx ptr idx mem)
+       // match: (MSUB a x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo64(c/3)
+       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
        for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVDconst || auxIntToInt64(v_2.AuxInt) != 0 {
+               a := v_0
+               x := v_1
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               mem := v_3
-               v.reset(OpARM64MOVHstorezeroidx)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVHstoreidx ptr idx (MOVHreg x) mem)
-       // result: (MOVHstoreidx ptr idx x mem)
-       for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVHreg {
+               c := auxIntToInt64(v_2.AuxInt)
+               if !(c%3 == 0 && isPowerOfTwo64(c/3)) {
                        break
                }
-               x := v_2.Args[0]
-               mem := v_3
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg4(ptr, idx, x, mem)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = int64ToAuxInt(log64(c / 3))
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(2)
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       // match: (MOVHstoreidx ptr idx (MOVHUreg x) mem)
-       // result: (MOVHstoreidx ptr idx x mem)
+       // match: (MSUB a x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo64(c/5)
+       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
        for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVHUreg {
+               a := v_0
+               x := v_1
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_2.Args[0]
-               mem := v_3
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg4(ptr, idx, x, mem)
+               c := auxIntToInt64(v_2.AuxInt)
+               if !(c%5 == 0 && isPowerOfTwo64(c/5)) {
+                       break
+               }
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = int64ToAuxInt(log64(c / 5))
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(2)
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       // match: (MOVHstoreidx ptr idx (MOVWreg x) mem)
-       // result: (MOVHstoreidx ptr idx x mem)
+       // match: (MSUB a x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo64(c/7)
+       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
        for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVWreg {
+               a := v_0
+               x := v_1
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_2.Args[0]
-               mem := v_3
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg4(ptr, idx, x, mem)
-               return true
-       }
-       // match: (MOVHstoreidx ptr idx (MOVWUreg x) mem)
-       // result: (MOVHstoreidx ptr idx x mem)
-       for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVWUreg {
+               c := auxIntToInt64(v_2.AuxInt)
+               if !(c%7 == 0 && isPowerOfTwo64(c/7)) {
                        break
                }
-               x := v_2.Args[0]
-               mem := v_3
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg4(ptr, idx, x, mem)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = int64ToAuxInt(log64(c / 7))
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(3)
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       // match: (MOVHstoreidx ptr (ADDconst [2] idx) (SRLconst [16] w) x:(MOVHstoreidx ptr idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstoreidx ptr idx w mem)
+       // match: (MSUB a x (MOVDconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo64(c/9)
+       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])
        for {
-               ptr := v_0
-               if v_1.Op != OpARM64ADDconst || auxIntToInt64(v_1.AuxInt) != 2 {
-                       break
-               }
-               idx := v_1.Args[0]
-               if v_2.Op != OpARM64SRLconst || auxIntToInt64(v_2.AuxInt) != 16 {
-                       break
-               }
-               w := v_2.Args[0]
-               x := v_3
-               if x.Op != OpARM64MOVHstoreidx {
+               a := v_0
+               x := v_1
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[3]
-               if ptr != x.Args[0] || idx != x.Args[1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
+               c := auxIntToInt64(v_2.AuxInt)
+               if !(c%9 == 0 && isPowerOfTwo64(c/9)) {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg4(ptr, idx, w, mem)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = int64ToAuxInt(log64(c / 9))
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(3)
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHstoreidx2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVHstoreidx2 ptr (MOVDconst [c]) val mem)
-       // cond: is32Bit(c<<1)
-       // result: (MOVHstore [int32(c)<<1] ptr val mem)
+       // match: (MSUB a (MOVDconst [-1]) x)
+       // result: (ADD a x)
        for {
-               ptr := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c << 1)) {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst || auxIntToInt64(v_1.AuxInt) != -1 {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = int32ToAuxInt(int32(c) << 1)
-               v.AddArg3(ptr, val, mem)
+               x := v_2
+               v.reset(OpARM64ADD)
+               v.AddArg2(a, x)
                return true
        }
-       // match: (MOVHstoreidx2 ptr idx (MOVDconst [0]) mem)
-       // result: (MOVHstorezeroidx2 ptr idx mem)
+       // match: (MSUB a (MOVDconst [0]) _)
+       // result: a
        for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVDconst || auxIntToInt64(v_2.AuxInt) != 0 {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst || auxIntToInt64(v_1.AuxInt) != 0 {
                        break
                }
-               mem := v_3
-               v.reset(OpARM64MOVHstorezeroidx2)
-               v.AddArg3(ptr, idx, mem)
+               v.copyOf(a)
                return true
        }
-       // match: (MOVHstoreidx2 ptr idx (MOVHreg x) mem)
-       // result: (MOVHstoreidx2 ptr idx x mem)
+       // match: (MSUB a (MOVDconst [1]) x)
+       // result: (SUB a x)
        for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVHreg {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst || auxIntToInt64(v_1.AuxInt) != 1 {
                        break
                }
-               x := v_2.Args[0]
-               mem := v_3
-               v.reset(OpARM64MOVHstoreidx2)
-               v.AddArg4(ptr, idx, x, mem)
+               x := v_2
+               v.reset(OpARM64SUB)
+               v.AddArg2(a, x)
                return true
        }
-       // match: (MOVHstoreidx2 ptr idx (MOVHUreg x) mem)
-       // result: (MOVHstoreidx2 ptr idx x mem)
+       // match: (MSUB a (MOVDconst [c]) x)
+       // cond: isPowerOfTwo64(c)
+       // result: (SUBshiftLL a x [log64(c)])
        for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVHUreg {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_2.Args[0]
-               mem := v_3
-               v.reset(OpARM64MOVHstoreidx2)
-               v.AddArg4(ptr, idx, x, mem)
-               return true
-       }
-       // match: (MOVHstoreidx2 ptr idx (MOVWreg x) mem)
-       // result: (MOVHstoreidx2 ptr idx x mem)
-       for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVWreg {
+               c := auxIntToInt64(v_1.AuxInt)
+               x := v_2
+               if !(isPowerOfTwo64(c)) {
                        break
                }
-               x := v_2.Args[0]
-               mem := v_3
-               v.reset(OpARM64MOVHstoreidx2)
-               v.AddArg4(ptr, idx, x, mem)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = int64ToAuxInt(log64(c))
+               v.AddArg2(a, x)
                return true
        }
-       // match: (MOVHstoreidx2 ptr idx (MOVWUreg x) mem)
-       // result: (MOVHstoreidx2 ptr idx x mem)
+       // match: (MSUB a (MOVDconst [c]) x)
+       // cond: isPowerOfTwo64(c-1) && c>=3
+       // result: (SUB a (ADDshiftLL <x.Type> x x [log64(c-1)]))
        for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVWUreg {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_2.Args[0]
-               mem := v_3
-               v.reset(OpARM64MOVHstoreidx2)
-               v.AddArg4(ptr, idx, x, mem)
+               c := auxIntToInt64(v_1.AuxInt)
+               x := v_2
+               if !(isPowerOfTwo64(c-1) && c >= 3) {
+                       break
+               }
+               v.reset(OpARM64SUB)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(log64(c - 1))
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHstorezero(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       config := b.Func.Config
-       // match: (MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVHstorezero [off1+int32(off2)] {sym} ptr mem)
+       // match: (MSUB a (MOVDconst [c]) x)
+       // cond: isPowerOfTwo64(c+1) && c>=7
+       // result: (ADD a (SUBshiftLL <x.Type> x x [log64(c+1)]))
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDconst {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := auxIntToInt64(v_0.AuxInt)
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+               c := auxIntToInt64(v_1.AuxInt)
+               x := v_2
+               if !(isPowerOfTwo64(c+1) && c >= 7) {
                        break
                }
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = int32ToAuxInt(off1 + int32(off2))
-               v.Aux = symToAux(sym)
-               v.AddArg2(ptr, mem)
+               v.reset(OpARM64ADD)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(log64(c + 1))
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       // match: (MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // match: (MSUB a (MOVDconst [c]) x)
+       // cond: c%3 == 0 && isPowerOfTwo64(c/3)
+       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               if v_0.Op != OpARM64MOVDaddr {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               sym2 := auxToSym(v_0.Aux)
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+               c := auxIntToInt64(v_1.AuxInt)
+               x := v_2
+               if !(c%3 == 0 && isPowerOfTwo64(c/3)) {
                        break
                }
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg2(ptr, mem)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = int64ToAuxInt(log64(c / 3))
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(2)
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       // match: (MOVHstorezero [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVHstorezeroidx ptr idx mem)
+       // match: (MSUB a (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo64(c/5)
+       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(off == 0 && sym == nil) {
+               c := auxIntToInt64(v_1.AuxInt)
+               x := v_2
+               if !(c%5 == 0 && isPowerOfTwo64(c/5)) {
                        break
                }
-               v.reset(OpARM64MOVHstorezeroidx)
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = int64ToAuxInt(log64(c / 5))
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(2)
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       // match: (MOVHstorezero [off] {sym} (ADDshiftLL [1] ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVHstorezeroidx2 ptr idx mem)
+       // match: (MSUB a (MOVDconst [c]) x)
+       // cond: c%7 == 0 && isPowerOfTwo64(c/7)
+       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 1 {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(off == 0 && sym == nil) {
+               c := auxIntToInt64(v_1.AuxInt)
+               x := v_2
+               if !(c%7 == 0 && isPowerOfTwo64(c/7)) {
                        break
                }
-               v.reset(OpARM64MOVHstorezeroidx2)
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = int64ToAuxInt(log64(c / 7))
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(3)
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       // match: (MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
-       // cond: x.Uses == 1 && areAdjacentOffsets(int64(i),int64(j),2) && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVWstorezero [int32(min(int64(i),int64(j)))] {s} ptr0 mem)
+       // match: (MSUB a (MOVDconst [c]) x)
+       // cond: c%9 == 0 && isPowerOfTwo64(c/9)
+       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               ptr0 := v_0
-               x := v_1
-               if x.Op != OpARM64MOVHstorezero {
-                       break
-               }
-               j := auxIntToInt32(x.AuxInt)
-               if auxToSym(x.Aux) != s {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[1]
-               ptr1 := x.Args[0]
-               if !(x.Uses == 1 && areAdjacentOffsets(int64(i), int64(j), 2) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               c := auxIntToInt64(v_1.AuxInt)
+               x := v_2
+               if !(c%9 == 0 && isPowerOfTwo64(c/9)) {
                        break
                }
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = int32ToAuxInt(int32(min(int64(i), int64(j))))
-               v.Aux = symToAux(s)
-               v.AddArg2(ptr0, mem)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = int64ToAuxInt(log64(c / 9))
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(3)
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       // match: (MOVHstorezero [2] {s} (ADD ptr0 idx0) x:(MOVHstorezeroidx ptr1 idx1 mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVWstorezeroidx ptr1 idx1 mem)
+       // match: (MSUB (MOVDconst [c]) x y)
+       // result: (ADDconst [c] (MNEG <x.Type> x y))
        for {
-               if auxIntToInt32(v.AuxInt) != 2 {
-                       break
-               }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr0 := v_0_0
-                       idx0 := v_0_1
-                       x := v_1
-                       if x.Op != OpARM64MOVHstorezeroidx {
-                               continue
-                       }
-                       mem := x.Args[2]
-                       ptr1 := x.Args[0]
-                       idx1 := x.Args[1]
-                       if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVWstorezeroidx)
-                       v.AddArg3(ptr1, idx1, mem)
-                       return true
-               }
-               break
+               c := auxIntToInt64(v_0.AuxInt)
+               x := v_1
+               y := v_2
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = int64ToAuxInt(c)
+               v0 := b.NewValue0(v.Pos, OpARM64MNEG, x.Type)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
+               return true
        }
-       // match: (MOVHstorezero [2] {s} (ADDshiftLL [1] ptr0 idx0) x:(MOVHstorezeroidx2 ptr1 idx1 mem))
-       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVWstorezeroidx ptr1 (SLLconst <idx1.Type> [1] idx1) mem)
+       // match: (MSUB a (MOVDconst [c]) (MOVDconst [d]))
+       // result: (SUBconst [c*d] a)
        for {
-               if auxIntToInt32(v.AuxInt) != 2 {
-                       break
-               }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 1 {
-                       break
-               }
-               idx0 := v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               x := v_1
-               if x.Op != OpARM64MOVHstorezeroidx2 {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[2]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+               c := auxIntToInt64(v_1.AuxInt)
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVWstorezeroidx)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
-               v0.AuxInt = int64ToAuxInt(1)
-               v0.AddArg(idx1)
-               v.AddArg3(ptr1, v0, mem)
+               d := auxIntToInt64(v_2.AuxInt)
+               v.reset(OpARM64SUBconst)
+               v.AuxInt = int64ToAuxInt(c * d)
+               v.AddArg(a)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHstorezeroidx(v *Value) bool {
+func rewriteValueARM64_OpARM64MSUBW(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVHstorezeroidx ptr (MOVDconst [c]) mem)
-       // cond: is32Bit(c)
-       // result: (MOVHstorezero [int32(c)] ptr mem)
+       b := v.Block
+       // match: (MSUBW a x (MOVDconst [c]))
+       // cond: int32(c)==-1
+       // result: (ADD a x)
        for {
-               ptr := v_0
-               if v_1.Op != OpARM64MOVDconst {
+               a := v_0
+               x := v_1
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               c := auxIntToInt64(v_1.AuxInt)
-               mem := v_2
-               if !(is32Bit(c)) {
+               c := auxIntToInt64(v_2.AuxInt)
+               if !(int32(c) == -1) {
                        break
                }
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               v.AddArg2(ptr, mem)
+               v.reset(OpARM64ADD)
+               v.AddArg2(a, x)
                return true
        }
-       // match: (MOVHstorezeroidx (MOVDconst [c]) idx mem)
-       // cond: is32Bit(c)
-       // result: (MOVHstorezero [int32(c)] idx mem)
+       // match: (MSUBW a _ (MOVDconst [c]))
+       // cond: int32(c)==0
+       // result: a
        for {
-               if v_0.Op != OpARM64MOVDconst {
+               a := v_0
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               c := auxIntToInt64(v_0.AuxInt)
-               idx := v_1
-               mem := v_2
-               if !(is32Bit(c)) {
+               c := auxIntToInt64(v_2.AuxInt)
+               if !(int32(c) == 0) {
                        break
                }
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               v.AddArg2(idx, mem)
+               v.copyOf(a)
                return true
        }
-       // match: (MOVHstorezeroidx ptr (SLLconst [1] idx) mem)
-       // result: (MOVHstorezeroidx2 ptr idx mem)
+       // match: (MSUBW a x (MOVDconst [c]))
+       // cond: int32(c)==1
+       // result: (SUB a x)
        for {
-               ptr := v_0
-               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 1 {
+               a := v_0
+               x := v_1
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v_2
-               v.reset(OpARM64MOVHstorezeroidx2)
-               v.AddArg3(ptr, idx, mem)
+               c := auxIntToInt64(v_2.AuxInt)
+               if !(int32(c) == 1) {
+                       break
+               }
+               v.reset(OpARM64SUB)
+               v.AddArg2(a, x)
                return true
        }
-       // match: (MOVHstorezeroidx ptr (ADD idx idx) mem)
-       // result: (MOVHstorezeroidx2 ptr idx mem)
+       // match: (MSUBW a x (MOVDconst [c]))
+       // cond: isPowerOfTwo64(c)
+       // result: (SUBshiftLL a x [log64(c)])
        for {
-               ptr := v_0
-               if v_1.Op != OpARM64ADD {
+               a := v_0
+               x := v_1
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               idx := v_1.Args[1]
-               if idx != v_1.Args[0] {
+               c := auxIntToInt64(v_2.AuxInt)
+               if !(isPowerOfTwo64(c)) {
                        break
                }
-               mem := v_2
-               v.reset(OpARM64MOVHstorezeroidx2)
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = int64ToAuxInt(log64(c))
+               v.AddArg2(a, x)
                return true
        }
-       // match: (MOVHstorezeroidx (SLLconst [1] idx) ptr mem)
-       // result: (MOVHstorezeroidx2 ptr idx mem)
+       // match: (MSUBW a x (MOVDconst [c]))
+       // cond: isPowerOfTwo64(c-1) && int32(c)>=3
+       // result: (SUB a (ADDshiftLL <x.Type> x x [log64(c-1)]))
        for {
-               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 1 {
+               a := v_0
+               x := v_1
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               idx := v_0.Args[0]
-               ptr := v_1
-               mem := v_2
-               v.reset(OpARM64MOVHstorezeroidx2)
-               v.AddArg3(ptr, idx, mem)
+               c := auxIntToInt64(v_2.AuxInt)
+               if !(isPowerOfTwo64(c-1) && int32(c) >= 3) {
+                       break
+               }
+               v.reset(OpARM64SUB)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(log64(c - 1))
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       // match: (MOVHstorezeroidx (ADD idx idx) ptr mem)
-       // result: (MOVHstorezeroidx2 ptr idx mem)
+       // match: (MSUBW a x (MOVDconst [c]))
+       // cond: isPowerOfTwo64(c+1) && int32(c)>=7
+       // result: (ADD a (SUBshiftLL <x.Type> x x [log64(c+1)]))
        for {
-               if v_0.Op != OpARM64ADD {
+               a := v_0
+               x := v_1
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               idx := v_0.Args[1]
-               if idx != v_0.Args[0] {
+               c := auxIntToInt64(v_2.AuxInt)
+               if !(isPowerOfTwo64(c+1) && int32(c) >= 7) {
                        break
                }
-               ptr := v_1
-               mem := v_2
-               v.reset(OpARM64MOVHstorezeroidx2)
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpARM64ADD)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(log64(c + 1))
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       // match: (MOVHstorezeroidx ptr (ADDconst [2] idx) x:(MOVHstorezeroidx ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstorezeroidx ptr idx mem)
+       // match: (MSUBW a x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c)
+       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
        for {
-               ptr := v_0
-               if v_1.Op != OpARM64ADDconst || auxIntToInt64(v_1.AuxInt) != 2 {
-                       break
-               }
-               idx := v_1.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVHstorezeroidx {
+               a := v_0
+               x := v_1
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[2]
-               if ptr != x.Args[0] || idx != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+               c := auxIntToInt64(v_2.AuxInt)
+               if !(c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVWstorezeroidx)
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = int64ToAuxInt(log64(c / 3))
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(2)
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHstorezeroidx2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVHstorezeroidx2 ptr (MOVDconst [c]) mem)
-       // cond: is32Bit(c<<1)
-       // result: (MOVHstorezero [int32(c<<1)] ptr mem)
+       // match: (MSUBW a x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c)
+       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
        for {
-               ptr := v_0
-               if v_1.Op != OpARM64MOVDconst {
+               a := v_0
+               x := v_1
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               c := auxIntToInt64(v_1.AuxInt)
-               mem := v_2
-               if !(is32Bit(c << 1)) {
+               c := auxIntToInt64(v_2.AuxInt)
+               if !(c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = int32ToAuxInt(int32(c << 1))
-               v.AddArg2(ptr, mem)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = int64ToAuxInt(log64(c / 5))
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(2)
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVQstorezero(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       config := b.Func.Config
-       // match: (MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVQstorezero [off1+int32(off2)] {sym} ptr mem)
+       // match: (MSUBW a x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c)
+       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDconst {
+               a := v_0
+               x := v_1
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := auxIntToInt64(v_0.AuxInt)
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+               c := auxIntToInt64(v_2.AuxInt)
+               if !(c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVQstorezero)
-               v.AuxInt = int32ToAuxInt(off1 + int32(off2))
-               v.Aux = symToAux(sym)
-               v.AddArg2(ptr, mem)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = int64ToAuxInt(log64(c / 7))
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(3)
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       // match: (MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // match: (MSUBW a x (MOVDconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c)
+       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               if v_0.Op != OpARM64MOVDaddr {
+               a := v_0
+               x := v_1
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               sym2 := auxToSym(v_0.Aux)
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+               c := auxIntToInt64(v_2.AuxInt)
+               if !(c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVQstorezero)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg2(ptr, mem)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = int64ToAuxInt(log64(c / 9))
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(3)
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWUload(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       config := b.Func.Config
-       // match: (MOVWUload [off] {sym} ptr (FMOVSstore [off] {sym} ptr val _))
-       // result: (FMOVSfpgp val)
+       // match: (MSUBW a (MOVDconst [c]) x)
+       // cond: int32(c)==-1
+       // result: (ADD a x)
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpARM64FMOVSstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               val := v_1.Args[1]
-               if ptr != v_1.Args[0] {
+               c := auxIntToInt64(v_1.AuxInt)
+               x := v_2
+               if !(int32(c) == -1) {
                        break
                }
-               v.reset(OpARM64FMOVSfpgp)
-               v.AddArg(val)
+               v.reset(OpARM64ADD)
+               v.AddArg2(a, x)
                return true
        }
-       // match: (MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVWUload [off1+int32(off2)] {sym} ptr mem)
+       // match: (MSUBW a (MOVDconst [c]) _)
+       // cond: int32(c)==0
+       // result: a
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDconst {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := auxIntToInt64(v_0.AuxInt)
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+               c := auxIntToInt64(v_1.AuxInt)
+               if !(int32(c) == 0) {
                        break
                }
-               v.reset(OpARM64MOVWUload)
-               v.AuxInt = int32ToAuxInt(off1 + int32(off2))
-               v.Aux = symToAux(sym)
-               v.AddArg2(ptr, mem)
+               v.copyOf(a)
                return true
        }
-       // match: (MOVWUload [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWUloadidx ptr idx mem)
+       // match: (MSUBW a (MOVDconst [c]) x)
+       // cond: int32(c)==1
+       // result: (SUB a x)
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(off == 0 && sym == nil) {
+               c := auxIntToInt64(v_1.AuxInt)
+               x := v_2
+               if !(int32(c) == 1) {
                        break
                }
-               v.reset(OpARM64MOVWUloadidx)
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpARM64SUB)
+               v.AddArg2(a, x)
                return true
        }
-       // match: (MOVWUload [off] {sym} (ADDshiftLL [2] ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWUloadidx4 ptr idx mem)
+       // match: (MSUBW a (MOVDconst [c]) x)
+       // cond: isPowerOfTwo64(c)
+       // result: (SUBshiftLL a x [log64(c)])
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 2 {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(off == 0 && sym == nil) {
+               c := auxIntToInt64(v_1.AuxInt)
+               x := v_2
+               if !(isPowerOfTwo64(c)) {
                        break
                }
-               v.reset(OpARM64MOVWUloadidx4)
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = int64ToAuxInt(log64(c))
+               v.AddArg2(a, x)
                return true
        }
-       // match: (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // match: (MSUBW a (MOVDconst [c]) x)
+       // cond: isPowerOfTwo64(c-1) && int32(c)>=3
+       // result: (SUB a (ADDshiftLL <x.Type> x x [log64(c-1)]))
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               if v_0.Op != OpARM64MOVDaddr {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               sym2 := auxToSym(v_0.Aux)
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+               c := auxIntToInt64(v_1.AuxInt)
+               x := v_2
+               if !(isPowerOfTwo64(c-1) && int32(c) >= 3) {
                        break
                }
-               v.reset(OpARM64MOVWUload)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg2(ptr, mem)
+               v.reset(OpARM64SUB)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(log64(c - 1))
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       // match: (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVDconst [0])
+       // match: (MSUBW a (MOVDconst [c]) x)
+       // cond: isPowerOfTwo64(c+1) && int32(c)>=7
+       // result: (ADD a (SUBshiftLL <x.Type> x x [log64(c+1)]))
        for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpARM64MOVWstorezero {
-                       break
-               }
-               off2 := auxIntToInt32(v_1.AuxInt)
-               sym2 := auxToSym(v_1.Aux)
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(0)
-               return true
-       }
-       // match: (MOVWUload [off] {sym} (SB) _)
-       // cond: symIsRO(sym)
-       // result: (MOVDconst [int64(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))])
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpSB || !(symIsRO(sym)) {
+               c := auxIntToInt64(v_1.AuxInt)
+               x := v_2
+               if !(isPowerOfTwo64(c+1) && int32(c) >= 7) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(int64(read32(sym, int64(off), config.ctxt.Arch.ByteOrder)))
+               v.reset(OpARM64ADD)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(log64(c + 1))
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWUloadidx(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVWUloadidx ptr (MOVDconst [c]) mem)
-       // cond: is32Bit(c)
-       // result: (MOVWUload [int32(c)] ptr mem)
+       // match: (MSUBW a (MOVDconst [c]) x)
+       // cond: c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c)
+       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
        for {
-               ptr := v_0
+               a := v_0
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := auxIntToInt64(v_1.AuxInt)
-               mem := v_2
-               if !(is32Bit(c)) {
+               x := v_2
+               if !(c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVWUload)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVWUloadidx (MOVDconst [c]) ptr mem)
-       // cond: is32Bit(c)
-       // result: (MOVWUload [int32(c)] ptr mem)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = int64ToAuxInt(log64(c / 3))
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(2)
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
+               return true
+       }
+       // match: (MSUBW a (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c)
+       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
        for {
-               if v_0.Op != OpARM64MOVDconst {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := auxIntToInt64(v_0.AuxInt)
-               ptr := v_1
-               mem := v_2
-               if !(is32Bit(c)) {
+               c := auxIntToInt64(v_1.AuxInt)
+               x := v_2
+               if !(c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVWUload)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               v.AddArg2(ptr, mem)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = int64ToAuxInt(log64(c / 5))
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(2)
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       // match: (MOVWUloadidx ptr (SLLconst [2] idx) mem)
-       // result: (MOVWUloadidx4 ptr idx mem)
+       // match: (MSUBW a (MOVDconst [c]) x)
+       // cond: c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c)
+       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
        for {
-               ptr := v_0
-               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 2 {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v_2
-               v.reset(OpARM64MOVWUloadidx4)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVWUloadidx (SLLconst [2] idx) ptr mem)
-       // result: (MOVWUloadidx4 ptr idx mem)
-       for {
-               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 2 {
+               c := auxIntToInt64(v_1.AuxInt)
+               x := v_2
+               if !(c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c)) {
                        break
                }
-               idx := v_0.Args[0]
-               ptr := v_1
-               mem := v_2
-               v.reset(OpARM64MOVWUloadidx4)
-               v.AddArg3(ptr, idx, mem)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = int64ToAuxInt(log64(c / 7))
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(3)
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       // match: (MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
-       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
-       // result: (MOVDconst [0])
+       // match: (MSUBW a (MOVDconst [c]) x)
+       // cond: c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c)
+       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])
        for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVWstorezeroidx {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               idx2 := v_2.Args[1]
-               ptr2 := v_2.Args[0]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+               c := auxIntToInt64(v_1.AuxInt)
+               x := v_2
+               if !(c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(0)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = int64ToAuxInt(log64(c / 9))
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = int64ToAuxInt(3)
+               v0.AddArg2(x, x)
+               v.AddArg2(a, v0)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWUloadidx4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVWUloadidx4 ptr (MOVDconst [c]) mem)
-       // cond: is32Bit(c<<2)
-       // result: (MOVWUload [int32(c)<<2] ptr mem)
+       // match: (MSUBW (MOVDconst [c]) x y)
+       // result: (ADDconst [c] (MNEGW <x.Type> x y))
        for {
-               ptr := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mem := v_2
-               if !(is32Bit(c << 2)) {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVWUload)
-               v.AuxInt = int32ToAuxInt(int32(c) << 2)
-               v.AddArg2(ptr, mem)
+               c := auxIntToInt64(v_0.AuxInt)
+               x := v_1
+               y := v_2
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = int64ToAuxInt(c)
+               v0 := b.NewValue0(v.Pos, OpARM64MNEGW, x.Type)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _))
-       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
-       // result: (MOVDconst [0])
+       // match: (MSUBW a (MOVDconst [c]) (MOVDconst [d]))
+       // result: (SUBconst [int64(int32(c)*int32(d))] a)
        for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVWstorezeroidx4 {
+               a := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               idx2 := v_2.Args[1]
-               ptr2 := v_2.Args[0]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
+               c := auxIntToInt64(v_1.AuxInt)
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(0)
+               d := auxIntToInt64(v_2.AuxInt)
+               v.reset(OpARM64SUBconst)
+               v.AuxInt = int64ToAuxInt(int64(int32(c) * int32(d)))
+               v.AddArg(a)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWUreg(v *Value) bool {
+func rewriteValueARM64_OpARM64MUL(v *Value) bool {
+       v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVWUreg x:(MOVBUload _ _))
-       // result: (MOVDreg x)
+       b := v.Block
+       // match: (MUL (NEG x) y)
+       // result: (MNEG x y)
        for {
-               x := v_0
-               if x.Op != OpARM64MOVBUload {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpARM64NEG {
+                               continue
+                       }
+                       x := v_0.Args[0]
+                       y := v_1
+                       v.reset(OpARM64MNEG)
+                       v.AddArg2(x, y)
+                       return true
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
+               break
        }
-       // match: (MOVWUreg x:(MOVHUload _ _))
-       // result: (MOVDreg x)
+       // match: (MUL x (MOVDconst [-1]))
+       // result: (NEG x)
        for {
-               x := v_0
-               if x.Op != OpARM64MOVHUload {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MOVDconst || auxIntToInt64(v_1.AuxInt) != -1 {
+                               continue
+                       }
+                       v.reset(OpARM64NEG)
+                       v.AddArg(x)
+                       return true
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
+               break
        }
-       // match: (MOVWUreg x:(MOVWUload _ _))
-       // result: (MOVDreg x)
+       // match: (MUL _ (MOVDconst [0]))
+       // result: (MOVDconst [0])
        for {
-               x := v_0
-               if x.Op != OpARM64MOVWUload {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_1.Op != OpARM64MOVDconst || auxIntToInt64(v_1.AuxInt) != 0 {
+                               continue
+                       }
+                       v.reset(OpARM64MOVDconst)
+                       v.AuxInt = int64ToAuxInt(0)
+                       return true
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
+               break
        }
-       // match: (MOVWUreg x:(MOVBUloadidx _ _ _))
-       // result: (MOVDreg x)
+       // match: (MUL x (MOVDconst [1]))
+       // result: x
        for {
-               x := v_0
-               if x.Op != OpARM64MOVBUloadidx {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MOVDconst || auxIntToInt64(v_1.AuxInt) != 1 {
+                               continue
+                       }
+                       v.copyOf(x)
+                       return true
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
+               break
        }
-       // match: (MOVWUreg x:(MOVHUloadidx _ _ _))
-       // result: (MOVDreg x)
+       // match: (MUL x (MOVDconst [c]))
+       // cond: isPowerOfTwo64(c)
+       // result: (SLLconst [log64(c)] x)
        for {
-               x := v_0
-               if x.Op != OpARM64MOVHUloadidx {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_1.AuxInt)
+                       if !(isPowerOfTwo64(c)) {
+                               continue
+                       }
+                       v.reset(OpARM64SLLconst)
+                       v.AuxInt = int64ToAuxInt(log64(c))
+                       v.AddArg(x)
+                       return true
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
+               break
        }
-       // match: (MOVWUreg x:(MOVWUloadidx _ _ _))
-       // result: (MOVDreg x)
+       // match: (MUL x (MOVDconst [c]))
+       // cond: isPowerOfTwo64(c-1) && c >= 3
+       // result: (ADDshiftLL x x [log64(c-1)])
        for {
-               x := v_0
-               if x.Op != OpARM64MOVWUloadidx {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_1.AuxInt)
+                       if !(isPowerOfTwo64(c-1) && c >= 3) {
+                               continue
+                       }
+                       v.reset(OpARM64ADDshiftLL)
+                       v.AuxInt = int64ToAuxInt(log64(c - 1))
+                       v.AddArg2(x, x)
+                       return true
+               }
+               break
        }
-       // match: (MOVWUreg x:(MOVHUloadidx2 _ _ _))
-       // result: (MOVDreg x)
+       // match: (MUL x (MOVDconst [c]))
+       // cond: isPowerOfTwo64(c+1) && c >= 7
+       // result: (ADDshiftLL (NEG <x.Type> x) x [log64(c+1)])
        for {
-               x := v_0
-               if x.Op != OpARM64MOVHUloadidx2 {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_1.AuxInt)
+                       if !(isPowerOfTwo64(c+1) && c >= 7) {
+                               continue
+                       }
+                       v.reset(OpARM64ADDshiftLL)
+                       v.AuxInt = int64ToAuxInt(log64(c + 1))
+                       v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+                       v0.AddArg(x)
+                       v.AddArg2(v0, x)
+                       return true
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
+               break
        }
-       // match: (MOVWUreg x:(MOVWUloadidx4 _ _ _))
-       // result: (MOVDreg x)
+       // match: (MUL x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo64(c/3)
+       // result: (SLLconst [log64(c/3)] (ADDshiftLL <x.Type> x x [1]))
        for {
-               x := v_0
-               if x.Op != OpARM64MOVWUloadidx4 {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_1.AuxInt)
+                       if !(c%3 == 0 && isPowerOfTwo64(c/3)) {
+                               continue
+                       }
+                       v.reset(OpARM64SLLconst)
+                       v.AuxInt = int64ToAuxInt(log64(c / 3))
+                       v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+                       v0.AuxInt = int64ToAuxInt(1)
+                       v0.AddArg2(x, x)
+                       v.AddArg(v0)
+                       return true
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
+               break
        }
-       // match: (MOVWUreg x:(MOVBUreg _))
-       // result: (MOVDreg x)
+       // match: (MUL x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo64(c/5)
+       // result: (SLLconst [log64(c/5)] (ADDshiftLL <x.Type> x x [2]))
        for {
-               x := v_0
-               if x.Op != OpARM64MOVBUreg {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_1.AuxInt)
+                       if !(c%5 == 0 && isPowerOfTwo64(c/5)) {
+                               continue
+                       }
+                       v.reset(OpARM64SLLconst)
+                       v.AuxInt = int64ToAuxInt(log64(c / 5))
+                       v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+                       v0.AuxInt = int64ToAuxInt(2)
+                       v0.AddArg2(x, x)
+                       v.AddArg(v0)
+                       return true
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
+               break
        }
-       // match: (MOVWUreg x:(MOVHUreg _))
-       // result: (MOVDreg x)
+       // match: (MUL x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo64(c/7)
+       // result: (SLLconst [log64(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
        for {
-               x := v_0
-               if x.Op != OpARM64MOVHUreg {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_1.AuxInt)
+                       if !(c%7 == 0 && isPowerOfTwo64(c/7)) {
+                               continue
+                       }
+                       v.reset(OpARM64SLLconst)
+                       v.AuxInt = int64ToAuxInt(log64(c / 7))
+                       v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+                       v0.AuxInt = int64ToAuxInt(3)
+                       v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+                       v1.AddArg(x)
+                       v0.AddArg2(v1, x)
+                       v.AddArg(v0)
+                       return true
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
+               break
        }
-       // match: (MOVWUreg x:(MOVWUreg _))
-       // result: (MOVDreg x)
+       // match: (MUL x (MOVDconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo64(c/9)
+       // result: (SLLconst [log64(c/9)] (ADDshiftLL <x.Type> x x [3]))
        for {
-               x := v_0
-               if x.Op != OpARM64MOVWUreg {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_1.AuxInt)
+                       if !(c%9 == 0 && isPowerOfTwo64(c/9)) {
+                               continue
+                       }
+                       v.reset(OpARM64SLLconst)
+                       v.AuxInt = int64ToAuxInt(log64(c / 9))
+                       v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+                       v0.AuxInt = int64ToAuxInt(3)
+                       v0.AddArg2(x, x)
+                       v.AddArg(v0)
+                       return true
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
+               break
        }
-       // match: (MOVWUreg (ANDconst [c] x))
-       // result: (ANDconst [c&(1<<32-1)] x)
+       // match: (MUL (MOVDconst [c]) (MOVDconst [d]))
+       // result: (MOVDconst [c*d])
        for {
-               if v_0.Op != OpARM64ANDconst {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_0.AuxInt)
+                       if v_1.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       d := auxIntToInt64(v_1.AuxInt)
+                       v.reset(OpARM64MOVDconst)
+                       v.AuxInt = int64ToAuxInt(c * d)
+                       return true
                }
-               c := auxIntToInt64(v_0.AuxInt)
-               x := v_0.Args[0]
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = int64ToAuxInt(c & (1<<32 - 1))
-               v.AddArg(x)
-               return true
+               break
        }
-       // match: (MOVWUreg (MOVDconst [c]))
-       // result: (MOVDconst [int64(uint32(c))])
+       return false
+}
+func rewriteValueARM64_OpARM64MULW(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MULW (NEG x) y)
+       // result: (MNEGW x y)
        for {
-               if v_0.Op != OpARM64MOVDconst {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpARM64NEG {
+                               continue
+                       }
+                       x := v_0.Args[0]
+                       y := v_1
+                       v.reset(OpARM64MNEGW)
+                       v.AddArg2(x, y)
+                       return true
                }
-               c := auxIntToInt64(v_0.AuxInt)
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(int64(uint32(c)))
-               return true
+               break
        }
-       // match: (MOVWUreg x)
-       // cond: zeroUpper32Bits(x, 3)
-       // result: x
+       // match: (MULW x (MOVDconst [c]))
+       // cond: int32(c)==-1
+       // result: (NEG x)
        for {
-               x := v_0
-               if !(zeroUpper32Bits(x, 3)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_1.AuxInt)
+                       if !(int32(c) == -1) {
+                               continue
+                       }
+                       v.reset(OpARM64NEG)
+                       v.AddArg(x)
+                       return true
                }
-               v.copyOf(x)
-               return true
+               break
        }
-       // match: (MOVWUreg (SLLconst [lc] x))
-       // cond: lc >= 32
+       // match: (MULW _ (MOVDconst [c]))
+       // cond: int32(c)==0
        // result: (MOVDconst [0])
        for {
-               if v_0.Op != OpARM64SLLconst {
-                       break
-               }
-               lc := auxIntToInt64(v_0.AuxInt)
-               if !(lc >= 32) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_1.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_1.AuxInt)
+                       if !(int32(c) == 0) {
+                               continue
+                       }
+                       v.reset(OpARM64MOVDconst)
+                       v.AuxInt = int64ToAuxInt(0)
+                       return true
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(0)
-               return true
-       }
-       // match: (MOVWUreg (SLLconst [lc] x))
-       // cond: lc < 32
-       // result: (UBFIZ [armBFAuxInt(lc, 32-lc)] x)
-       for {
-               if v_0.Op != OpARM64SLLconst {
-                       break
-               }
-               lc := auxIntToInt64(v_0.AuxInt)
-               x := v_0.Args[0]
-               if !(lc < 32) {
-                       break
-               }
-               v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BitFieldToAuxInt(armBFAuxInt(lc, 32-lc))
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWUreg (SRLconst [rc] x))
-       // cond: rc < 32
-       // result: (UBFX [armBFAuxInt(rc, 32)] x)
-       for {
-               if v_0.Op != OpARM64SRLconst {
-                       break
-               }
-               rc := auxIntToInt64(v_0.AuxInt)
-               x := v_0.Args[0]
-               if !(rc < 32) {
-                       break
-               }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BitFieldToAuxInt(armBFAuxInt(rc, 32))
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWUreg (UBFX [bfc] x))
-       // cond: bfc.getARM64BFwidth() <= 32
-       // result: (UBFX [bfc] x)
-       for {
-               if v_0.Op != OpARM64UBFX {
-                       break
-               }
-               bfc := auxIntToArm64BitField(v_0.AuxInt)
-               x := v_0.Args[0]
-               if !(bfc.getARM64BFwidth() <= 32) {
-                       break
-               }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BitFieldToAuxInt(bfc)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWload(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       config := b.Func.Config
-       // match: (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVWload [off1+int32(off2)] {sym} ptr mem)
-       for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDconst {
-                       break
-               }
-               off2 := auxIntToInt64(v_0.AuxInt)
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
-                       break
-               }
-               v.reset(OpARM64MOVWload)
-               v.AuxInt = int32ToAuxInt(off1 + int32(off2))
-               v.Aux = symToAux(sym)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVWload [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWloadidx ptr idx mem)
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(off == 0 && sym == nil) {
-                       break
-               }
-               v.reset(OpARM64MOVWloadidx)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVWload [off] {sym} (ADDshiftLL [2] ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWloadidx4 ptr idx mem)
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 2 {
-                       break
-               }
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(off == 0 && sym == nil) {
-                       break
-               }
-               v.reset(OpARM64MOVWloadidx4)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
-       for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               if v_0.Op != OpARM64MOVDaddr {
-                       break
-               }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               sym2 := auxToSym(v_0.Aux)
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
-                       break
-               }
-               v.reset(OpARM64MOVWload)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVDconst [0])
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpARM64MOVWstorezero {
-                       break
-               }
-               off2 := auxIntToInt32(v_1.AuxInt)
-               sym2 := auxToSym(v_1.Aux)
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
-                       break
-               }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(0)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWloadidx(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVWloadidx ptr (MOVDconst [c]) mem)
-       // cond: is32Bit(c)
-       // result: (MOVWload [int32(c)] ptr mem)
-       for {
-               ptr := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mem := v_2
-               if !(is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64MOVWload)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVWloadidx (MOVDconst [c]) ptr mem)
-       // cond: is32Bit(c)
-       // result: (MOVWload [int32(c)] ptr mem)
-       for {
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               ptr := v_1
-               mem := v_2
-               if !(is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64MOVWload)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVWloadidx ptr (SLLconst [2] idx) mem)
-       // result: (MOVWloadidx4 ptr idx mem)
-       for {
-               ptr := v_0
-               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 2 {
-                       break
-               }
-               idx := v_1.Args[0]
-               mem := v_2
-               v.reset(OpARM64MOVWloadidx4)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVWloadidx (SLLconst [2] idx) ptr mem)
-       // result: (MOVWloadidx4 ptr idx mem)
-       for {
-               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 2 {
-                       break
-               }
-               idx := v_0.Args[0]
-               ptr := v_1
-               mem := v_2
-               v.reset(OpARM64MOVWloadidx4)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
-       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
-       // result: (MOVDconst [0])
-       for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVWstorezeroidx {
-                       break
-               }
-               idx2 := v_2.Args[1]
-               ptr2 := v_2.Args[0]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
-                       break
-               }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(0)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWloadidx4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVWloadidx4 ptr (MOVDconst [c]) mem)
-       // cond: is32Bit(c<<2)
-       // result: (MOVWload [int32(c)<<2] ptr mem)
-       for {
-               ptr := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mem := v_2
-               if !(is32Bit(c << 2)) {
-                       break
-               }
-               v.reset(OpARM64MOVWload)
-               v.AuxInt = int32ToAuxInt(int32(c) << 2)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _))
-       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
-       // result: (MOVDconst [0])
-       for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVWstorezeroidx4 {
-                       break
-               }
-               idx2 := v_2.Args[1]
-               ptr2 := v_2.Args[0]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
-                       break
-               }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(0)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWreg(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (MOVWreg x:(MOVBload _ _))
-       // result: (MOVDreg x)
-       for {
-               x := v_0
-               if x.Op != OpARM64MOVBload {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVBUload _ _))
-       // result: (MOVDreg x)
-       for {
-               x := v_0
-               if x.Op != OpARM64MOVBUload {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHload _ _))
-       // result: (MOVDreg x)
-       for {
-               x := v_0
-               if x.Op != OpARM64MOVHload {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHUload _ _))
-       // result: (MOVDreg x)
-       for {
-               x := v_0
-               if x.Op != OpARM64MOVHUload {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVWload _ _))
-       // result: (MOVDreg x)
-       for {
-               x := v_0
-               if x.Op != OpARM64MOVWload {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVBloadidx _ _ _))
-       // result: (MOVDreg x)
-       for {
-               x := v_0
-               if x.Op != OpARM64MOVBloadidx {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVBUloadidx _ _ _))
-       // result: (MOVDreg x)
-       for {
-               x := v_0
-               if x.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHloadidx _ _ _))
-       // result: (MOVDreg x)
-       for {
-               x := v_0
-               if x.Op != OpARM64MOVHloadidx {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHUloadidx _ _ _))
-       // result: (MOVDreg x)
-       for {
-               x := v_0
-               if x.Op != OpARM64MOVHUloadidx {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVWloadidx _ _ _))
-       // result: (MOVDreg x)
-       for {
-               x := v_0
-               if x.Op != OpARM64MOVWloadidx {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHloadidx2 _ _ _))
-       // result: (MOVDreg x)
-       for {
-               x := v_0
-               if x.Op != OpARM64MOVHloadidx2 {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHUloadidx2 _ _ _))
-       // result: (MOVDreg x)
-       for {
-               x := v_0
-               if x.Op != OpARM64MOVHUloadidx2 {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVWloadidx4 _ _ _))
-       // result: (MOVDreg x)
-       for {
-               x := v_0
-               if x.Op != OpARM64MOVWloadidx4 {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVBreg _))
-       // result: (MOVDreg x)
-       for {
-               x := v_0
-               if x.Op != OpARM64MOVBreg {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVBUreg _))
-       // result: (MOVDreg x)
-       for {
-               x := v_0
-               if x.Op != OpARM64MOVBUreg {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHreg _))
-       // result: (MOVDreg x)
-       for {
-               x := v_0
-               if x.Op != OpARM64MOVHreg {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVWreg _))
-       // result: (MOVDreg x)
-       for {
-               x := v_0
-               if x.Op != OpARM64MOVWreg {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg (MOVDconst [c]))
-       // result: (MOVDconst [int64(int32(c))])
-       for {
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(int64(int32(c)))
-               return true
-       }
-       // match: (MOVWreg <t> (ANDconst x [c]))
-       // cond: uint64(c) & uint64(0xffffffff80000000) == 0
-       // result: (ANDconst <t> x [c])
-       for {
-               t := v.Type
-               if v_0.Op != OpARM64ANDconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               x := v_0.Args[0]
-               if !(uint64(c)&uint64(0xffffffff80000000) == 0) {
-                       break
-               }
-               v.reset(OpARM64ANDconst)
-               v.Type = t
-               v.AuxInt = int64ToAuxInt(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg (SLLconst [lc] x))
-       // cond: lc < 32
-       // result: (SBFIZ [armBFAuxInt(lc, 32-lc)] x)
-       for {
-               if v_0.Op != OpARM64SLLconst {
-                       break
-               }
-               lc := auxIntToInt64(v_0.AuxInt)
-               x := v_0.Args[0]
-               if !(lc < 32) {
-                       break
-               }
-               v.reset(OpARM64SBFIZ)
-               v.AuxInt = arm64BitFieldToAuxInt(armBFAuxInt(lc, 32-lc))
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg (SBFX [bfc] x))
-       // cond: bfc.getARM64BFwidth() <= 32
-       // result: (SBFX [bfc] x)
-       for {
-               if v_0.Op != OpARM64SBFX {
-                       break
-               }
-               bfc := auxIntToArm64BitField(v_0.AuxInt)
-               x := v_0.Args[0]
-               if !(bfc.getARM64BFwidth() <= 32) {
-                       break
-               }
-               v.reset(OpARM64SBFX)
-               v.AuxInt = arm64BitFieldToAuxInt(bfc)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWstore(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       config := b.Func.Config
-       // match: (MOVWstore [off] {sym} ptr (FMOVSfpgp val) mem)
-       // result: (FMOVSstore [off] {sym} ptr val mem)
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpARM64FMOVSfpgp {
-                       break
-               }
-               val := v_1.Args[0]
-               mem := v_2
-               v.reset(OpARM64FMOVSstore)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       // match: (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-       // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVWstore [off1+int32(off2)] {sym} ptr val mem)
-       for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDconst {
-                       break
-               }
-               off2 := auxIntToInt64(v_0.AuxInt)
-               ptr := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
-                       break
-               }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = int32ToAuxInt(off1 + int32(off2))
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} (ADD ptr idx) val mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWstoreidx ptr idx val mem)
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(off == 0 && sym == nil) {
-                       break
-               }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWstoreidx4 ptr idx val mem)
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 2 {
-                       break
-               }
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(off == 0 && sym == nil) {
-                       break
-               }
-               v.reset(OpARM64MOVWstoreidx4)
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
-       for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               if v_0.Op != OpARM64MOVDaddr {
-                       break
-               }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               sym2 := auxToSym(v_0.Aux)
-               ptr := v_0.Args[0]
-               val := v_1
-               mem := v_2
-               if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
-                       break
-               }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem)
-       // result: (MOVWstorezero [off] {sym} ptr mem)
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpARM64MOVDconst || auxIntToInt64(v_1.AuxInt) != 0 {
-                       break
-               }
-               mem := v_2
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem)
-       // result: (MOVWstore [off] {sym} ptr x mem)
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpARM64MOVWreg {
-                       break
-               }
-               x := v_1.Args[0]
-               mem := v_2
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, x, mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} ptr (MOVWUreg x) mem)
-       // result: (MOVWstore [off] {sym} ptr x mem)
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               ptr := v_0
-               if v_1.Op != OpARM64MOVWUreg {
-                       break
-               }
-               x := v_1.Args[0]
-               mem := v_2
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg3(ptr, x, mem)
-               return true
-       }
-       // match: (MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVDstore [i-4] {s} ptr0 w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               ptr0 := v_0
-               if v_1.Op != OpARM64SRLconst || auxIntToInt64(v_1.AuxInt) != 32 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVWstore || auxIntToInt32(x.AuxInt) != i-4 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] || !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVDstore)
-               v.AuxInt = int32ToAuxInt(i - 4)
-               v.Aux = symToAux(s)
-               v.AddArg3(ptr0, w, mem)
-               return true
-       }
-       // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVDstoreidx ptr1 idx1 w mem)
-       for {
-               if auxIntToInt32(v.AuxInt) != 4 {
-                       break
-               }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr0 := v_0_0
-                       idx0 := v_0_1
-                       if v_1.Op != OpARM64SRLconst || auxIntToInt64(v_1.AuxInt) != 32 {
-                               continue
-                       }
-                       w := v_1.Args[0]
-                       x := v_2
-                       if x.Op != OpARM64MOVWstoreidx {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       ptr1 := x.Args[0]
-                       idx1 := x.Args[1]
-                       if w != x.Args[2] || !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVDstoreidx)
-                       v.AddArg4(ptr1, idx1, w, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx4 ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w mem)
-       for {
-               if auxIntToInt32(v.AuxInt) != 4 {
-                       break
-               }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 2 {
-                       break
-               }
-               idx0 := v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               if v_1.Op != OpARM64SRLconst || auxIntToInt64(v_1.AuxInt) != 32 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVWstoreidx4 {
-                       break
-               }
-               mem := x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if w != x.Args[2] || !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVDstoreidx)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
-               v0.AuxInt = int64ToAuxInt(2)
-               v0.AddArg(idx1)
-               v.AddArg4(ptr1, v0, w, mem)
-               return true
-       }
-       // match: (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVDstore [i-4] {s} ptr0 w0 mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               ptr0 := v_0
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               j := auxIntToInt64(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVWstore || auxIntToInt32(x.AuxInt) != i-4 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               ptr1 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != OpARM64SRLconst || auxIntToInt64(w0.AuxInt) != j-32 || w != w0.Args[0] || !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVDstore)
-               v.AuxInt = int32ToAuxInt(i - 4)
-               v.Aux = symToAux(s)
-               v.AddArg3(ptr0, w0, mem)
-               return true
-       }
-       // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx ptr1 idx1 w0:(SRLconst [j-32] w) mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVDstoreidx ptr1 idx1 w0 mem)
-       for {
-               if auxIntToInt32(v.AuxInt) != 4 {
-                       break
-               }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr0 := v_0_0
-                       idx0 := v_0_1
-                       if v_1.Op != OpARM64SRLconst {
-                               continue
-                       }
-                       j := auxIntToInt64(v_1.AuxInt)
-                       w := v_1.Args[0]
-                       x := v_2
-                       if x.Op != OpARM64MOVWstoreidx {
-                               continue
-                       }
-                       mem := x.Args[3]
-                       ptr1 := x.Args[0]
-                       idx1 := x.Args[1]
-                       w0 := x.Args[2]
-                       if w0.Op != OpARM64SRLconst || auxIntToInt64(w0.AuxInt) != j-32 || w != w0.Args[0] || !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVDstoreidx)
-                       v.AddArg4(ptr1, idx1, w0, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx4 ptr1 idx1 w0:(SRLconst [j-32] w) mem))
-       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w0 mem)
-       for {
-               if auxIntToInt32(v.AuxInt) != 4 {
-                       break
-               }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 2 {
-                       break
-               }
-               idx0 := v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               j := auxIntToInt64(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVWstoreidx4 {
-                       break
-               }
-               mem := x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               w0 := x.Args[2]
-               if w0.Op != OpARM64SRLconst || auxIntToInt64(w0.AuxInt) != j-32 || w != w0.Args[0] || !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVDstoreidx)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
-               v0.AuxInt = int64ToAuxInt(2)
-               v0.AddArg(idx1)
-               v.AddArg4(ptr1, v0, w0, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWstoreidx(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVWstoreidx ptr (MOVDconst [c]) val mem)
-       // cond: is32Bit(c)
-       // result: (MOVWstore [int32(c)] ptr val mem)
-       for {
-               ptr := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       // match: (MOVWstoreidx (MOVDconst [c]) idx val mem)
-       // cond: is32Bit(c)
-       // result: (MOVWstore [int32(c)] idx val mem)
-       for {
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               idx := v_1
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               v.AddArg3(idx, val, mem)
-               return true
-       }
-       // match: (MOVWstoreidx ptr (SLLconst [2] idx) val mem)
-       // result: (MOVWstoreidx4 ptr idx val mem)
-       for {
-               ptr := v_0
-               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 2 {
-                       break
-               }
-               idx := v_1.Args[0]
-               val := v_2
-               mem := v_3
-               v.reset(OpARM64MOVWstoreidx4)
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVWstoreidx (SLLconst [2] idx) ptr val mem)
-       // result: (MOVWstoreidx4 ptr idx val mem)
-       for {
-               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 2 {
-                       break
-               }
-               idx := v_0.Args[0]
-               ptr := v_1
-               val := v_2
-               mem := v_3
-               v.reset(OpARM64MOVWstoreidx4)
-               v.AddArg4(ptr, idx, val, mem)
-               return true
-       }
-       // match: (MOVWstoreidx ptr idx (MOVDconst [0]) mem)
-       // result: (MOVWstorezeroidx ptr idx mem)
-       for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVDconst || auxIntToInt64(v_2.AuxInt) != 0 {
-                       break
-               }
-               mem := v_3
-               v.reset(OpARM64MOVWstorezeroidx)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVWstoreidx ptr idx (MOVWreg x) mem)
-       // result: (MOVWstoreidx ptr idx x mem)
-       for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVWreg {
-                       break
-               }
-               x := v_2.Args[0]
-               mem := v_3
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg4(ptr, idx, x, mem)
-               return true
-       }
-       // match: (MOVWstoreidx ptr idx (MOVWUreg x) mem)
-       // result: (MOVWstoreidx ptr idx x mem)
-       for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVWUreg {
-                       break
-               }
-               x := v_2.Args[0]
-               mem := v_3
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg4(ptr, idx, x, mem)
-               return true
-       }
-       // match: (MOVWstoreidx ptr (ADDconst [4] idx) (SRLconst [32] w) x:(MOVWstoreidx ptr idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVDstoreidx ptr idx w mem)
-       for {
-               ptr := v_0
-               if v_1.Op != OpARM64ADDconst || auxIntToInt64(v_1.AuxInt) != 4 {
-                       break
-               }
-               idx := v_1.Args[0]
-               if v_2.Op != OpARM64SRLconst || auxIntToInt64(v_2.AuxInt) != 32 {
-                       break
-               }
-               w := v_2.Args[0]
-               x := v_3
-               if x.Op != OpARM64MOVWstoreidx {
-                       break
-               }
-               mem := x.Args[3]
-               if ptr != x.Args[0] || idx != x.Args[1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVDstoreidx)
-               v.AddArg4(ptr, idx, w, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWstoreidx4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVWstoreidx4 ptr (MOVDconst [c]) val mem)
-       // cond: is32Bit(c<<2)
-       // result: (MOVWstore [int32(c)<<2] ptr val mem)
-       for {
-               ptr := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               val := v_2
-               mem := v_3
-               if !(is32Bit(c << 2)) {
-                       break
-               }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = int32ToAuxInt(int32(c) << 2)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       // match: (MOVWstoreidx4 ptr idx (MOVDconst [0]) mem)
-       // result: (MOVWstorezeroidx4 ptr idx mem)
-       for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVDconst || auxIntToInt64(v_2.AuxInt) != 0 {
-                       break
-               }
-               mem := v_3
-               v.reset(OpARM64MOVWstorezeroidx4)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVWstoreidx4 ptr idx (MOVWreg x) mem)
-       // result: (MOVWstoreidx4 ptr idx x mem)
-       for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVWreg {
-                       break
-               }
-               x := v_2.Args[0]
-               mem := v_3
-               v.reset(OpARM64MOVWstoreidx4)
-               v.AddArg4(ptr, idx, x, mem)
-               return true
-       }
-       // match: (MOVWstoreidx4 ptr idx (MOVWUreg x) mem)
-       // result: (MOVWstoreidx4 ptr idx x mem)
-       for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpARM64MOVWUreg {
-                       break
-               }
-               x := v_2.Args[0]
-               mem := v_3
-               v.reset(OpARM64MOVWstoreidx4)
-               v.AddArg4(ptr, idx, x, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWstorezero(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       config := b.Func.Config
-       // match: (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVWstorezero [off1+int32(off2)] {sym} ptr mem)
-       for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDconst {
-                       break
-               }
-               off2 := auxIntToInt64(v_0.AuxInt)
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
-                       break
-               }
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = int32ToAuxInt(off1 + int32(off2))
-               v.Aux = symToAux(sym)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
-       // result: (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
-       for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               if v_0.Op != OpARM64MOVDaddr {
-                       break
-               }
-               off2 := auxIntToInt32(v_0.AuxInt)
-               sym2 := auxToSym(v_0.Aux)
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
-                       break
-               }
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVWstorezero [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWstorezeroidx ptr idx mem)
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(off == 0 && sym == nil) {
-                       break
-               }
-               v.reset(OpARM64MOVWstorezeroidx)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVWstorezero [off] {sym} (ADDshiftLL [2] ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWstorezeroidx4 ptr idx mem)
-       for {
-               off := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 2 {
-                       break
-               }
-               idx := v_0.Args[1]
-               ptr := v_0.Args[0]
-               mem := v_1
-               if !(off == 0 && sym == nil) {
-                       break
-               }
-               v.reset(OpARM64MOVWstorezeroidx4)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
-       // cond: x.Uses == 1 && areAdjacentOffsets(int64(i),int64(j),4) && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVDstorezero [int32(min(int64(i),int64(j)))] {s} ptr0 mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               ptr0 := v_0
-               x := v_1
-               if x.Op != OpARM64MOVWstorezero {
-                       break
-               }
-               j := auxIntToInt32(x.AuxInt)
-               if auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[1]
-               ptr1 := x.Args[0]
-               if !(x.Uses == 1 && areAdjacentOffsets(int64(i), int64(j), 4) && isSamePtr(ptr0, ptr1) && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVDstorezero)
-               v.AuxInt = int32ToAuxInt(int32(min(int64(i), int64(j))))
-               v.Aux = symToAux(s)
-               v.AddArg2(ptr0, mem)
-               return true
-       }
-       // match: (MOVWstorezero [4] {s} (ADD ptr0 idx0) x:(MOVWstorezeroidx ptr1 idx1 mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVDstorezeroidx ptr1 idx1 mem)
-       for {
-               if auxIntToInt32(v.AuxInt) != 4 {
-                       break
-               }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
-                       ptr0 := v_0_0
-                       idx0 := v_0_1
-                       x := v_1
-                       if x.Op != OpARM64MOVWstorezeroidx {
-                               continue
-                       }
-                       mem := x.Args[2]
-                       ptr1 := x.Args[0]
-                       idx1 := x.Args[1]
-                       if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVDstorezeroidx)
-                       v.AddArg3(ptr1, idx1, mem)
-                       return true
-               }
-               break
-       }
-       // match: (MOVWstorezero [4] {s} (ADDshiftLL [2] ptr0 idx0) x:(MOVWstorezeroidx4 ptr1 idx1 mem))
-       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVDstorezeroidx ptr1 (SLLconst <idx1.Type> [2] idx1) mem)
-       for {
-               if auxIntToInt32(v.AuxInt) != 4 {
-                       break
-               }
-               s := auxToSym(v.Aux)
-               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 2 {
-                       break
-               }
-               idx0 := v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               x := v_1
-               if x.Op != OpARM64MOVWstorezeroidx4 {
-                       break
-               }
-               mem := x.Args[2]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVDstorezeroidx)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
-               v0.AuxInt = int64ToAuxInt(2)
-               v0.AddArg(idx1)
-               v.AddArg3(ptr1, v0, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWstorezeroidx(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVWstorezeroidx ptr (MOVDconst [c]) mem)
-       // cond: is32Bit(c)
-       // result: (MOVWstorezero [int32(c)] ptr mem)
-       for {
-               ptr := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mem := v_2
-               if !(is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (MOVWstorezeroidx (MOVDconst [c]) idx mem)
-       // cond: is32Bit(c)
-       // result: (MOVWstorezero [int32(c)] idx mem)
-       for {
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               idx := v_1
-               mem := v_2
-               if !(is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               v.AddArg2(idx, mem)
-               return true
-       }
-       // match: (MOVWstorezeroidx ptr (SLLconst [2] idx) mem)
-       // result: (MOVWstorezeroidx4 ptr idx mem)
-       for {
-               ptr := v_0
-               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 2 {
-                       break
-               }
-               idx := v_1.Args[0]
-               mem := v_2
-               v.reset(OpARM64MOVWstorezeroidx4)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVWstorezeroidx (SLLconst [2] idx) ptr mem)
-       // result: (MOVWstorezeroidx4 ptr idx mem)
-       for {
-               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 2 {
-                       break
-               }
-               idx := v_0.Args[0]
-               ptr := v_1
-               mem := v_2
-               v.reset(OpARM64MOVWstorezeroidx4)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       // match: (MOVWstorezeroidx ptr (ADDconst [4] idx) x:(MOVWstorezeroidx ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVDstorezeroidx ptr idx mem)
-       for {
-               ptr := v_0
-               if v_1.Op != OpARM64ADDconst || auxIntToInt64(v_1.AuxInt) != 4 {
-                       break
-               }
-               idx := v_1.Args[0]
-               x := v_2
-               if x.Op != OpARM64MOVWstorezeroidx {
-                       break
-               }
-               mem := x.Args[2]
-               if ptr != x.Args[0] || idx != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVDstorezeroidx)
-               v.AddArg3(ptr, idx, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWstorezeroidx4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVWstorezeroidx4 ptr (MOVDconst [c]) mem)
-       // cond: is32Bit(c<<2)
-       // result: (MOVWstorezero [int32(c<<2)] ptr mem)
-       for {
-               ptr := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mem := v_2
-               if !(is32Bit(c << 2)) {
-                       break
-               }
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = int32ToAuxInt(int32(c << 2))
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MSUB(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MSUB a x (MOVDconst [-1]))
-       // result: (ADD a x)
-       for {
-               a := v_0
-               x := v_1
-               if v_2.Op != OpARM64MOVDconst || auxIntToInt64(v_2.AuxInt) != -1 {
-                       break
-               }
-               v.reset(OpARM64ADD)
-               v.AddArg2(a, x)
-               return true
-       }
-       // match: (MSUB a _ (MOVDconst [0]))
-       // result: a
-       for {
-               a := v_0
-               if v_2.Op != OpARM64MOVDconst || auxIntToInt64(v_2.AuxInt) != 0 {
-                       break
-               }
-               v.copyOf(a)
-               return true
-       }
-       // match: (MSUB a x (MOVDconst [1]))
-       // result: (SUB a x)
-       for {
-               a := v_0
-               x := v_1
-               if v_2.Op != OpARM64MOVDconst || auxIntToInt64(v_2.AuxInt) != 1 {
-                       break
-               }
-               v.reset(OpARM64SUB)
-               v.AddArg2(a, x)
-               return true
-       }
-       // match: (MSUB a x (MOVDconst [c]))
-       // cond: isPowerOfTwo64(c)
-       // result: (SUBshiftLL a x [log64(c)])
-       for {
-               a := v_0
-               x := v_1
-               if v_2.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_2.AuxInt)
-               if !(isPowerOfTwo64(c)) {
-                       break
-               }
-               v.reset(OpARM64SUBshiftLL)
-               v.AuxInt = int64ToAuxInt(log64(c))
-               v.AddArg2(a, x)
-               return true
-       }
-       // match: (MSUB a x (MOVDconst [c]))
-       // cond: isPowerOfTwo64(c-1) && c>=3
-       // result: (SUB a (ADDshiftLL <x.Type> x x [log64(c-1)]))
-       for {
-               a := v_0
-               x := v_1
-               if v_2.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_2.AuxInt)
-               if !(isPowerOfTwo64(c-1) && c >= 3) {
-                       break
-               }
-               v.reset(OpARM64SUB)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(log64(c - 1))
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUB a x (MOVDconst [c]))
-       // cond: isPowerOfTwo64(c+1) && c>=7
-       // result: (ADD a (SUBshiftLL <x.Type> x x [log64(c+1)]))
-       for {
-               a := v_0
-               x := v_1
-               if v_2.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_2.AuxInt)
-               if !(isPowerOfTwo64(c+1) && c >= 7) {
-                       break
-               }
-               v.reset(OpARM64ADD)
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(log64(c + 1))
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUB a x (MOVDconst [c]))
-       // cond: c%3 == 0 && isPowerOfTwo64(c/3)
-       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
-       for {
-               a := v_0
-               x := v_1
-               if v_2.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_2.AuxInt)
-               if !(c%3 == 0 && isPowerOfTwo64(c/3)) {
-                       break
-               }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = int64ToAuxInt(log64(c / 3))
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(2)
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUB a x (MOVDconst [c]))
-       // cond: c%5 == 0 && isPowerOfTwo64(c/5)
-       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
-       for {
-               a := v_0
-               x := v_1
-               if v_2.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_2.AuxInt)
-               if !(c%5 == 0 && isPowerOfTwo64(c/5)) {
-                       break
-               }
-               v.reset(OpARM64SUBshiftLL)
-               v.AuxInt = int64ToAuxInt(log64(c / 5))
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(2)
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUB a x (MOVDconst [c]))
-       // cond: c%7 == 0 && isPowerOfTwo64(c/7)
-       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
-       for {
-               a := v_0
-               x := v_1
-               if v_2.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_2.AuxInt)
-               if !(c%7 == 0 && isPowerOfTwo64(c/7)) {
-                       break
-               }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = int64ToAuxInt(log64(c / 7))
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(3)
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUB a x (MOVDconst [c]))
-       // cond: c%9 == 0 && isPowerOfTwo64(c/9)
-       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])
-       for {
-               a := v_0
-               x := v_1
-               if v_2.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_2.AuxInt)
-               if !(c%9 == 0 && isPowerOfTwo64(c/9)) {
-                       break
-               }
-               v.reset(OpARM64SUBshiftLL)
-               v.AuxInt = int64ToAuxInt(log64(c / 9))
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(3)
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUB a (MOVDconst [-1]) x)
-       // result: (ADD a x)
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst || auxIntToInt64(v_1.AuxInt) != -1 {
-                       break
-               }
-               x := v_2
-               v.reset(OpARM64ADD)
-               v.AddArg2(a, x)
-               return true
-       }
-       // match: (MSUB a (MOVDconst [0]) _)
-       // result: a
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst || auxIntToInt64(v_1.AuxInt) != 0 {
-                       break
-               }
-               v.copyOf(a)
-               return true
-       }
-       // match: (MSUB a (MOVDconst [1]) x)
-       // result: (SUB a x)
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst || auxIntToInt64(v_1.AuxInt) != 1 {
-                       break
-               }
-               x := v_2
-               v.reset(OpARM64SUB)
-               v.AddArg2(a, x)
-               return true
-       }
-       // match: (MSUB a (MOVDconst [c]) x)
-       // cond: isPowerOfTwo64(c)
-       // result: (SUBshiftLL a x [log64(c)])
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               x := v_2
-               if !(isPowerOfTwo64(c)) {
-                       break
-               }
-               v.reset(OpARM64SUBshiftLL)
-               v.AuxInt = int64ToAuxInt(log64(c))
-               v.AddArg2(a, x)
-               return true
-       }
-       // match: (MSUB a (MOVDconst [c]) x)
-       // cond: isPowerOfTwo64(c-1) && c>=3
-       // result: (SUB a (ADDshiftLL <x.Type> x x [log64(c-1)]))
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               x := v_2
-               if !(isPowerOfTwo64(c-1) && c >= 3) {
-                       break
-               }
-               v.reset(OpARM64SUB)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(log64(c - 1))
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUB a (MOVDconst [c]) x)
-       // cond: isPowerOfTwo64(c+1) && c>=7
-       // result: (ADD a (SUBshiftLL <x.Type> x x [log64(c+1)]))
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               x := v_2
-               if !(isPowerOfTwo64(c+1) && c >= 7) {
-                       break
-               }
-               v.reset(OpARM64ADD)
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(log64(c + 1))
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUB a (MOVDconst [c]) x)
-       // cond: c%3 == 0 && isPowerOfTwo64(c/3)
-       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               x := v_2
-               if !(c%3 == 0 && isPowerOfTwo64(c/3)) {
-                       break
-               }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = int64ToAuxInt(log64(c / 3))
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(2)
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUB a (MOVDconst [c]) x)
-       // cond: c%5 == 0 && isPowerOfTwo64(c/5)
-       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               x := v_2
-               if !(c%5 == 0 && isPowerOfTwo64(c/5)) {
-                       break
-               }
-               v.reset(OpARM64SUBshiftLL)
-               v.AuxInt = int64ToAuxInt(log64(c / 5))
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(2)
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUB a (MOVDconst [c]) x)
-       // cond: c%7 == 0 && isPowerOfTwo64(c/7)
-       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               x := v_2
-               if !(c%7 == 0 && isPowerOfTwo64(c/7)) {
-                       break
-               }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = int64ToAuxInt(log64(c / 7))
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(3)
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUB a (MOVDconst [c]) x)
-       // cond: c%9 == 0 && isPowerOfTwo64(c/9)
-       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               x := v_2
-               if !(c%9 == 0 && isPowerOfTwo64(c/9)) {
-                       break
-               }
-               v.reset(OpARM64SUBshiftLL)
-               v.AuxInt = int64ToAuxInt(log64(c / 9))
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(3)
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUB (MOVDconst [c]) x y)
-       // result: (ADDconst [c] (MNEG <x.Type> x y))
-       for {
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               x := v_1
-               y := v_2
-               v.reset(OpARM64ADDconst)
-               v.AuxInt = int64ToAuxInt(c)
-               v0 := b.NewValue0(v.Pos, OpARM64MNEG, x.Type)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MSUB a (MOVDconst [c]) (MOVDconst [d]))
-       // result: (SUBconst [c*d] a)
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               if v_2.Op != OpARM64MOVDconst {
-                       break
-               }
-               d := auxIntToInt64(v_2.AuxInt)
-               v.reset(OpARM64SUBconst)
-               v.AuxInt = int64ToAuxInt(c * d)
-               v.AddArg(a)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MSUBW(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MSUBW a x (MOVDconst [c]))
-       // cond: int32(c)==-1
-       // result: (ADD a x)
-       for {
-               a := v_0
-               x := v_1
-               if v_2.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_2.AuxInt)
-               if !(int32(c) == -1) {
-                       break
-               }
-               v.reset(OpARM64ADD)
-               v.AddArg2(a, x)
-               return true
-       }
-       // match: (MSUBW a _ (MOVDconst [c]))
-       // cond: int32(c)==0
-       // result: a
-       for {
-               a := v_0
-               if v_2.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_2.AuxInt)
-               if !(int32(c) == 0) {
-                       break
-               }
-               v.copyOf(a)
-               return true
-       }
-       // match: (MSUBW a x (MOVDconst [c]))
-       // cond: int32(c)==1
-       // result: (SUB a x)
-       for {
-               a := v_0
-               x := v_1
-               if v_2.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_2.AuxInt)
-               if !(int32(c) == 1) {
-                       break
-               }
-               v.reset(OpARM64SUB)
-               v.AddArg2(a, x)
-               return true
-       }
-       // match: (MSUBW a x (MOVDconst [c]))
-       // cond: isPowerOfTwo64(c)
-       // result: (SUBshiftLL a x [log64(c)])
-       for {
-               a := v_0
-               x := v_1
-               if v_2.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_2.AuxInt)
-               if !(isPowerOfTwo64(c)) {
-                       break
-               }
-               v.reset(OpARM64SUBshiftLL)
-               v.AuxInt = int64ToAuxInt(log64(c))
-               v.AddArg2(a, x)
-               return true
-       }
-       // match: (MSUBW a x (MOVDconst [c]))
-       // cond: isPowerOfTwo64(c-1) && int32(c)>=3
-       // result: (SUB a (ADDshiftLL <x.Type> x x [log64(c-1)]))
-       for {
-               a := v_0
-               x := v_1
-               if v_2.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_2.AuxInt)
-               if !(isPowerOfTwo64(c-1) && int32(c) >= 3) {
-                       break
-               }
-               v.reset(OpARM64SUB)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(log64(c - 1))
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUBW a x (MOVDconst [c]))
-       // cond: isPowerOfTwo64(c+1) && int32(c)>=7
-       // result: (ADD a (SUBshiftLL <x.Type> x x [log64(c+1)]))
-       for {
-               a := v_0
-               x := v_1
-               if v_2.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_2.AuxInt)
-               if !(isPowerOfTwo64(c+1) && int32(c) >= 7) {
-                       break
-               }
-               v.reset(OpARM64ADD)
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(log64(c + 1))
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUBW a x (MOVDconst [c]))
-       // cond: c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c)
-       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
-       for {
-               a := v_0
-               x := v_1
-               if v_2.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_2.AuxInt)
-               if !(c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = int64ToAuxInt(log64(c / 3))
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(2)
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUBW a x (MOVDconst [c]))
-       // cond: c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c)
-       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
-       for {
-               a := v_0
-               x := v_1
-               if v_2.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_2.AuxInt)
-               if !(c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64SUBshiftLL)
-               v.AuxInt = int64ToAuxInt(log64(c / 5))
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(2)
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUBW a x (MOVDconst [c]))
-       // cond: c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c)
-       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
-       for {
-               a := v_0
-               x := v_1
-               if v_2.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_2.AuxInt)
-               if !(c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = int64ToAuxInt(log64(c / 7))
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(3)
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUBW a x (MOVDconst [c]))
-       // cond: c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c)
-       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])
-       for {
-               a := v_0
-               x := v_1
-               if v_2.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_2.AuxInt)
-               if !(c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64SUBshiftLL)
-               v.AuxInt = int64ToAuxInt(log64(c / 9))
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(3)
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUBW a (MOVDconst [c]) x)
-       // cond: int32(c)==-1
-       // result: (ADD a x)
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               x := v_2
-               if !(int32(c) == -1) {
-                       break
-               }
-               v.reset(OpARM64ADD)
-               v.AddArg2(a, x)
-               return true
-       }
-       // match: (MSUBW a (MOVDconst [c]) _)
-       // cond: int32(c)==0
-       // result: a
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               if !(int32(c) == 0) {
-                       break
-               }
-               v.copyOf(a)
-               return true
-       }
-       // match: (MSUBW a (MOVDconst [c]) x)
-       // cond: int32(c)==1
-       // result: (SUB a x)
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               x := v_2
-               if !(int32(c) == 1) {
-                       break
-               }
-               v.reset(OpARM64SUB)
-               v.AddArg2(a, x)
-               return true
-       }
-       // match: (MSUBW a (MOVDconst [c]) x)
-       // cond: isPowerOfTwo64(c)
-       // result: (SUBshiftLL a x [log64(c)])
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               x := v_2
-               if !(isPowerOfTwo64(c)) {
-                       break
-               }
-               v.reset(OpARM64SUBshiftLL)
-               v.AuxInt = int64ToAuxInt(log64(c))
-               v.AddArg2(a, x)
-               return true
-       }
-       // match: (MSUBW a (MOVDconst [c]) x)
-       // cond: isPowerOfTwo64(c-1) && int32(c)>=3
-       // result: (SUB a (ADDshiftLL <x.Type> x x [log64(c-1)]))
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               x := v_2
-               if !(isPowerOfTwo64(c-1) && int32(c) >= 3) {
-                       break
-               }
-               v.reset(OpARM64SUB)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(log64(c - 1))
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUBW a (MOVDconst [c]) x)
-       // cond: isPowerOfTwo64(c+1) && int32(c)>=7
-       // result: (ADD a (SUBshiftLL <x.Type> x x [log64(c+1)]))
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               x := v_2
-               if !(isPowerOfTwo64(c+1) && int32(c) >= 7) {
-                       break
-               }
-               v.reset(OpARM64ADD)
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(log64(c + 1))
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUBW a (MOVDconst [c]) x)
-       // cond: c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c)
-       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               x := v_2
-               if !(c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = int64ToAuxInt(log64(c / 3))
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(2)
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUBW a (MOVDconst [c]) x)
-       // cond: c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c)
-       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               x := v_2
-               if !(c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64SUBshiftLL)
-               v.AuxInt = int64ToAuxInt(log64(c / 5))
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(2)
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUBW a (MOVDconst [c]) x)
-       // cond: c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c)
-       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               x := v_2
-               if !(c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = int64ToAuxInt(log64(c / 7))
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(3)
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUBW a (MOVDconst [c]) x)
-       // cond: c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c)
-       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               x := v_2
-               if !(c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64SUBshiftLL)
-               v.AuxInt = int64ToAuxInt(log64(c / 9))
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = int64ToAuxInt(3)
-               v0.AddArg2(x, x)
-               v.AddArg2(a, v0)
-               return true
-       }
-       // match: (MSUBW (MOVDconst [c]) x y)
-       // result: (ADDconst [c] (MNEGW <x.Type> x y))
-       for {
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               x := v_1
-               y := v_2
-               v.reset(OpARM64ADDconst)
-               v.AuxInt = int64ToAuxInt(c)
-               v0 := b.NewValue0(v.Pos, OpARM64MNEGW, x.Type)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MSUBW a (MOVDconst [c]) (MOVDconst [d]))
-       // result: (SUBconst [int64(int32(c)*int32(d))] a)
-       for {
-               a := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               if v_2.Op != OpARM64MOVDconst {
-                       break
-               }
-               d := auxIntToInt64(v_2.AuxInt)
-               v.reset(OpARM64SUBconst)
-               v.AuxInt = int64ToAuxInt(int64(int32(c) * int32(d)))
-               v.AddArg(a)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MUL(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MUL (NEG x) y)
-       // result: (MNEG x y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpARM64NEG {
-                               continue
-                       }
-                       x := v_0.Args[0]
-                       y := v_1
-                       v.reset(OpARM64MNEG)
-                       v.AddArg2(x, y)
-                       return true
-               }
-               break
-       }
-       // match: (MUL x (MOVDconst [-1]))
-       // result: (NEG x)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpARM64MOVDconst || auxIntToInt64(v_1.AuxInt) != -1 {
-                               continue
-                       }
-                       v.reset(OpARM64NEG)
-                       v.AddArg(x)
-                       return true
-               }
-               break
-       }
-       // match: (MUL _ (MOVDconst [0]))
-       // result: (MOVDconst [0])
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_1.Op != OpARM64MOVDconst || auxIntToInt64(v_1.AuxInt) != 0 {
-                               continue
-                       }
-                       v.reset(OpARM64MOVDconst)
-                       v.AuxInt = int64ToAuxInt(0)
-                       return true
-               }
-               break
-       }
-       // match: (MUL x (MOVDconst [1]))
-       // result: x
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpARM64MOVDconst || auxIntToInt64(v_1.AuxInt) != 1 {
-                               continue
-                       }
-                       v.copyOf(x)
-                       return true
-               }
-               break
-       }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: isPowerOfTwo64(c)
-       // result: (SLLconst [log64(c)] x)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_1.AuxInt)
-                       if !(isPowerOfTwo64(c)) {
-                               continue
-                       }
-                       v.reset(OpARM64SLLconst)
-                       v.AuxInt = int64ToAuxInt(log64(c))
-                       v.AddArg(x)
-                       return true
-               }
-               break
-       }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: isPowerOfTwo64(c-1) && c >= 3
-       // result: (ADDshiftLL x x [log64(c-1)])
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_1.AuxInt)
-                       if !(isPowerOfTwo64(c-1) && c >= 3) {
-                               continue
-                       }
-                       v.reset(OpARM64ADDshiftLL)
-                       v.AuxInt = int64ToAuxInt(log64(c - 1))
-                       v.AddArg2(x, x)
-                       return true
-               }
-               break
-       }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: isPowerOfTwo64(c+1) && c >= 7
-       // result: (ADDshiftLL (NEG <x.Type> x) x [log64(c+1)])
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_1.AuxInt)
-                       if !(isPowerOfTwo64(c+1) && c >= 7) {
-                               continue
-                       }
-                       v.reset(OpARM64ADDshiftLL)
-                       v.AuxInt = int64ToAuxInt(log64(c + 1))
-                       v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-                       v0.AddArg(x)
-                       v.AddArg2(v0, x)
-                       return true
-               }
-               break
-       }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: c%3 == 0 && isPowerOfTwo64(c/3)
-       // result: (SLLconst [log64(c/3)] (ADDshiftLL <x.Type> x x [1]))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_1.AuxInt)
-                       if !(c%3 == 0 && isPowerOfTwo64(c/3)) {
-                               continue
-                       }
-                       v.reset(OpARM64SLLconst)
-                       v.AuxInt = int64ToAuxInt(log64(c / 3))
-                       v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-                       v0.AuxInt = int64ToAuxInt(1)
-                       v0.AddArg2(x, x)
-                       v.AddArg(v0)
-                       return true
-               }
-               break
-       }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: c%5 == 0 && isPowerOfTwo64(c/5)
-       // result: (SLLconst [log64(c/5)] (ADDshiftLL <x.Type> x x [2]))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_1.AuxInt)
-                       if !(c%5 == 0 && isPowerOfTwo64(c/5)) {
-                               continue
-                       }
-                       v.reset(OpARM64SLLconst)
-                       v.AuxInt = int64ToAuxInt(log64(c / 5))
-                       v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-                       v0.AuxInt = int64ToAuxInt(2)
-                       v0.AddArg2(x, x)
-                       v.AddArg(v0)
-                       return true
-               }
-               break
-       }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: c%7 == 0 && isPowerOfTwo64(c/7)
-       // result: (SLLconst [log64(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_1.AuxInt)
-                       if !(c%7 == 0 && isPowerOfTwo64(c/7)) {
-                               continue
-                       }
-                       v.reset(OpARM64SLLconst)
-                       v.AuxInt = int64ToAuxInt(log64(c / 7))
-                       v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-                       v0.AuxInt = int64ToAuxInt(3)
-                       v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-                       v1.AddArg(x)
-                       v0.AddArg2(v1, x)
-                       v.AddArg(v0)
-                       return true
-               }
-               break
-       }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: c%9 == 0 && isPowerOfTwo64(c/9)
-       // result: (SLLconst [log64(c/9)] (ADDshiftLL <x.Type> x x [3]))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_1.AuxInt)
-                       if !(c%9 == 0 && isPowerOfTwo64(c/9)) {
-                               continue
-                       }
-                       v.reset(OpARM64SLLconst)
-                       v.AuxInt = int64ToAuxInt(log64(c / 9))
-                       v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-                       v0.AuxInt = int64ToAuxInt(3)
-                       v0.AddArg2(x, x)
-                       v.AddArg(v0)
-                       return true
-               }
-               break
-       }
-       // match: (MUL (MOVDconst [c]) (MOVDconst [d]))
-       // result: (MOVDconst [c*d])
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_0.AuxInt)
-                       if v_1.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       d := auxIntToInt64(v_1.AuxInt)
-                       v.reset(OpARM64MOVDconst)
-                       v.AuxInt = int64ToAuxInt(c * d)
-                       return true
-               }
-               break
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MULW(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MULW (NEG x) y)
-       // result: (MNEGW x y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpARM64NEG {
-                               continue
-                       }
-                       x := v_0.Args[0]
-                       y := v_1
-                       v.reset(OpARM64MNEGW)
-                       v.AddArg2(x, y)
-                       return true
-               }
-               break
-       }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: int32(c)==-1
-       // result: (NEG x)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_1.AuxInt)
-                       if !(int32(c) == -1) {
-                               continue
-                       }
-                       v.reset(OpARM64NEG)
-                       v.AddArg(x)
-                       return true
-               }
-               break
-       }
-       // match: (MULW _ (MOVDconst [c]))
-       // cond: int32(c)==0
-       // result: (MOVDconst [0])
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_1.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_1.AuxInt)
-                       if !(int32(c) == 0) {
-                               continue
-                       }
-                       v.reset(OpARM64MOVDconst)
-                       v.AuxInt = int64ToAuxInt(0)
-                       return true
-               }
-               break
-       }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: int32(c)==1
-       // result: x
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_1.AuxInt)
-                       if !(int32(c) == 1) {
-                               continue
-                       }
-                       v.copyOf(x)
-                       return true
-               }
-               break
-       }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: isPowerOfTwo64(c)
-       // result: (SLLconst [log64(c)] x)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_1.AuxInt)
-                       if !(isPowerOfTwo64(c)) {
-                               continue
-                       }
-                       v.reset(OpARM64SLLconst)
-                       v.AuxInt = int64ToAuxInt(log64(c))
-                       v.AddArg(x)
-                       return true
-               }
-               break
-       }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: isPowerOfTwo64(c-1) && int32(c) >= 3
-       // result: (ADDshiftLL x x [log64(c-1)])
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_1.AuxInt)
-                       if !(isPowerOfTwo64(c-1) && int32(c) >= 3) {
-                               continue
-                       }
-                       v.reset(OpARM64ADDshiftLL)
-                       v.AuxInt = int64ToAuxInt(log64(c - 1))
-                       v.AddArg2(x, x)
-                       return true
-               }
-               break
-       }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: isPowerOfTwo64(c+1) && int32(c) >= 7
-       // result: (ADDshiftLL (NEG <x.Type> x) x [log64(c+1)])
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_1.AuxInt)
-                       if !(isPowerOfTwo64(c+1) && int32(c) >= 7) {
-                               continue
-                       }
-                       v.reset(OpARM64ADDshiftLL)
-                       v.AuxInt = int64ToAuxInt(log64(c + 1))
-                       v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-                       v0.AddArg(x)
-                       v.AddArg2(v0, x)
-                       return true
-               }
-               break
-       }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c)
-       // result: (SLLconst [log64(c/3)] (ADDshiftLL <x.Type> x x [1]))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_1.AuxInt)
-                       if !(c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c)) {
-                               continue
-                       }
-                       v.reset(OpARM64SLLconst)
-                       v.AuxInt = int64ToAuxInt(log64(c / 3))
-                       v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-                       v0.AuxInt = int64ToAuxInt(1)
-                       v0.AddArg2(x, x)
-                       v.AddArg(v0)
-                       return true
-               }
-               break
-       }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c)
-       // result: (SLLconst [log64(c/5)] (ADDshiftLL <x.Type> x x [2]))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_1.AuxInt)
-                       if !(c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c)) {
-                               continue
-                       }
-                       v.reset(OpARM64SLLconst)
-                       v.AuxInt = int64ToAuxInt(log64(c / 5))
-                       v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-                       v0.AuxInt = int64ToAuxInt(2)
-                       v0.AddArg2(x, x)
-                       v.AddArg(v0)
-                       return true
-               }
-               break
-       }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c)
-       // result: (SLLconst [log64(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_1.AuxInt)
-                       if !(c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c)) {
-                               continue
-                       }
-                       v.reset(OpARM64SLLconst)
-                       v.AuxInt = int64ToAuxInt(log64(c / 7))
-                       v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-                       v0.AuxInt = int64ToAuxInt(3)
-                       v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-                       v1.AddArg(x)
-                       v0.AddArg2(v1, x)
-                       v.AddArg(v0)
-                       return true
-               }
-               break
-       }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c)
-       // result: (SLLconst [log64(c/9)] (ADDshiftLL <x.Type> x x [3]))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_1.AuxInt)
-                       if !(c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c)) {
-                               continue
-                       }
-                       v.reset(OpARM64SLLconst)
-                       v.AuxInt = int64ToAuxInt(log64(c / 9))
-                       v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-                       v0.AuxInt = int64ToAuxInt(3)
-                       v0.AddArg2(x, x)
-                       v.AddArg(v0)
-                       return true
-               }
-               break
-       }
-       // match: (MULW (MOVDconst [c]) (MOVDconst [d]))
-       // result: (MOVDconst [int64(int32(c)*int32(d))])
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_0.AuxInt)
-                       if v_1.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       d := auxIntToInt64(v_1.AuxInt)
-                       v.reset(OpARM64MOVDconst)
-                       v.AuxInt = int64ToAuxInt(int64(int32(c) * int32(d)))
-                       return true
-               }
-               break
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MVN(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (MVN (XOR x y))
-       // result: (EON x y)
-       for {
-               if v_0.Op != OpARM64XOR {
-                       break
-               }
-               y := v_0.Args[1]
-               x := v_0.Args[0]
-               v.reset(OpARM64EON)
-               v.AddArg2(x, y)
-               return true
-       }
-       // match: (MVN (MOVDconst [c]))
-       // result: (MOVDconst [^c])
-       for {
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(^c)
-               return true
-       }
-       // match: (MVN x:(SLLconst [c] y))
-       // cond: clobberIfDead(x)
-       // result: (MVNshiftLL [c] y)
-       for {
-               x := v_0
-               if x.Op != OpARM64SLLconst {
-                       break
-               }
-               c := auxIntToInt64(x.AuxInt)
-               y := x.Args[0]
-               if !(clobberIfDead(x)) {
-                       break
-               }
-               v.reset(OpARM64MVNshiftLL)
-               v.AuxInt = int64ToAuxInt(c)
-               v.AddArg(y)
-               return true
-       }
-       // match: (MVN x:(SRLconst [c] y))
-       // cond: clobberIfDead(x)
-       // result: (MVNshiftRL [c] y)
-       for {
-               x := v_0
-               if x.Op != OpARM64SRLconst {
-                       break
-               }
-               c := auxIntToInt64(x.AuxInt)
-               y := x.Args[0]
-               if !(clobberIfDead(x)) {
-                       break
-               }
-               v.reset(OpARM64MVNshiftRL)
-               v.AuxInt = int64ToAuxInt(c)
-               v.AddArg(y)
-               return true
-       }
-       // match: (MVN x:(SRAconst [c] y))
-       // cond: clobberIfDead(x)
-       // result: (MVNshiftRA [c] y)
-       for {
-               x := v_0
-               if x.Op != OpARM64SRAconst {
-                       break
-               }
-               c := auxIntToInt64(x.AuxInt)
-               y := x.Args[0]
-               if !(clobberIfDead(x)) {
-                       break
-               }
-               v.reset(OpARM64MVNshiftRA)
-               v.AuxInt = int64ToAuxInt(c)
-               v.AddArg(y)
-               return true
-       }
-       // match: (MVN x:(RORconst [c] y))
-       // cond: clobberIfDead(x)
-       // result: (MVNshiftRO [c] y)
-       for {
-               x := v_0
-               if x.Op != OpARM64RORconst {
-                       break
-               }
-               c := auxIntToInt64(x.AuxInt)
-               y := x.Args[0]
-               if !(clobberIfDead(x)) {
-                       break
-               }
-               v.reset(OpARM64MVNshiftRO)
-               v.AuxInt = int64ToAuxInt(c)
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MVNshiftLL(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (MVNshiftLL (MOVDconst [c]) [d])
-       // result: (MOVDconst [^int64(uint64(c)<<uint64(d))])
-       for {
-               d := auxIntToInt64(v.AuxInt)
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(^int64(uint64(c) << uint64(d)))
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MVNshiftRA(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (MVNshiftRA (MOVDconst [c]) [d])
-       // result: (MOVDconst [^(c>>uint64(d))])
-       for {
-               d := auxIntToInt64(v.AuxInt)
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(^(c >> uint64(d)))
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MVNshiftRL(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (MVNshiftRL (MOVDconst [c]) [d])
-       // result: (MOVDconst [^int64(uint64(c)>>uint64(d))])
-       for {
-               d := auxIntToInt64(v.AuxInt)
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(^int64(uint64(c) >> uint64(d)))
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MVNshiftRO(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (MVNshiftRO (MOVDconst [c]) [d])
-       // result: (MOVDconst [^rotateRight64(c, d)])
-       for {
-               d := auxIntToInt64(v.AuxInt)
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(^rotateRight64(c, d))
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64NEG(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (NEG (MUL x y))
-       // result: (MNEG x y)
-       for {
-               if v_0.Op != OpARM64MUL {
-                       break
-               }
-               y := v_0.Args[1]
-               x := v_0.Args[0]
-               v.reset(OpARM64MNEG)
-               v.AddArg2(x, y)
-               return true
-       }
-       // match: (NEG (MULW x y))
-       // result: (MNEGW x y)
-       for {
-               if v_0.Op != OpARM64MULW {
-                       break
-               }
-               y := v_0.Args[1]
-               x := v_0.Args[0]
-               v.reset(OpARM64MNEGW)
-               v.AddArg2(x, y)
-               return true
-       }
-       // match: (NEG (NEG x))
-       // result: x
-       for {
-               if v_0.Op != OpARM64NEG {
-                       break
-               }
-               x := v_0.Args[0]
-               v.copyOf(x)
-               return true
-       }
-       // match: (NEG (MOVDconst [c]))
-       // result: (MOVDconst [-c])
-       for {
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(-c)
-               return true
-       }
-       // match: (NEG x:(SLLconst [c] y))
-       // cond: clobberIfDead(x)
-       // result: (NEGshiftLL [c] y)
-       for {
-               x := v_0
-               if x.Op != OpARM64SLLconst {
-                       break
-               }
-               c := auxIntToInt64(x.AuxInt)
-               y := x.Args[0]
-               if !(clobberIfDead(x)) {
-                       break
-               }
-               v.reset(OpARM64NEGshiftLL)
-               v.AuxInt = int64ToAuxInt(c)
-               v.AddArg(y)
-               return true
-       }
-       // match: (NEG x:(SRLconst [c] y))
-       // cond: clobberIfDead(x)
-       // result: (NEGshiftRL [c] y)
-       for {
-               x := v_0
-               if x.Op != OpARM64SRLconst {
-                       break
-               }
-               c := auxIntToInt64(x.AuxInt)
-               y := x.Args[0]
-               if !(clobberIfDead(x)) {
-                       break
-               }
-               v.reset(OpARM64NEGshiftRL)
-               v.AuxInt = int64ToAuxInt(c)
-               v.AddArg(y)
-               return true
-       }
-       // match: (NEG x:(SRAconst [c] y))
-       // cond: clobberIfDead(x)
-       // result: (NEGshiftRA [c] y)
-       for {
-               x := v_0
-               if x.Op != OpARM64SRAconst {
-                       break
-               }
-               c := auxIntToInt64(x.AuxInt)
-               y := x.Args[0]
-               if !(clobberIfDead(x)) {
-                       break
-               }
-               v.reset(OpARM64NEGshiftRA)
-               v.AuxInt = int64ToAuxInt(c)
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64NEGshiftLL(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (NEGshiftLL (MOVDconst [c]) [d])
-       // result: (MOVDconst [-int64(uint64(c)<<uint64(d))])
-       for {
-               d := auxIntToInt64(v.AuxInt)
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(-int64(uint64(c) << uint64(d)))
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64NEGshiftRA(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (NEGshiftRA (MOVDconst [c]) [d])
-       // result: (MOVDconst [-(c>>uint64(d))])
-       for {
-               d := auxIntToInt64(v.AuxInt)
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(-(c >> uint64(d)))
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64NEGshiftRL(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (NEGshiftRL (MOVDconst [c]) [d])
-       // result: (MOVDconst [-int64(uint64(c)>>uint64(d))])
-       for {
-               d := auxIntToInt64(v.AuxInt)
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(-int64(uint64(c) >> uint64(d)))
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64NotEqual(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (NotEqual (CMPconst [0] z:(AND x y)))
-       // cond: z.Uses == 1
-       // result: (NotEqual (TST x y))
-       for {
-               if v_0.Op != OpARM64CMPconst || auxIntToInt64(v_0.AuxInt) != 0 {
-                       break
-               }
-               z := v_0.Args[0]
-               if z.Op != OpARM64AND {
-                       break
-               }
-               y := z.Args[1]
-               x := z.Args[0]
-               if !(z.Uses == 1) {
-                       break
-               }
-               v.reset(OpARM64NotEqual)
-               v0 := b.NewValue0(v.Pos, OpARM64TST, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (NotEqual (CMPWconst [0] x:(ANDconst [c] y)))
-       // cond: x.Uses == 1
-       // result: (NotEqual (TSTWconst [int32(c)] y))
-       for {
-               if v_0.Op != OpARM64CMPWconst || auxIntToInt32(v_0.AuxInt) != 0 {
-                       break
-               }
-               x := v_0.Args[0]
-               if x.Op != OpARM64ANDconst {
-                       break
-               }
-               c := auxIntToInt64(x.AuxInt)
-               y := x.Args[0]
-               if !(x.Uses == 1) {
-                       break
-               }
-               v.reset(OpARM64NotEqual)
-               v0 := b.NewValue0(v.Pos, OpARM64TSTWconst, types.TypeFlags)
-               v0.AuxInt = int32ToAuxInt(int32(c))
-               v0.AddArg(y)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (NotEqual (CMPWconst [0] z:(AND x y)))
-       // cond: z.Uses == 1
-       // result: (NotEqual (TSTW x y))
-       for {
-               if v_0.Op != OpARM64CMPWconst || auxIntToInt32(v_0.AuxInt) != 0 {
-                       break
-               }
-               z := v_0.Args[0]
-               if z.Op != OpARM64AND {
-                       break
-               }
-               y := z.Args[1]
-               x := z.Args[0]
-               if !(z.Uses == 1) {
-                       break
-               }
-               v.reset(OpARM64NotEqual)
-               v0 := b.NewValue0(v.Pos, OpARM64TSTW, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (NotEqual (CMPconst [0] x:(ANDconst [c] y)))
-       // cond: x.Uses == 1
-       // result: (NotEqual (TSTconst [c] y))
-       for {
-               if v_0.Op != OpARM64CMPconst || auxIntToInt64(v_0.AuxInt) != 0 {
-                       break
-               }
-               x := v_0.Args[0]
-               if x.Op != OpARM64ANDconst {
-                       break
-               }
-               c := auxIntToInt64(x.AuxInt)
-               y := x.Args[0]
-               if !(x.Uses == 1) {
-                       break
-               }
-               v.reset(OpARM64NotEqual)
-               v0 := b.NewValue0(v.Pos, OpARM64TSTconst, types.TypeFlags)
-               v0.AuxInt = int64ToAuxInt(c)
-               v0.AddArg(y)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (NotEqual (CMP x z:(NEG y)))
-       // cond: z.Uses == 1
-       // result: (NotEqual (CMN x y))
-       for {
-               if v_0.Op != OpARM64CMP {
-                       break
-               }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               z := v_0.Args[1]
-               if z.Op != OpARM64NEG {
-                       break
-               }
-               y := z.Args[0]
-               if !(z.Uses == 1) {
-                       break
-               }
-               v.reset(OpARM64NotEqual)
-               v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (NotEqual (CMPW x z:(NEG y)))
-       // cond: z.Uses == 1
-       // result: (NotEqual (CMNW x y))
-       for {
-               if v_0.Op != OpARM64CMPW {
-                       break
-               }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               z := v_0.Args[1]
-               if z.Op != OpARM64NEG {
-                       break
-               }
-               y := z.Args[0]
-               if !(z.Uses == 1) {
-                       break
-               }
-               v.reset(OpARM64NotEqual)
-               v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (NotEqual (CMPconst [0] x:(ADDconst [c] y)))
-       // cond: x.Uses == 1
-       // result: (NotEqual (CMNconst [c] y))
-       for {
-               if v_0.Op != OpARM64CMPconst || auxIntToInt64(v_0.AuxInt) != 0 {
-                       break
-               }
-               x := v_0.Args[0]
-               if x.Op != OpARM64ADDconst {
-                       break
-               }
-               c := auxIntToInt64(x.AuxInt)
-               y := x.Args[0]
-               if !(x.Uses == 1) {
-                       break
-               }
-               v.reset(OpARM64NotEqual)
-               v0 := b.NewValue0(v.Pos, OpARM64CMNconst, types.TypeFlags)
-               v0.AuxInt = int64ToAuxInt(c)
-               v0.AddArg(y)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (NotEqual (CMPWconst [0] x:(ADDconst [c] y)))
-       // cond: x.Uses == 1
-       // result: (NotEqual (CMNWconst [int32(c)] y))
-       for {
-               if v_0.Op != OpARM64CMPWconst || auxIntToInt32(v_0.AuxInt) != 0 {
-                       break
-               }
-               x := v_0.Args[0]
-               if x.Op != OpARM64ADDconst {
-                       break
-               }
-               c := auxIntToInt64(x.AuxInt)
-               y := x.Args[0]
-               if !(x.Uses == 1) {
-                       break
-               }
-               v.reset(OpARM64NotEqual)
-               v0 := b.NewValue0(v.Pos, OpARM64CMNWconst, types.TypeFlags)
-               v0.AuxInt = int32ToAuxInt(int32(c))
-               v0.AddArg(y)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (NotEqual (CMPconst [0] z:(ADD x y)))
-       // cond: z.Uses == 1
-       // result: (NotEqual (CMN x y))
-       for {
-               if v_0.Op != OpARM64CMPconst || auxIntToInt64(v_0.AuxInt) != 0 {
-                       break
-               }
-               z := v_0.Args[0]
-               if z.Op != OpARM64ADD {
-                       break
-               }
-               y := z.Args[1]
-               x := z.Args[0]
-               if !(z.Uses == 1) {
-                       break
-               }
-               v.reset(OpARM64NotEqual)
-               v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (NotEqual (CMPWconst [0] z:(ADD x y)))
-       // cond: z.Uses == 1
-       // result: (NotEqual (CMNW x y))
-       for {
-               if v_0.Op != OpARM64CMPWconst || auxIntToInt32(v_0.AuxInt) != 0 {
-                       break
-               }
-               z := v_0.Args[0]
-               if z.Op != OpARM64ADD {
-                       break
-               }
-               y := z.Args[1]
-               x := z.Args[0]
-               if !(z.Uses == 1) {
-                       break
-               }
-               v.reset(OpARM64NotEqual)
-               v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (NotEqual (CMPconst [0] z:(MADD a x y)))
-       // cond: z.Uses == 1
-       // result: (NotEqual (CMN a (MUL <x.Type> x y)))
-       for {
-               if v_0.Op != OpARM64CMPconst || auxIntToInt64(v_0.AuxInt) != 0 {
-                       break
-               }
-               z := v_0.Args[0]
-               if z.Op != OpARM64MADD {
-                       break
-               }
-               y := z.Args[2]
-               a := z.Args[0]
-               x := z.Args[1]
-               if !(z.Uses == 1) {
-                       break
-               }
-               v.reset(OpARM64NotEqual)
-               v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
-               v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
-               v1.AddArg2(x, y)
-               v0.AddArg2(a, v1)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (NotEqual (CMPconst [0] z:(MSUB a x y)))
-       // cond: z.Uses == 1
-       // result: (NotEqual (CMP a (MUL <x.Type> x y)))
-       for {
-               if v_0.Op != OpARM64CMPconst || auxIntToInt64(v_0.AuxInt) != 0 {
-                       break
-               }
-               z := v_0.Args[0]
-               if z.Op != OpARM64MSUB {
-                       break
-               }
-               y := z.Args[2]
-               a := z.Args[0]
-               x := z.Args[1]
-               if !(z.Uses == 1) {
-                       break
-               }
-               v.reset(OpARM64NotEqual)
-               v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags)
-               v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
-               v1.AddArg2(x, y)
-               v0.AddArg2(a, v1)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (NotEqual (CMPWconst [0] z:(MADDW a x y)))
-       // cond: z.Uses == 1
-       // result: (NotEqual (CMNW a (MULW <x.Type> x y)))
-       for {
-               if v_0.Op != OpARM64CMPWconst || auxIntToInt32(v_0.AuxInt) != 0 {
-                       break
-               }
-               z := v_0.Args[0]
-               if z.Op != OpARM64MADDW {
-                       break
-               }
-               y := z.Args[2]
-               a := z.Args[0]
-               x := z.Args[1]
-               if !(z.Uses == 1) {
-                       break
-               }
-               v.reset(OpARM64NotEqual)
-               v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags)
-               v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
-               v1.AddArg2(x, y)
-               v0.AddArg2(a, v1)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (NotEqual (CMPWconst [0] z:(MSUBW a x y)))
-       // cond: z.Uses == 1
-       // result: (NotEqual (CMPW a (MULW <x.Type> x y)))
-       for {
-               if v_0.Op != OpARM64CMPWconst || auxIntToInt32(v_0.AuxInt) != 0 {
-                       break
-               }
-               z := v_0.Args[0]
-               if z.Op != OpARM64MSUBW {
-                       break
-               }
-               y := z.Args[2]
-               a := z.Args[0]
-               x := z.Args[1]
-               if !(z.Uses == 1) {
-                       break
-               }
-               v.reset(OpARM64NotEqual)
-               v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags)
-               v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
-               v1.AddArg2(x, y)
-               v0.AddArg2(a, v1)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (NotEqual (FlagConstant [fc]))
-       // result: (MOVDconst [b2i(fc.ne())])
-       for {
-               if v_0.Op != OpARM64FlagConstant {
-                       break
-               }
-               fc := auxIntToFlagConstant(v_0.AuxInt)
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(b2i(fc.ne()))
-               return true
-       }
-       // match: (NotEqual (InvertFlags x))
-       // result: (NotEqual x)
-       for {
-               if v_0.Op != OpARM64InvertFlags {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpARM64NotEqual)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64OR(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OR x (MOVDconst [c]))
-       // result: (ORconst [c] x)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpARM64MOVDconst {
-                               continue
-                       }
-                       c := auxIntToInt64(v_1.AuxInt)
-                       v.reset(OpARM64ORconst)
-                       v.AuxInt = int64ToAuxInt(c)
-                       v.AddArg(x)
-                       return true
-               }
-               break
-       }
-       // match: (OR x x)
-       // result: x
-       for {
-               x := v_0
-               if x != v_1 {
-                       break
-               }
-               v.copyOf(x)
-               return true
-       }
-       // match: (OR x (MVN y))
-       // result: (ORN x y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpARM64MVN {
-                               continue
-                       }
-                       y := v_1.Args[0]
-                       v.reset(OpARM64ORN)
-                       v.AddArg2(x, y)
-                       return true
-               }
-               break
-       }
-       // match: (OR x0 x1:(SLLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftLL x0 y [c])
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       x1 := v_1
-                       if x1.Op != OpARM64SLLconst {
-                               continue
-                       }
-                       c := auxIntToInt64(x1.AuxInt)
-                       y := x1.Args[0]
-                       if !(clobberIfDead(x1)) {
-                               continue
-                       }
-                       v.reset(OpARM64ORshiftLL)
-                       v.AuxInt = int64ToAuxInt(c)
-                       v.AddArg2(x0, y)
-                       return true
-               }
-               break
-       }
-       // match: (OR x0 x1:(SRLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftRL x0 y [c])
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       x1 := v_1
-                       if x1.Op != OpARM64SRLconst {
-                               continue
-                       }
-                       c := auxIntToInt64(x1.AuxInt)
-                       y := x1.Args[0]
-                       if !(clobberIfDead(x1)) {
-                               continue
-                       }
-                       v.reset(OpARM64ORshiftRL)
-                       v.AuxInt = int64ToAuxInt(c)
-                       v.AddArg2(x0, y)
-                       return true
-               }
-               break
-       }
-       // match: (OR x0 x1:(SRAconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftRA x0 y [c])
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       x1 := v_1
-                       if x1.Op != OpARM64SRAconst {
-                               continue
-                       }
-                       c := auxIntToInt64(x1.AuxInt)
-                       y := x1.Args[0]
-                       if !(clobberIfDead(x1)) {
-                               continue
-                       }
-                       v.reset(OpARM64ORshiftRA)
-                       v.AuxInt = int64ToAuxInt(c)
-                       v.AddArg2(x0, y)
-                       return true
-               }
-               break
-       }
-       // match: (OR x0 x1:(RORconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftRO x0 y [c])
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       x1 := v_1
-                       if x1.Op != OpARM64RORconst {
-                               continue
-                       }
-                       c := auxIntToInt64(x1.AuxInt)
-                       y := x1.Args[0]
-                       if !(clobberIfDead(x1)) {
-                               continue
-                       }
-                       v.reset(OpARM64ORshiftRO)
-                       v.AuxInt = int64ToAuxInt(c)
-                       v.AddArg2(x0, y)
-                       return true
-               }
-               break
-       }
-       // match: (OR (UBFIZ [bfc] x) (ANDconst [ac] y))
-       // cond: ac == ^((1<<uint(bfc.getARM64BFwidth())-1) << uint(bfc.getARM64BFlsb()))
-       // result: (BFI [bfc] y x)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpARM64UBFIZ {
-                               continue
-                       }
-                       bfc := auxIntToArm64BitField(v_0.AuxInt)
-                       x := v_0.Args[0]
-                       if v_1.Op != OpARM64ANDconst {
-                               continue
-                       }
-                       ac := auxIntToInt64(v_1.AuxInt)
-                       y := v_1.Args[0]
-                       if !(ac == ^((1<<uint(bfc.getARM64BFwidth()) - 1) << uint(bfc.getARM64BFlsb()))) {
-                               continue
-                       }
-                       v.reset(OpARM64BFI)
-                       v.AuxInt = arm64BitFieldToAuxInt(bfc)
-                       v.AddArg2(y, x)
-                       return true
-               }
-               break
-       }
-       // match: (OR (UBFX [bfc] x) (ANDconst [ac] y))
-       // cond: ac == ^(1<<uint(bfc.getARM64BFwidth())-1)
-       // result: (BFXIL [bfc] y x)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpARM64UBFX {
-                               continue
-                       }
-                       bfc := auxIntToArm64BitField(v_0.AuxInt)
-                       x := v_0.Args[0]
-                       if v_1.Op != OpARM64ANDconst {
-                               continue
-                       }
-                       ac := auxIntToInt64(v_1.AuxInt)
-                       y := v_1.Args[0]
-                       if !(ac == ^(1<<uint(bfc.getARM64BFwidth()) - 1)) {
-                               continue
-                       }
-                       v.reset(OpARM64BFXIL)
-                       v.AuxInt = arm64BitFieldToAuxInt(bfc)
-                       v.AddArg2(y, x)
-                       return true
-               }
-               break
-       }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       o0 := v_0
-                       if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 8 {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o1 := o0.Args[0]
-                       if o1.Op != OpARM64ORshiftLL || auxIntToInt64(o1.AuxInt) != 16 {
-                               continue
-                       }
-                       _ = o1.Args[1]
-                       s0 := o1.Args[0]
-                       if s0.Op != OpARM64SLLconst || auxIntToInt64(s0.AuxInt) != 24 {
-                               continue
-                       }
-                       y0 := s0.Args[0]
-                       if y0.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x0 := y0.Args[0]
-                       if x0.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i3 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       y1 := o1.Args[1]
-                       if y1.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x1 := y1.Args[0]
-                       if x1.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i2 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] {
-                               continue
-                       }
-                       y2 := o0.Args[1]
-                       if y2.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x2 := y2.Args[0]
-                       if x2.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x2.AuxInt)
-                       if auxToSym(x2.Aux) != s {
-                               continue
-                       }
-                       _ = x2.Args[1]
-                       if p != x2.Args[0] || mem != x2.Args[1] {
-                               continue
-                       }
-                       y3 := v_1
-                       if y3.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x3 := y3.Args[0]
-                       if x3.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x3.AuxInt)
-                       if auxToSym(x3.Aux) != s {
-                               continue
-                       }
-                       _ = x3.Args[1]
-                       if p != x3.Args[0] || mem != x3.Args[1] || !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1, x2, x3)
-                       v0 := b.NewValue0(x3.Pos, OpARM64MOVWUload, t)
-                       v.copyOf(v0)
-                       v0.Aux = symToAux(s)
-                       v1 := b.NewValue0(x3.Pos, OpOffPtr, p.Type)
-                       v1.AuxInt = int64ToAuxInt(int64(i0))
-                       v1.AddArg(p)
-                       v0.AddArg2(v1, mem)
-                       return true
-               }
-               break
-       }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr0 idx0 mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr0 idx0 mem)
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       o0 := v_0
-                       if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 8 {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o1 := o0.Args[0]
-                       if o1.Op != OpARM64ORshiftLL || auxIntToInt64(o1.AuxInt) != 16 {
-                               continue
-                       }
-                       _ = o1.Args[1]
-                       s0 := o1.Args[0]
-                       if s0.Op != OpARM64SLLconst || auxIntToInt64(s0.AuxInt) != 24 {
-                               continue
-                       }
-                       y0 := s0.Args[0]
-                       if y0.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x0 := y0.Args[0]
-                       if x0.Op != OpARM64MOVBUload || auxIntToInt32(x0.AuxInt) != 3 {
-                               continue
-                       }
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       y1 := o1.Args[1]
-                       if y1.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x1 := y1.Args[0]
-                       if x1.Op != OpARM64MOVBUload || auxIntToInt32(x1.AuxInt) != 2 || auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] {
-                               continue
-                       }
-                       y2 := o0.Args[1]
-                       if y2.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x2 := y2.Args[0]
-                       if x2.Op != OpARM64MOVBUload || auxIntToInt32(x2.AuxInt) != 1 || auxToSym(x2.Aux) != s {
-                               continue
-                       }
-                       _ = x2.Args[1]
-                       p1 := x2.Args[0]
-                       if p1.Op != OpARM64ADD {
-                               continue
-                       }
-                       _ = p1.Args[1]
-                       p1_0 := p1.Args[0]
-                       p1_1 := p1.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, p1_0, p1_1 = _i1+1, p1_1, p1_0 {
-                               ptr1 := p1_0
-                               idx1 := p1_1
-                               if mem != x2.Args[1] {
-                                       continue
-                               }
-                               y3 := v_1
-                               if y3.Op != OpARM64MOVDnop {
-                                       continue
-                               }
-                               x3 := y3.Args[0]
-                               if x3.Op != OpARM64MOVBUloadidx {
-                                       continue
-                               }
-                               _ = x3.Args[2]
-                               ptr0 := x3.Args[0]
-                               idx0 := x3.Args[1]
-                               if mem != x3.Args[2] || !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, x2, x3)
-                               v0 := b.NewValue0(x2.Pos, OpARM64MOVWUloadidx, t)
-                               v.copyOf(v0)
-                               v0.AddArg3(ptr0, idx0, mem)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr idx mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr idx mem)
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       o0 := v_0
-                       if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 8 {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o1 := o0.Args[0]
-                       if o1.Op != OpARM64ORshiftLL || auxIntToInt64(o1.AuxInt) != 16 {
-                               continue
-                       }
-                       _ = o1.Args[1]
-                       s0 := o1.Args[0]
-                       if s0.Op != OpARM64SLLconst || auxIntToInt64(s0.AuxInt) != 24 {
-                               continue
-                       }
-                       y0 := s0.Args[0]
-                       if y0.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x0 := y0.Args[0]
-                       if x0.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       mem := x0.Args[2]
-                       ptr := x0.Args[0]
-                       x0_1 := x0.Args[1]
-                       if x0_1.Op != OpARM64ADDconst || auxIntToInt64(x0_1.AuxInt) != 3 {
-                               continue
-                       }
-                       idx := x0_1.Args[0]
-                       y1 := o1.Args[1]
-                       if y1.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x1 := y1.Args[0]
-                       if x1.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x1.Args[2]
-                       if ptr != x1.Args[0] {
-                               continue
-                       }
-                       x1_1 := x1.Args[1]
-                       if x1_1.Op != OpARM64ADDconst || auxIntToInt64(x1_1.AuxInt) != 2 || idx != x1_1.Args[0] || mem != x1.Args[2] {
-                               continue
-                       }
-                       y2 := o0.Args[1]
-                       if y2.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x2 := y2.Args[0]
-                       if x2.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x2.Args[2]
-                       if ptr != x2.Args[0] {
-                               continue
-                       }
-                       x2_1 := x2.Args[1]
-                       if x2_1.Op != OpARM64ADDconst || auxIntToInt64(x2_1.AuxInt) != 1 || idx != x2_1.Args[0] || mem != x2.Args[2] {
-                               continue
-                       }
-                       y3 := v_1
-                       if y3.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x3 := y3.Args[0]
-                       if x3.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x3.Args[2]
-                       if ptr != x3.Args[0] || idx != x3.Args[1] || mem != x3.Args[2] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1, x2, x3)
-                       v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
-                       v.copyOf(v0)
-                       v0.AddArg3(ptr, idx, mem)
-                       return true
-               }
-               break
-       }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       o0 := v_0
-                       if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 8 {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o1 := o0.Args[0]
-                       if o1.Op != OpARM64ORshiftLL || auxIntToInt64(o1.AuxInt) != 16 {
-                               continue
-                       }
-                       _ = o1.Args[1]
-                       o2 := o1.Args[0]
-                       if o2.Op != OpARM64ORshiftLL || auxIntToInt64(o2.AuxInt) != 24 {
-                               continue
-                       }
-                       _ = o2.Args[1]
-                       o3 := o2.Args[0]
-                       if o3.Op != OpARM64ORshiftLL || auxIntToInt64(o3.AuxInt) != 32 {
-                               continue
-                       }
-                       _ = o3.Args[1]
-                       o4 := o3.Args[0]
-                       if o4.Op != OpARM64ORshiftLL || auxIntToInt64(o4.AuxInt) != 40 {
-                               continue
-                       }
-                       _ = o4.Args[1]
-                       o5 := o4.Args[0]
-                       if o5.Op != OpARM64ORshiftLL || auxIntToInt64(o5.AuxInt) != 48 {
-                               continue
-                       }
-                       _ = o5.Args[1]
-                       s0 := o5.Args[0]
-                       if s0.Op != OpARM64SLLconst || auxIntToInt64(s0.AuxInt) != 56 {
-                               continue
-                       }
-                       y0 := s0.Args[0]
-                       if y0.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x0 := y0.Args[0]
-                       if x0.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i7 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       y1 := o5.Args[1]
-                       if y1.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x1 := y1.Args[0]
-                       if x1.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i6 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] {
-                               continue
-                       }
-                       y2 := o4.Args[1]
-                       if y2.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x2 := y2.Args[0]
-                       if x2.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i5 := auxIntToInt32(x2.AuxInt)
-                       if auxToSym(x2.Aux) != s {
-                               continue
-                       }
-                       _ = x2.Args[1]
-                       if p != x2.Args[0] || mem != x2.Args[1] {
-                               continue
-                       }
-                       y3 := o3.Args[1]
-                       if y3.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x3 := y3.Args[0]
-                       if x3.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i4 := auxIntToInt32(x3.AuxInt)
-                       if auxToSym(x3.Aux) != s {
-                               continue
-                       }
-                       _ = x3.Args[1]
-                       if p != x3.Args[0] || mem != x3.Args[1] {
-                               continue
-                       }
-                       y4 := o2.Args[1]
-                       if y4.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x4 := y4.Args[0]
-                       if x4.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i3 := auxIntToInt32(x4.AuxInt)
-                       if auxToSym(x4.Aux) != s {
-                               continue
-                       }
-                       _ = x4.Args[1]
-                       if p != x4.Args[0] || mem != x4.Args[1] {
-                               continue
-                       }
-                       y5 := o1.Args[1]
-                       if y5.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x5 := y5.Args[0]
-                       if x5.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i2 := auxIntToInt32(x5.AuxInt)
-                       if auxToSym(x5.Aux) != s {
-                               continue
-                       }
-                       _ = x5.Args[1]
-                       if p != x5.Args[0] || mem != x5.Args[1] {
-                               continue
-                       }
-                       y6 := o0.Args[1]
-                       if y6.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x6 := y6.Args[0]
-                       if x6.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x6.AuxInt)
-                       if auxToSym(x6.Aux) != s {
-                               continue
-                       }
-                       _ = x6.Args[1]
-                       if p != x6.Args[0] || mem != x6.Args[1] {
-                               continue
-                       }
-                       y7 := v_1
-                       if y7.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x7 := y7.Args[0]
-                       if x7.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x7.AuxInt)
-                       if auxToSym(x7.Aux) != s {
-                               continue
-                       }
-                       _ = x7.Args[1]
-                       if p != x7.Args[0] || mem != x7.Args[1] || !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-                       v0 := b.NewValue0(x7.Pos, OpARM64MOVDload, t)
-                       v.copyOf(v0)
-                       v0.Aux = symToAux(s)
-                       v1 := b.NewValue0(x7.Pos, OpOffPtr, p.Type)
-                       v1.AuxInt = int64ToAuxInt(int64(i0))
-                       v1.AddArg(p)
-                       v0.AddArg2(v1, mem)
-                       return true
-               }
-               break
-       }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr0 idx0 mem)
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       o0 := v_0
-                       if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 8 {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o1 := o0.Args[0]
-                       if o1.Op != OpARM64ORshiftLL || auxIntToInt64(o1.AuxInt) != 16 {
-                               continue
-                       }
-                       _ = o1.Args[1]
-                       o2 := o1.Args[0]
-                       if o2.Op != OpARM64ORshiftLL || auxIntToInt64(o2.AuxInt) != 24 {
-                               continue
-                       }
-                       _ = o2.Args[1]
-                       o3 := o2.Args[0]
-                       if o3.Op != OpARM64ORshiftLL || auxIntToInt64(o3.AuxInt) != 32 {
-                               continue
-                       }
-                       _ = o3.Args[1]
-                       o4 := o3.Args[0]
-                       if o4.Op != OpARM64ORshiftLL || auxIntToInt64(o4.AuxInt) != 40 {
-                               continue
-                       }
-                       _ = o4.Args[1]
-                       o5 := o4.Args[0]
-                       if o5.Op != OpARM64ORshiftLL || auxIntToInt64(o5.AuxInt) != 48 {
-                               continue
-                       }
-                       _ = o5.Args[1]
-                       s0 := o5.Args[0]
-                       if s0.Op != OpARM64SLLconst || auxIntToInt64(s0.AuxInt) != 56 {
-                               continue
-                       }
-                       y0 := s0.Args[0]
-                       if y0.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x0 := y0.Args[0]
-                       if x0.Op != OpARM64MOVBUload || auxIntToInt32(x0.AuxInt) != 7 {
-                               continue
-                       }
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       y1 := o5.Args[1]
-                       if y1.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x1 := y1.Args[0]
-                       if x1.Op != OpARM64MOVBUload || auxIntToInt32(x1.AuxInt) != 6 || auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] {
-                               continue
-                       }
-                       y2 := o4.Args[1]
-                       if y2.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x2 := y2.Args[0]
-                       if x2.Op != OpARM64MOVBUload || auxIntToInt32(x2.AuxInt) != 5 || auxToSym(x2.Aux) != s {
-                               continue
-                       }
-                       _ = x2.Args[1]
-                       if p != x2.Args[0] || mem != x2.Args[1] {
-                               continue
-                       }
-                       y3 := o3.Args[1]
-                       if y3.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x3 := y3.Args[0]
-                       if x3.Op != OpARM64MOVBUload || auxIntToInt32(x3.AuxInt) != 4 || auxToSym(x3.Aux) != s {
-                               continue
-                       }
-                       _ = x3.Args[1]
-                       if p != x3.Args[0] || mem != x3.Args[1] {
-                               continue
-                       }
-                       y4 := o2.Args[1]
-                       if y4.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x4 := y4.Args[0]
-                       if x4.Op != OpARM64MOVBUload || auxIntToInt32(x4.AuxInt) != 3 || auxToSym(x4.Aux) != s {
-                               continue
-                       }
-                       _ = x4.Args[1]
-                       if p != x4.Args[0] || mem != x4.Args[1] {
-                               continue
-                       }
-                       y5 := o1.Args[1]
-                       if y5.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x5 := y5.Args[0]
-                       if x5.Op != OpARM64MOVBUload || auxIntToInt32(x5.AuxInt) != 2 || auxToSym(x5.Aux) != s {
-                               continue
-                       }
-                       _ = x5.Args[1]
-                       if p != x5.Args[0] || mem != x5.Args[1] {
-                               continue
-                       }
-                       y6 := o0.Args[1]
-                       if y6.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x6 := y6.Args[0]
-                       if x6.Op != OpARM64MOVBUload || auxIntToInt32(x6.AuxInt) != 1 || auxToSym(x6.Aux) != s {
-                               continue
-                       }
-                       _ = x6.Args[1]
-                       p1 := x6.Args[0]
-                       if p1.Op != OpARM64ADD {
-                               continue
-                       }
-                       _ = p1.Args[1]
-                       p1_0 := p1.Args[0]
-                       p1_1 := p1.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, p1_0, p1_1 = _i1+1, p1_1, p1_0 {
-                               ptr1 := p1_0
-                               idx1 := p1_1
-                               if mem != x6.Args[1] {
-                                       continue
-                               }
-                               y7 := v_1
-                               if y7.Op != OpARM64MOVDnop {
-                                       continue
-                               }
-                               x7 := y7.Args[0]
-                               if x7.Op != OpARM64MOVBUloadidx {
-                                       continue
-                               }
-                               _ = x7.Args[2]
-                               ptr0 := x7.Args[0]
-                               idx0 := x7.Args[1]
-                               if mem != x7.Args[2] || !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-                               v0 := b.NewValue0(x6.Pos, OpARM64MOVDloadidx, t)
-                               v.copyOf(v0)
-                               v0.AddArg3(ptr0, idx0, mem)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [7] idx) mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [6] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [4] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y7:(MOVDnop x7:(MOVBUloadidx ptr idx mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr idx mem)
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       o0 := v_0
-                       if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 8 {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o1 := o0.Args[0]
-                       if o1.Op != OpARM64ORshiftLL || auxIntToInt64(o1.AuxInt) != 16 {
-                               continue
-                       }
-                       _ = o1.Args[1]
-                       o2 := o1.Args[0]
-                       if o2.Op != OpARM64ORshiftLL || auxIntToInt64(o2.AuxInt) != 24 {
-                               continue
-                       }
-                       _ = o2.Args[1]
-                       o3 := o2.Args[0]
-                       if o3.Op != OpARM64ORshiftLL || auxIntToInt64(o3.AuxInt) != 32 {
-                               continue
-                       }
-                       _ = o3.Args[1]
-                       o4 := o3.Args[0]
-                       if o4.Op != OpARM64ORshiftLL || auxIntToInt64(o4.AuxInt) != 40 {
-                               continue
-                       }
-                       _ = o4.Args[1]
-                       o5 := o4.Args[0]
-                       if o5.Op != OpARM64ORshiftLL || auxIntToInt64(o5.AuxInt) != 48 {
-                               continue
-                       }
-                       _ = o5.Args[1]
-                       s0 := o5.Args[0]
-                       if s0.Op != OpARM64SLLconst || auxIntToInt64(s0.AuxInt) != 56 {
-                               continue
-                       }
-                       y0 := s0.Args[0]
-                       if y0.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x0 := y0.Args[0]
-                       if x0.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       mem := x0.Args[2]
-                       ptr := x0.Args[0]
-                       x0_1 := x0.Args[1]
-                       if x0_1.Op != OpARM64ADDconst || auxIntToInt64(x0_1.AuxInt) != 7 {
-                               continue
-                       }
-                       idx := x0_1.Args[0]
-                       y1 := o5.Args[1]
-                       if y1.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x1 := y1.Args[0]
-                       if x1.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x1.Args[2]
-                       if ptr != x1.Args[0] {
-                               continue
-                       }
-                       x1_1 := x1.Args[1]
-                       if x1_1.Op != OpARM64ADDconst || auxIntToInt64(x1_1.AuxInt) != 6 || idx != x1_1.Args[0] || mem != x1.Args[2] {
-                               continue
-                       }
-                       y2 := o4.Args[1]
-                       if y2.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x2 := y2.Args[0]
-                       if x2.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x2.Args[2]
-                       if ptr != x2.Args[0] {
-                               continue
-                       }
-                       x2_1 := x2.Args[1]
-                       if x2_1.Op != OpARM64ADDconst || auxIntToInt64(x2_1.AuxInt) != 5 || idx != x2_1.Args[0] || mem != x2.Args[2] {
-                               continue
-                       }
-                       y3 := o3.Args[1]
-                       if y3.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x3 := y3.Args[0]
-                       if x3.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x3.Args[2]
-                       if ptr != x3.Args[0] {
-                               continue
-                       }
-                       x3_1 := x3.Args[1]
-                       if x3_1.Op != OpARM64ADDconst || auxIntToInt64(x3_1.AuxInt) != 4 || idx != x3_1.Args[0] || mem != x3.Args[2] {
-                               continue
-                       }
-                       y4 := o2.Args[1]
-                       if y4.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x4 := y4.Args[0]
-                       if x4.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x4.Args[2]
-                       if ptr != x4.Args[0] {
-                               continue
-                       }
-                       x4_1 := x4.Args[1]
-                       if x4_1.Op != OpARM64ADDconst || auxIntToInt64(x4_1.AuxInt) != 3 || idx != x4_1.Args[0] || mem != x4.Args[2] {
-                               continue
-                       }
-                       y5 := o1.Args[1]
-                       if y5.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x5 := y5.Args[0]
-                       if x5.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x5.Args[2]
-                       if ptr != x5.Args[0] {
-                               continue
-                       }
-                       x5_1 := x5.Args[1]
-                       if x5_1.Op != OpARM64ADDconst || auxIntToInt64(x5_1.AuxInt) != 2 || idx != x5_1.Args[0] || mem != x5.Args[2] {
-                               continue
-                       }
-                       y6 := o0.Args[1]
-                       if y6.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x6 := y6.Args[0]
-                       if x6.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x6.Args[2]
-                       if ptr != x6.Args[0] {
-                               continue
-                       }
-                       x6_1 := x6.Args[1]
-                       if x6_1.Op != OpARM64ADDconst || auxIntToInt64(x6_1.AuxInt) != 1 || idx != x6_1.Args[0] || mem != x6.Args[2] {
-                               continue
-                       }
-                       y7 := v_1
-                       if y7.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x7 := y7.Args[0]
-                       if x7.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x7.Args[2]
-                       if ptr != x7.Args[0] || idx != x7.Args[1] || mem != x7.Args[2] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-                       v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
-                       v.copyOf(v0)
-                       v0.AddArg3(ptr, idx, mem)
-                       return true
-               }
-               break
-       }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem))
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       o0 := v_0
-                       if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 8 {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o1 := o0.Args[0]
-                       if o1.Op != OpARM64ORshiftLL || auxIntToInt64(o1.AuxInt) != 16 {
-                               continue
-                       }
-                       _ = o1.Args[1]
-                       s0 := o1.Args[0]
-                       if s0.Op != OpARM64SLLconst || auxIntToInt64(s0.AuxInt) != 24 {
-                               continue
-                       }
-                       y0 := s0.Args[0]
-                       if y0.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x0 := y0.Args[0]
-                       if x0.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       y1 := o1.Args[1]
-                       if y1.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x1 := y1.Args[0]
-                       if x1.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] {
-                               continue
-                       }
-                       y2 := o0.Args[1]
-                       if y2.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x2 := y2.Args[0]
-                       if x2.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i2 := auxIntToInt32(x2.AuxInt)
-                       if auxToSym(x2.Aux) != s {
-                               continue
-                       }
-                       _ = x2.Args[1]
-                       if p != x2.Args[0] || mem != x2.Args[1] {
-                               continue
-                       }
-                       y3 := v_1
-                       if y3.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x3 := y3.Args[0]
-                       if x3.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i3 := auxIntToInt32(x3.AuxInt)
-                       if auxToSym(x3.Aux) != s {
-                               continue
-                       }
-                       _ = x3.Args[1]
-                       if p != x3.Args[0] || mem != x3.Args[1] || !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1, x2, x3)
-                       v0 := b.NewValue0(x3.Pos, OpARM64REVW, t)
-                       v.copyOf(v0)
-                       v1 := b.NewValue0(x3.Pos, OpARM64MOVWUload, t)
-                       v1.Aux = symToAux(s)
-                       v2 := b.NewValue0(x3.Pos, OpOffPtr, p.Type)
-                       v2.AuxInt = int64ToAuxInt(int64(i0))
-                       v2.AddArg(p)
-                       v1.AddArg2(v2, mem)
-                       v0.AddArg(v1)
-                       return true
-               }
-               break
-       }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       o0 := v_0
-                       if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 8 {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o1 := o0.Args[0]
-                       if o1.Op != OpARM64ORshiftLL || auxIntToInt64(o1.AuxInt) != 16 {
-                               continue
-                       }
-                       _ = o1.Args[1]
-                       s0 := o1.Args[0]
-                       if s0.Op != OpARM64SLLconst || auxIntToInt64(s0.AuxInt) != 24 {
-                               continue
-                       }
-                       y0 := s0.Args[0]
-                       if y0.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x0 := y0.Args[0]
-                       if x0.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       mem := x0.Args[2]
-                       ptr0 := x0.Args[0]
-                       idx0 := x0.Args[1]
-                       y1 := o1.Args[1]
-                       if y1.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x1 := y1.Args[0]
-                       if x1.Op != OpARM64MOVBUload || auxIntToInt32(x1.AuxInt) != 1 {
-                               continue
-                       }
-                       s := auxToSym(x1.Aux)
-                       _ = x1.Args[1]
-                       p1 := x1.Args[0]
-                       if p1.Op != OpARM64ADD {
-                               continue
-                       }
-                       _ = p1.Args[1]
-                       p1_0 := p1.Args[0]
-                       p1_1 := p1.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, p1_0, p1_1 = _i1+1, p1_1, p1_0 {
-                               ptr1 := p1_0
-                               idx1 := p1_1
-                               if mem != x1.Args[1] {
-                                       continue
-                               }
-                               y2 := o0.Args[1]
-                               if y2.Op != OpARM64MOVDnop {
-                                       continue
-                               }
-                               x2 := y2.Args[0]
-                               if x2.Op != OpARM64MOVBUload || auxIntToInt32(x2.AuxInt) != 2 || auxToSym(x2.Aux) != s {
-                                       continue
-                               }
-                               _ = x2.Args[1]
-                               p := x2.Args[0]
-                               if mem != x2.Args[1] {
-                                       continue
-                               }
-                               y3 := v_1
-                               if y3.Op != OpARM64MOVDnop {
-                                       continue
-                               }
-                               x3 := y3.Args[0]
-                               if x3.Op != OpARM64MOVBUload || auxIntToInt32(x3.AuxInt) != 3 || auxToSym(x3.Aux) != s {
-                                       continue
-                               }
-                               _ = x3.Args[1]
-                               if p != x3.Args[0] || mem != x3.Args[1] || !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, x2, x3)
-                               v0 := b.NewValue0(x3.Pos, OpARM64REVW, t)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x3.Pos, OpARM64MOVWUloadidx, t)
-                               v1.AddArg3(ptr0, idx0, mem)
-                               v0.AddArg(v1)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       o0 := v_0
-                       if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 8 {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o1 := o0.Args[0]
-                       if o1.Op != OpARM64ORshiftLL || auxIntToInt64(o1.AuxInt) != 16 {
-                               continue
-                       }
-                       _ = o1.Args[1]
-                       s0 := o1.Args[0]
-                       if s0.Op != OpARM64SLLconst || auxIntToInt64(s0.AuxInt) != 24 {
-                               continue
-                       }
-                       y0 := s0.Args[0]
-                       if y0.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x0 := y0.Args[0]
-                       if x0.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       mem := x0.Args[2]
-                       ptr := x0.Args[0]
-                       idx := x0.Args[1]
-                       y1 := o1.Args[1]
-                       if y1.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x1 := y1.Args[0]
-                       if x1.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x1.Args[2]
-                       if ptr != x1.Args[0] {
-                               continue
-                       }
-                       x1_1 := x1.Args[1]
-                       if x1_1.Op != OpARM64ADDconst || auxIntToInt64(x1_1.AuxInt) != 1 || idx != x1_1.Args[0] || mem != x1.Args[2] {
-                               continue
-                       }
-                       y2 := o0.Args[1]
-                       if y2.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x2 := y2.Args[0]
-                       if x2.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x2.Args[2]
-                       if ptr != x2.Args[0] {
-                               continue
-                       }
-                       x2_1 := x2.Args[1]
-                       if x2_1.Op != OpARM64ADDconst || auxIntToInt64(x2_1.AuxInt) != 2 || idx != x2_1.Args[0] || mem != x2.Args[2] {
-                               continue
-                       }
-                       y3 := v_1
-                       if y3.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x3 := y3.Args[0]
-                       if x3.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x3.Args[2]
-                       if ptr != x3.Args[0] {
-                               continue
-                       }
-                       x3_1 := x3.Args[1]
-                       if x3_1.Op != OpARM64ADDconst || auxIntToInt64(x3_1.AuxInt) != 3 || idx != x3_1.Args[0] || mem != x3.Args[2] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1, x2, x3)
-                       v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
-                       v.copyOf(v0)
-                       v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
-                       v1.AddArg3(ptr, idx, mem)
-                       v0.AddArg(v1)
-                       return true
-               }
-               break
-       }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem))
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       o0 := v_0
-                       if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 8 {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o1 := o0.Args[0]
-                       if o1.Op != OpARM64ORshiftLL || auxIntToInt64(o1.AuxInt) != 16 {
-                               continue
-                       }
-                       _ = o1.Args[1]
-                       o2 := o1.Args[0]
-                       if o2.Op != OpARM64ORshiftLL || auxIntToInt64(o2.AuxInt) != 24 {
-                               continue
-                       }
-                       _ = o2.Args[1]
-                       o3 := o2.Args[0]
-                       if o3.Op != OpARM64ORshiftLL || auxIntToInt64(o3.AuxInt) != 32 {
-                               continue
-                       }
-                       _ = o3.Args[1]
-                       o4 := o3.Args[0]
-                       if o4.Op != OpARM64ORshiftLL || auxIntToInt64(o4.AuxInt) != 40 {
-                               continue
-                       }
-                       _ = o4.Args[1]
-                       o5 := o4.Args[0]
-                       if o5.Op != OpARM64ORshiftLL || auxIntToInt64(o5.AuxInt) != 48 {
-                               continue
-                       }
-                       _ = o5.Args[1]
-                       s0 := o5.Args[0]
-                       if s0.Op != OpARM64SLLconst || auxIntToInt64(s0.AuxInt) != 56 {
-                               continue
-                       }
-                       y0 := s0.Args[0]
-                       if y0.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x0 := y0.Args[0]
-                       if x0.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       y1 := o5.Args[1]
-                       if y1.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x1 := y1.Args[0]
-                       if x1.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] {
-                               continue
-                       }
-                       y2 := o4.Args[1]
-                       if y2.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x2 := y2.Args[0]
-                       if x2.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i2 := auxIntToInt32(x2.AuxInt)
-                       if auxToSym(x2.Aux) != s {
-                               continue
-                       }
-                       _ = x2.Args[1]
-                       if p != x2.Args[0] || mem != x2.Args[1] {
-                               continue
-                       }
-                       y3 := o3.Args[1]
-                       if y3.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x3 := y3.Args[0]
-                       if x3.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i3 := auxIntToInt32(x3.AuxInt)
-                       if auxToSym(x3.Aux) != s {
-                               continue
-                       }
-                       _ = x3.Args[1]
-                       if p != x3.Args[0] || mem != x3.Args[1] {
-                               continue
-                       }
-                       y4 := o2.Args[1]
-                       if y4.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x4 := y4.Args[0]
-                       if x4.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i4 := auxIntToInt32(x4.AuxInt)
-                       if auxToSym(x4.Aux) != s {
-                               continue
-                       }
-                       _ = x4.Args[1]
-                       if p != x4.Args[0] || mem != x4.Args[1] {
-                               continue
-                       }
-                       y5 := o1.Args[1]
-                       if y5.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x5 := y5.Args[0]
-                       if x5.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i5 := auxIntToInt32(x5.AuxInt)
-                       if auxToSym(x5.Aux) != s {
-                               continue
-                       }
-                       _ = x5.Args[1]
-                       if p != x5.Args[0] || mem != x5.Args[1] {
-                               continue
-                       }
-                       y6 := o0.Args[1]
-                       if y6.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x6 := y6.Args[0]
-                       if x6.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i6 := auxIntToInt32(x6.AuxInt)
-                       if auxToSym(x6.Aux) != s {
-                               continue
-                       }
-                       _ = x6.Args[1]
-                       if p != x6.Args[0] || mem != x6.Args[1] {
-                               continue
-                       }
-                       y7 := v_1
-                       if y7.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x7 := y7.Args[0]
-                       if x7.Op != OpARM64MOVBUload {
-                               continue
-                       }
-                       i7 := auxIntToInt32(x7.AuxInt)
-                       if auxToSym(x7.Aux) != s {
-                               continue
-                       }
-                       _ = x7.Args[1]
-                       if p != x7.Args[0] || mem != x7.Args[1] || !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-                       v0 := b.NewValue0(x7.Pos, OpARM64REV, t)
-                       v.copyOf(v0)
-                       v1 := b.NewValue0(x7.Pos, OpARM64MOVDload, t)
-                       v1.Aux = symToAux(s)
-                       v2 := b.NewValue0(x7.Pos, OpOffPtr, p.Type)
-                       v2.AuxInt = int64ToAuxInt(int64(i0))
-                       v2.AddArg(p)
-                       v1.AddArg2(v2, mem)
-                       v0.AddArg(v1)
-                       return true
-               }
-               break
-       }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [6] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [7] {s} p mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       o0 := v_0
-                       if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 8 {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o1 := o0.Args[0]
-                       if o1.Op != OpARM64ORshiftLL || auxIntToInt64(o1.AuxInt) != 16 {
-                               continue
-                       }
-                       _ = o1.Args[1]
-                       o2 := o1.Args[0]
-                       if o2.Op != OpARM64ORshiftLL || auxIntToInt64(o2.AuxInt) != 24 {
-                               continue
-                       }
-                       _ = o2.Args[1]
-                       o3 := o2.Args[0]
-                       if o3.Op != OpARM64ORshiftLL || auxIntToInt64(o3.AuxInt) != 32 {
-                               continue
-                       }
-                       _ = o3.Args[1]
-                       o4 := o3.Args[0]
-                       if o4.Op != OpARM64ORshiftLL || auxIntToInt64(o4.AuxInt) != 40 {
-                               continue
-                       }
-                       _ = o4.Args[1]
-                       o5 := o4.Args[0]
-                       if o5.Op != OpARM64ORshiftLL || auxIntToInt64(o5.AuxInt) != 48 {
-                               continue
-                       }
-                       _ = o5.Args[1]
-                       s0 := o5.Args[0]
-                       if s0.Op != OpARM64SLLconst || auxIntToInt64(s0.AuxInt) != 56 {
-                               continue
-                       }
-                       y0 := s0.Args[0]
-                       if y0.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x0 := y0.Args[0]
-                       if x0.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       mem := x0.Args[2]
-                       ptr0 := x0.Args[0]
-                       idx0 := x0.Args[1]
-                       y1 := o5.Args[1]
-                       if y1.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x1 := y1.Args[0]
-                       if x1.Op != OpARM64MOVBUload || auxIntToInt32(x1.AuxInt) != 1 {
-                               continue
-                       }
-                       s := auxToSym(x1.Aux)
-                       _ = x1.Args[1]
-                       p1 := x1.Args[0]
-                       if p1.Op != OpARM64ADD {
-                               continue
-                       }
-                       _ = p1.Args[1]
-                       p1_0 := p1.Args[0]
-                       p1_1 := p1.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, p1_0, p1_1 = _i1+1, p1_1, p1_0 {
-                               ptr1 := p1_0
-                               idx1 := p1_1
-                               if mem != x1.Args[1] {
-                                       continue
-                               }
-                               y2 := o4.Args[1]
-                               if y2.Op != OpARM64MOVDnop {
-                                       continue
-                               }
-                               x2 := y2.Args[0]
-                               if x2.Op != OpARM64MOVBUload || auxIntToInt32(x2.AuxInt) != 2 || auxToSym(x2.Aux) != s {
-                                       continue
-                               }
-                               _ = x2.Args[1]
-                               p := x2.Args[0]
-                               if mem != x2.Args[1] {
-                                       continue
-                               }
-                               y3 := o3.Args[1]
-                               if y3.Op != OpARM64MOVDnop {
-                                       continue
-                               }
-                               x3 := y3.Args[0]
-                               if x3.Op != OpARM64MOVBUload || auxIntToInt32(x3.AuxInt) != 3 || auxToSym(x3.Aux) != s {
-                                       continue
-                               }
-                               _ = x3.Args[1]
-                               if p != x3.Args[0] || mem != x3.Args[1] {
-                                       continue
-                               }
-                               y4 := o2.Args[1]
-                               if y4.Op != OpARM64MOVDnop {
-                                       continue
-                               }
-                               x4 := y4.Args[0]
-                               if x4.Op != OpARM64MOVBUload || auxIntToInt32(x4.AuxInt) != 4 || auxToSym(x4.Aux) != s {
-                                       continue
-                               }
-                               _ = x4.Args[1]
-                               if p != x4.Args[0] || mem != x4.Args[1] {
-                                       continue
-                               }
-                               y5 := o1.Args[1]
-                               if y5.Op != OpARM64MOVDnop {
-                                       continue
-                               }
-                               x5 := y5.Args[0]
-                               if x5.Op != OpARM64MOVBUload || auxIntToInt32(x5.AuxInt) != 5 || auxToSym(x5.Aux) != s {
-                                       continue
-                               }
-                               _ = x5.Args[1]
-                               if p != x5.Args[0] || mem != x5.Args[1] {
-                                       continue
-                               }
-                               y6 := o0.Args[1]
-                               if y6.Op != OpARM64MOVDnop {
-                                       continue
-                               }
-                               x6 := y6.Args[0]
-                               if x6.Op != OpARM64MOVBUload || auxIntToInt32(x6.AuxInt) != 6 || auxToSym(x6.Aux) != s {
-                                       continue
-                               }
-                               _ = x6.Args[1]
-                               if p != x6.Args[0] || mem != x6.Args[1] {
-                                       continue
-                               }
-                               y7 := v_1
-                               if y7.Op != OpARM64MOVDnop {
-                                       continue
-                               }
-                               x7 := y7.Args[0]
-                               if x7.Op != OpARM64MOVBUload || auxIntToInt32(x7.AuxInt) != 7 || auxToSym(x7.Aux) != s {
-                                       continue
-                               }
-                               _ = x7.Args[1]
-                               if p != x7.Args[0] || mem != x7.Args[1] || !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-                               v0 := b.NewValue0(x7.Pos, OpARM64REV, t)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x7.Pos, OpARM64MOVDloadidx, t)
-                               v1.AddArg3(ptr0, idx0, mem)
-                               v0.AddArg(v1)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [4] idx) mem))) y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [5] idx) mem))) y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [6] idx) mem))) y7:(MOVDnop x7:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr idx mem))
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       o0 := v_0
-                       if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 8 {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o1 := o0.Args[0]
-                       if o1.Op != OpARM64ORshiftLL || auxIntToInt64(o1.AuxInt) != 16 {
-                               continue
-                       }
-                       _ = o1.Args[1]
-                       o2 := o1.Args[0]
-                       if o2.Op != OpARM64ORshiftLL || auxIntToInt64(o2.AuxInt) != 24 {
-                               continue
-                       }
-                       _ = o2.Args[1]
-                       o3 := o2.Args[0]
-                       if o3.Op != OpARM64ORshiftLL || auxIntToInt64(o3.AuxInt) != 32 {
-                               continue
-                       }
-                       _ = o3.Args[1]
-                       o4 := o3.Args[0]
-                       if o4.Op != OpARM64ORshiftLL || auxIntToInt64(o4.AuxInt) != 40 {
-                               continue
-                       }
-                       _ = o4.Args[1]
-                       o5 := o4.Args[0]
-                       if o5.Op != OpARM64ORshiftLL || auxIntToInt64(o5.AuxInt) != 48 {
-                               continue
-                       }
-                       _ = o5.Args[1]
-                       s0 := o5.Args[0]
-                       if s0.Op != OpARM64SLLconst || auxIntToInt64(s0.AuxInt) != 56 {
-                               continue
-                       }
-                       y0 := s0.Args[0]
-                       if y0.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x0 := y0.Args[0]
-                       if x0.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       mem := x0.Args[2]
-                       ptr := x0.Args[0]
-                       idx := x0.Args[1]
-                       y1 := o5.Args[1]
-                       if y1.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x1 := y1.Args[0]
-                       if x1.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x1.Args[2]
-                       if ptr != x1.Args[0] {
-                               continue
-                       }
-                       x1_1 := x1.Args[1]
-                       if x1_1.Op != OpARM64ADDconst || auxIntToInt64(x1_1.AuxInt) != 1 || idx != x1_1.Args[0] || mem != x1.Args[2] {
-                               continue
-                       }
-                       y2 := o4.Args[1]
-                       if y2.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x2 := y2.Args[0]
-                       if x2.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x2.Args[2]
-                       if ptr != x2.Args[0] {
-                               continue
-                       }
-                       x2_1 := x2.Args[1]
-                       if x2_1.Op != OpARM64ADDconst || auxIntToInt64(x2_1.AuxInt) != 2 || idx != x2_1.Args[0] || mem != x2.Args[2] {
-                               continue
-                       }
-                       y3 := o3.Args[1]
-                       if y3.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x3 := y3.Args[0]
-                       if x3.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x3.Args[2]
-                       if ptr != x3.Args[0] {
-                               continue
-                       }
-                       x3_1 := x3.Args[1]
-                       if x3_1.Op != OpARM64ADDconst || auxIntToInt64(x3_1.AuxInt) != 3 || idx != x3_1.Args[0] || mem != x3.Args[2] {
-                               continue
-                       }
-                       y4 := o2.Args[1]
-                       if y4.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x4 := y4.Args[0]
-                       if x4.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x4.Args[2]
-                       if ptr != x4.Args[0] {
-                               continue
-                       }
-                       x4_1 := x4.Args[1]
-                       if x4_1.Op != OpARM64ADDconst || auxIntToInt64(x4_1.AuxInt) != 4 || idx != x4_1.Args[0] || mem != x4.Args[2] {
-                               continue
-                       }
-                       y5 := o1.Args[1]
-                       if y5.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x5 := y5.Args[0]
-                       if x5.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x5.Args[2]
-                       if ptr != x5.Args[0] {
-                               continue
-                       }
-                       x5_1 := x5.Args[1]
-                       if x5_1.Op != OpARM64ADDconst || auxIntToInt64(x5_1.AuxInt) != 5 || idx != x5_1.Args[0] || mem != x5.Args[2] {
-                               continue
-                       }
-                       y6 := o0.Args[1]
-                       if y6.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x6 := y6.Args[0]
-                       if x6.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x6.Args[2]
-                       if ptr != x6.Args[0] {
-                               continue
-                       }
-                       x6_1 := x6.Args[1]
-                       if x6_1.Op != OpARM64ADDconst || auxIntToInt64(x6_1.AuxInt) != 6 || idx != x6_1.Args[0] || mem != x6.Args[2] {
-                               continue
-                       }
-                       y7 := v_1
-                       if y7.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x7 := y7.Args[0]
-                       if x7.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x7.Args[2]
-                       if ptr != x7.Args[0] {
-                               continue
-                       }
-                       x7_1 := x7.Args[1]
-                       if x7_1.Op != OpARM64ADDconst || auxIntToInt64(x7_1.AuxInt) != 7 || idx != x7_1.Args[0] || mem != x7.Args[2] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-                       v0 := b.NewValue0(v.Pos, OpARM64REV, t)
-                       v.copyOf(v0)
-                       v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
-                       v1.AddArg3(ptr, idx, mem)
-                       v0.AddArg(v1)
-                       return true
-               }
-               break
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ORN(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (ORN x (MOVDconst [c]))
-       // result: (ORconst [^c] x)
-       for {
-               x := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpARM64ORconst)
-               v.AuxInt = int64ToAuxInt(^c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORN x x)
-       // result: (MOVDconst [-1])
-       for {
-               x := v_0
-               if x != v_1 {
-                       break
-               }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(-1)
-               return true
-       }
-       // match: (ORN x0 x1:(SLLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORNshiftLL x0 y [c])
-       for {
-               x0 := v_0
-               x1 := v_1
-               if x1.Op != OpARM64SLLconst {
-                       break
-               }
-               c := auxIntToInt64(x1.AuxInt)
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
-                       break
-               }
-               v.reset(OpARM64ORNshiftLL)
-               v.AuxInt = int64ToAuxInt(c)
-               v.AddArg2(x0, y)
-               return true
-       }
-       // match: (ORN x0 x1:(SRLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORNshiftRL x0 y [c])
-       for {
-               x0 := v_0
-               x1 := v_1
-               if x1.Op != OpARM64SRLconst {
-                       break
-               }
-               c := auxIntToInt64(x1.AuxInt)
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
-                       break
-               }
-               v.reset(OpARM64ORNshiftRL)
-               v.AuxInt = int64ToAuxInt(c)
-               v.AddArg2(x0, y)
-               return true
-       }
-       // match: (ORN x0 x1:(SRAconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORNshiftRA x0 y [c])
-       for {
-               x0 := v_0
-               x1 := v_1
-               if x1.Op != OpARM64SRAconst {
-                       break
-               }
-               c := auxIntToInt64(x1.AuxInt)
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
-                       break
-               }
-               v.reset(OpARM64ORNshiftRA)
-               v.AuxInt = int64ToAuxInt(c)
-               v.AddArg2(x0, y)
-               return true
-       }
-       // match: (ORN x0 x1:(RORconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORNshiftRO x0 y [c])
-       for {
-               x0 := v_0
-               x1 := v_1
-               if x1.Op != OpARM64RORconst {
-                       break
-               }
-               c := auxIntToInt64(x1.AuxInt)
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
-                       break
-               }
-               v.reset(OpARM64ORNshiftRO)
-               v.AuxInt = int64ToAuxInt(c)
-               v.AddArg2(x0, y)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ORNshiftLL(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (ORNshiftLL x (MOVDconst [c]) [d])
-       // result: (ORconst x [^int64(uint64(c)<<uint64(d))])
-       for {
-               d := auxIntToInt64(v.AuxInt)
-               x := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpARM64ORconst)
-               v.AuxInt = int64ToAuxInt(^int64(uint64(c) << uint64(d)))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORNshiftLL (SLLconst x [c]) x [c])
-       // result: (MOVDconst [-1])
-       for {
-               c := auxIntToInt64(v.AuxInt)
-               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != c {
-                       break
-               }
-               x := v_0.Args[0]
-               if x != v_1 {
-                       break
-               }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(-1)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ORNshiftRA(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (ORNshiftRA x (MOVDconst [c]) [d])
-       // result: (ORconst x [^(c>>uint64(d))])
-       for {
-               d := auxIntToInt64(v.AuxInt)
-               x := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpARM64ORconst)
-               v.AuxInt = int64ToAuxInt(^(c >> uint64(d)))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORNshiftRA (SRAconst x [c]) x [c])
-       // result: (MOVDconst [-1])
-       for {
-               c := auxIntToInt64(v.AuxInt)
-               if v_0.Op != OpARM64SRAconst || auxIntToInt64(v_0.AuxInt) != c {
-                       break
-               }
-               x := v_0.Args[0]
-               if x != v_1 {
-                       break
-               }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(-1)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ORNshiftRL(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (ORNshiftRL x (MOVDconst [c]) [d])
-       // result: (ORconst x [^int64(uint64(c)>>uint64(d))])
-       for {
-               d := auxIntToInt64(v.AuxInt)
-               x := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpARM64ORconst)
-               v.AuxInt = int64ToAuxInt(^int64(uint64(c) >> uint64(d)))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORNshiftRL (SRLconst x [c]) x [c])
-       // result: (MOVDconst [-1])
-       for {
-               c := auxIntToInt64(v.AuxInt)
-               if v_0.Op != OpARM64SRLconst || auxIntToInt64(v_0.AuxInt) != c {
-                       break
-               }
-               x := v_0.Args[0]
-               if x != v_1 {
-                       break
-               }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(-1)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ORNshiftRO(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (ORNshiftRO x (MOVDconst [c]) [d])
-       // result: (ORconst x [^rotateRight64(c, d)])
-       for {
-               d := auxIntToInt64(v.AuxInt)
-               x := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpARM64ORconst)
-               v.AuxInt = int64ToAuxInt(^rotateRight64(c, d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORNshiftRO (RORconst x [c]) x [c])
-       // result: (MOVDconst [-1])
-       for {
-               c := auxIntToInt64(v.AuxInt)
-               if v_0.Op != OpARM64RORconst || auxIntToInt64(v_0.AuxInt) != c {
-                       break
-               }
-               x := v_0.Args[0]
-               if x != v_1 {
-                       break
-               }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(-1)
-               return true
+               break
        }
-       return false
-}
-func rewriteValueARM64_OpARM64ORconst(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (ORconst [0] x)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: int32(c)==1
        // result: x
        for {
-               if auxIntToInt64(v.AuxInt) != 0 {
-                       break
-               }
-               x := v_0
-               v.copyOf(x)
-               return true
-       }
-       // match: (ORconst [-1] _)
-       // result: (MOVDconst [-1])
-       for {
-               if auxIntToInt64(v.AuxInt) != -1 {
-                       break
-               }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(-1)
-               return true
-       }
-       // match: (ORconst [c] (MOVDconst [d]))
-       // result: (MOVDconst [c|d])
-       for {
-               c := auxIntToInt64(v.AuxInt)
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               d := auxIntToInt64(v_0.AuxInt)
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64ToAuxInt(c | d)
-               return true
-       }
-       // match: (ORconst [c] (ORconst [d] x))
-       // result: (ORconst [c|d] x)
-       for {
-               c := auxIntToInt64(v.AuxInt)
-               if v_0.Op != OpARM64ORconst {
-                       break
-               }
-               d := auxIntToInt64(v_0.AuxInt)
-               x := v_0.Args[0]
-               v.reset(OpARM64ORconst)
-               v.AuxInt = int64ToAuxInt(c | d)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORconst [c1] (ANDconst [c2] x))
-       // cond: c2|c1 == ^0
-       // result: (ORconst [c1] x)
-       for {
-               c1 := auxIntToInt64(v.AuxInt)
-               if v_0.Op != OpARM64ANDconst {
-                       break
-               }
-               c2 := auxIntToInt64(v_0.AuxInt)
-               x := v_0.Args[0]
-               if !(c2|c1 == ^0) {
-                       break
-               }
-               v.reset(OpARM64ORconst)
-               v.AuxInt = int64ToAuxInt(c1)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (ORshiftLL (MOVDconst [c]) x [d])
-       // result: (ORconst [c] (SLLconst <x.Type> x [d]))
-       for {
-               d := auxIntToInt64(v.AuxInt)
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               x := v_1
-               v.reset(OpARM64ORconst)
-               v.AuxInt = int64ToAuxInt(c)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = int64ToAuxInt(d)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (ORshiftLL x (MOVDconst [c]) [d])
-       // result: (ORconst x [int64(uint64(c)<<uint64(d))])
-       for {
-               d := auxIntToInt64(v.AuxInt)
-               x := v_0
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpARM64ORconst)
-               v.AuxInt = int64ToAuxInt(int64(uint64(c) << uint64(d)))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORshiftLL y:(SLLconst x [c]) x [c])
-       // result: y
-       for {
-               c := auxIntToInt64(v.AuxInt)
-               y := v_0
-               if y.Op != OpARM64SLLconst || auxIntToInt64(y.AuxInt) != c {
-                       break
-               }
-               x := y.Args[0]
-               if x != v_1 {
-                       break
-               }
-               v.copyOf(y)
-               return true
-       }
-       // match: (ORshiftLL <typ.UInt16> [8] (UBFX <typ.UInt16> [armBFAuxInt(8, 8)] x) x)
-       // result: (REV16W x)
-       for {
-               if v.Type != typ.UInt16 || auxIntToInt64(v.AuxInt) != 8 || v_0.Op != OpARM64UBFX || v_0.Type != typ.UInt16 || auxIntToArm64BitField(v_0.AuxInt) != armBFAuxInt(8, 8) {
-                       break
-               }
-               x := v_0.Args[0]
-               if x != v_1 {
-                       break
-               }
-               v.reset(OpARM64REV16W)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORshiftLL [8] (UBFX [armBFAuxInt(8, 24)] (ANDconst [c1] x)) (ANDconst [c2] x))
-       // cond: uint32(c1) == 0xff00ff00 && uint32(c2) == 0x00ff00ff
-       // result: (REV16W x)
-       for {
-               if auxIntToInt64(v.AuxInt) != 8 || v_0.Op != OpARM64UBFX || auxIntToArm64BitField(v_0.AuxInt) != armBFAuxInt(8, 24) {
-                       break
-               }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpARM64ANDconst {
-                       break
-               }
-               c1 := auxIntToInt64(v_0_0.AuxInt)
-               x := v_0_0.Args[0]
-               if v_1.Op != OpARM64ANDconst {
-                       break
-               }
-               c2 := auxIntToInt64(v_1.AuxInt)
-               if x != v_1.Args[0] || !(uint32(c1) == 0xff00ff00 && uint32(c2) == 0x00ff00ff) {
-                       break
-               }
-               v.reset(OpARM64REV16W)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORshiftLL [8] (SRLconst [8] (ANDconst [c1] x)) (ANDconst [c2] x))
-       // cond: (uint64(c1) == 0xff00ff00ff00ff00 && uint64(c2) == 0x00ff00ff00ff00ff)
-       // result: (REV16 x)
-       for {
-               if auxIntToInt64(v.AuxInt) != 8 || v_0.Op != OpARM64SRLconst || auxIntToInt64(v_0.AuxInt) != 8 {
-                       break
-               }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpARM64ANDconst {
-                       break
-               }
-               c1 := auxIntToInt64(v_0_0.AuxInt)
-               x := v_0_0.Args[0]
-               if v_1.Op != OpARM64ANDconst {
-                       break
-               }
-               c2 := auxIntToInt64(v_1.AuxInt)
-               if x != v_1.Args[0] || !(uint64(c1) == 0xff00ff00ff00ff00 && uint64(c2) == 0x00ff00ff00ff00ff) {
-                       break
-               }
-               v.reset(OpARM64REV16)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORshiftLL [8] (SRLconst [8] (ANDconst [c1] x)) (ANDconst [c2] x))
-       // cond: (uint64(c1) == 0xff00ff00 && uint64(c2) == 0x00ff00ff)
-       // result: (REV16 (ANDconst <x.Type> [0xffffffff] x))
-       for {
-               if auxIntToInt64(v.AuxInt) != 8 || v_0.Op != OpARM64SRLconst || auxIntToInt64(v_0.AuxInt) != 8 {
-                       break
-               }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpARM64ANDconst {
-                       break
-               }
-               c1 := auxIntToInt64(v_0_0.AuxInt)
-               x := v_0_0.Args[0]
-               if v_1.Op != OpARM64ANDconst {
-                       break
-               }
-               c2 := auxIntToInt64(v_1.AuxInt)
-               if x != v_1.Args[0] || !(uint64(c1) == 0xff00ff00 && uint64(c2) == 0x00ff00ff) {
-                       break
-               }
-               v.reset(OpARM64REV16)
-               v0 := b.NewValue0(v.Pos, OpARM64ANDconst, x.Type)
-               v0.AuxInt = int64ToAuxInt(0xffffffff)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: ( ORshiftLL [c] (SRLconst x [64-c]) x2)
-       // result: (EXTRconst [64-c] x2 x)
-       for {
-               c := auxIntToInt64(v.AuxInt)
-               if v_0.Op != OpARM64SRLconst || auxIntToInt64(v_0.AuxInt) != 64-c {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_1.AuxInt)
+                       if !(int32(c) == 1) {
+                               continue
+                       }
+                       v.copyOf(x)
+                       return true
                }
-               x := v_0.Args[0]
-               x2 := v_1
-               v.reset(OpARM64EXTRconst)
-               v.AuxInt = int64ToAuxInt(64 - c)
-               v.AddArg2(x2, x)
-               return true
+               break
        }
-       // match: ( ORshiftLL <t> [c] (UBFX [bfc] x) x2)
-       // cond: c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
-       // result: (EXTRWconst [32-c] x2 x)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: isPowerOfTwo64(c)
+       // result: (SLLconst [log64(c)] x)
        for {
-               t := v.Type
-               c := auxIntToInt64(v.AuxInt)
-               if v_0.Op != OpARM64UBFX {
-                       break
-               }
-               bfc := auxIntToArm64BitField(v_0.AuxInt)
-               x := v_0.Args[0]
-               x2 := v_1
-               if !(c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_1.AuxInt)
+                       if !(isPowerOfTwo64(c)) {
+                               continue
+                       }
+                       v.reset(OpARM64SLLconst)
+                       v.AuxInt = int64ToAuxInt(log64(c))
+                       v.AddArg(x)
+                       return true
                }
-               v.reset(OpARM64EXTRWconst)
-               v.AuxInt = int64ToAuxInt(32 - c)
-               v.AddArg2(x2, x)
-               return true
+               break
        }
-       // match: (ORshiftLL [sc] (UBFX [bfc] x) (SRLconst [sc] y))
-       // cond: sc == bfc.getARM64BFwidth()
-       // result: (BFXIL [bfc] y x)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: isPowerOfTwo64(c-1) && int32(c) >= 3
+       // result: (ADDshiftLL x x [log64(c-1)])
        for {
-               sc := auxIntToInt64(v.AuxInt)
-               if v_0.Op != OpARM64UBFX {
-                       break
-               }
-               bfc := auxIntToArm64BitField(v_0.AuxInt)
-               x := v_0.Args[0]
-               if v_1.Op != OpARM64SRLconst || auxIntToInt64(v_1.AuxInt) != sc {
-                       break
-               }
-               y := v_1.Args[0]
-               if !(sc == bfc.getARM64BFwidth()) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_1.AuxInt)
+                       if !(isPowerOfTwo64(c-1) && int32(c) >= 3) {
+                               continue
+                       }
+                       v.reset(OpARM64ADDshiftLL)
+                       v.AuxInt = int64ToAuxInt(log64(c - 1))
+                       v.AddArg2(x, x)
+                       return true
                }
-               v.reset(OpARM64BFXIL)
-               v.AuxInt = arm64BitFieldToAuxInt(bfc)
-               v.AddArg2(y, x)
-               return true
+               break
        }
-       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
-       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, y0, y1)
-       // result: @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: isPowerOfTwo64(c+1) && int32(c) >= 7
+       // result: (ADDshiftLL (NEG <x.Type> x) x [log64(c+1)])
        for {
-               t := v.Type
-               if auxIntToInt64(v.AuxInt) != 8 {
-                       break
-               }
-               y0 := v_0
-               if y0.Op != OpARM64MOVDnop {
-                       break
-               }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
-                       break
-               }
-               i0 := auxIntToInt32(x0.AuxInt)
-               s := auxToSym(x0.Aux)
-               mem := x0.Args[1]
-               p := x0.Args[0]
-               y1 := v_1
-               if y1.Op != OpARM64MOVDnop {
-                       break
-               }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
-                       break
-               }
-               i1 := auxIntToInt32(x1.AuxInt)
-               if auxToSym(x1.Aux) != s {
-                       break
-               }
-               _ = x1.Args[1]
-               if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, y0, y1)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_1.AuxInt)
+                       if !(isPowerOfTwo64(c+1) && int32(c) >= 7) {
+                               continue
+                       }
+                       v.reset(OpARM64ADDshiftLL)
+                       v.AuxInt = int64ToAuxInt(log64(c + 1))
+                       v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+                       v0.AddArg(x)
+                       v.AddArg2(v0, x)
+                       return true
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(x1.Pos, OpARM64MOVHUload, t)
-               v.copyOf(v0)
-               v0.Aux = symToAux(s)
-               v1 := b.NewValue0(x1.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = int64ToAuxInt(int64(i0))
-               v1.AddArg(p)
-               v0.AddArg2(v1, mem)
-               return true
+               break
        }
-       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0, x1, y0, y1)
-       // result: @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr0 idx0 mem)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c)
+       // result: (SLLconst [log64(c/3)] (ADDshiftLL <x.Type> x x [1]))
        for {
-               t := v.Type
-               if auxIntToInt64(v.AuxInt) != 8 {
-                       break
-               }
-               y0 := v_0
-               if y0.Op != OpARM64MOVDnop {
-                       break
-               }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               mem := x0.Args[2]
-               ptr0 := x0.Args[0]
-               idx0 := x0.Args[1]
-               y1 := v_1
-               if y1.Op != OpARM64MOVDnop {
-                       break
-               }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload || auxIntToInt32(x1.AuxInt) != 1 {
-                       break
-               }
-               s := auxToSym(x1.Aux)
-               _ = x1.Args[1]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
-                       break
-               }
-               _ = p1.Args[1]
-               p1_0 := p1.Args[0]
-               p1_1 := p1.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, p1_0, p1_1 = _i0+1, p1_1, p1_0 {
-                       ptr1 := p1_0
-                       idx1 := p1_1
-                       if mem != x1.Args[1] || !(s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0, x1, y0, y1)) {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_1.AuxInt)
+                       if !(c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c)) {
                                continue
                        }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpARM64MOVHUloadidx, t)
-                       v.copyOf(v0)
-                       v0.AddArg3(ptr0, idx0, mem)
+                       v.reset(OpARM64SLLconst)
+                       v.AuxInt = int64ToAuxInt(log64(c / 3))
+                       v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+                       v0.AuxInt = int64ToAuxInt(1)
+                       v0.AddArg2(x, x)
+                       v.AddArg(v0)
                        return true
                }
                break
        }
-       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, y0, y1)
-       // result: @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr idx mem)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c)
+       // result: (SLLconst [log64(c/5)] (ADDshiftLL <x.Type> x x [2]))
        for {
-               t := v.Type
-               if auxIntToInt64(v.AuxInt) != 8 {
-                       break
-               }
-               y0 := v_0
-               if y0.Op != OpARM64MOVDnop {
-                       break
-               }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               mem := x0.Args[2]
-               ptr := x0.Args[0]
-               idx := x0.Args[1]
-               y1 := v_1
-               if y1.Op != OpARM64MOVDnop {
-                       break
-               }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] {
-                       break
-               }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64ADDconst || auxIntToInt64(x1_1.AuxInt) != 1 || idx != x1_1.Args[0] || mem != x1.Args[2] || !(x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, y0, y1)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_1.AuxInt)
+                       if !(c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c)) {
+                               continue
+                       }
+                       v.reset(OpARM64SLLconst)
+                       v.AuxInt = int64ToAuxInt(log64(c / 5))
+                       v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+                       v0.AuxInt = int64ToAuxInt(2)
+                       v0.AddArg2(x, x)
+                       v.AddArg(v0)
+                       return true
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVHUloadidx, t)
-               v.copyOf(v0)
-               v0.AddArg3(ptr, idx, mem)
-               return true
+               break
        }
-       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUload [i0] {s} p mem) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i3] {s} p mem)))
-       // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, y1, y2, o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c)
+       // result: (SLLconst [log64(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
        for {
-               t := v.Type
-               if auxIntToInt64(v.AuxInt) != 24 {
-                       break
-               }
-               o0 := v_0
-               if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 16 {
-                       break
-               }
-               _ = o0.Args[1]
-               x0 := o0.Args[0]
-               if x0.Op != OpARM64MOVHUload {
-                       break
-               }
-               i0 := auxIntToInt32(x0.AuxInt)
-               s := auxToSym(x0.Aux)
-               mem := x0.Args[1]
-               p := x0.Args[0]
-               y1 := o0.Args[1]
-               if y1.Op != OpARM64MOVDnop {
-                       break
-               }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
-                       break
-               }
-               i2 := auxIntToInt32(x1.AuxInt)
-               if auxToSym(x1.Aux) != s {
-                       break
-               }
-               _ = x1.Args[1]
-               if p != x1.Args[0] || mem != x1.Args[1] {
-                       break
-               }
-               y2 := v_1
-               if y2.Op != OpARM64MOVDnop {
-                       break
-               }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
-                       break
-               }
-               i3 := auxIntToInt32(x2.AuxInt)
-               if auxToSym(x2.Aux) != s {
-                       break
-               }
-               _ = x2.Args[1]
-               if p != x2.Args[0] || mem != x2.Args[1] || !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, y1, y2, o0)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_1.AuxInt)
+                       if !(c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c)) {
+                               continue
+                       }
+                       v.reset(OpARM64SLLconst)
+                       v.AuxInt = int64ToAuxInt(log64(c / 7))
+                       v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+                       v0.AuxInt = int64ToAuxInt(3)
+                       v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+                       v1.AddArg(x)
+                       v0.AddArg2(v1, x)
+                       v.AddArg(v0)
+                       return true
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(x2.Pos, OpARM64MOVWUload, t)
-               v.copyOf(v0)
-               v0.Aux = symToAux(s)
-               v1 := b.NewValue0(x2.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = int64ToAuxInt(int64(i0))
-               v1.AddArg(p)
-               v0.AddArg2(v1, mem)
-               return true
+               break
        }
-       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUloadidx ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2, y1, y2, o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 idx0 mem)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c)
+       // result: (SLLconst [log64(c/9)] (ADDshiftLL <x.Type> x x [3]))
        for {
-               t := v.Type
-               if auxIntToInt64(v.AuxInt) != 24 {
-                       break
-               }
-               o0 := v_0
-               if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 16 {
-                       break
-               }
-               _ = o0.Args[1]
-               x0 := o0.Args[0]
-               if x0.Op != OpARM64MOVHUloadidx {
-                       break
-               }
-               mem := x0.Args[2]
-               ptr0 := x0.Args[0]
-               idx0 := x0.Args[1]
-               y1 := o0.Args[1]
-               if y1.Op != OpARM64MOVDnop {
-                       break
-               }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload || auxIntToInt32(x1.AuxInt) != 2 {
-                       break
-               }
-               s := auxToSym(x1.Aux)
-               _ = x1.Args[1]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_1.AuxInt)
+                       if !(c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c)) {
+                               continue
+                       }
+                       v.reset(OpARM64SLLconst)
+                       v.AuxInt = int64ToAuxInt(log64(c / 9))
+                       v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+                       v0.AuxInt = int64ToAuxInt(3)
+                       v0.AddArg2(x, x)
+                       v.AddArg(v0)
+                       return true
                }
-               _ = p1.Args[1]
-               p1_0 := p1.Args[0]
-               p1_1 := p1.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, p1_0, p1_1 = _i0+1, p1_1, p1_0 {
-                       ptr1 := p1_0
-                       idx1 := p1_1
-                       if mem != x1.Args[1] {
-                               continue
-                       }
-                       y2 := v_1
-                       if y2.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x2 := y2.Args[0]
-                       if x2.Op != OpARM64MOVBUload || auxIntToInt32(x2.AuxInt) != 3 || auxToSym(x2.Aux) != s {
+               break
+       }
+       // match: (MULW (MOVDconst [c]) (MOVDconst [d]))
+       // result: (MOVDconst [int64(int32(c)*int32(d))])
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpARM64MOVDconst {
                                continue
                        }
-                       _ = x2.Args[1]
-                       p := x2.Args[0]
-                       if mem != x2.Args[1] || !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2, y1, y2, o0)) {
+                       c := auxIntToInt64(v_0.AuxInt)
+                       if v_1.Op != OpARM64MOVDconst {
                                continue
                        }
-                       b = mergePoint(b, x0, x1, x2)
-                       v0 := b.NewValue0(x2.Pos, OpARM64MOVWUloadidx, t)
-                       v.copyOf(v0)
-                       v0.AddArg3(ptr0, idx0, mem)
+                       d := auxIntToInt64(v_1.AuxInt)
+                       v.reset(OpARM64MOVDconst)
+                       v.AuxInt = int64ToAuxInt(int64(int32(c) * int32(d)))
                        return true
                }
                break
        }
-       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUloadidx ptr idx mem) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, y1, y2, o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr idx mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MVN(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (MVN (XOR x y))
+       // result: (EON x y)
        for {
-               t := v.Type
-               if auxIntToInt64(v.AuxInt) != 24 {
-                       break
-               }
-               o0 := v_0
-               if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 16 {
-                       break
-               }
-               _ = o0.Args[1]
-               x0 := o0.Args[0]
-               if x0.Op != OpARM64MOVHUloadidx {
-                       break
-               }
-               mem := x0.Args[2]
-               ptr := x0.Args[0]
-               idx := x0.Args[1]
-               y1 := o0.Args[1]
-               if y1.Op != OpARM64MOVDnop {
-                       break
-               }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] {
-                       break
-               }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64ADDconst || auxIntToInt64(x1_1.AuxInt) != 2 || idx != x1_1.Args[0] || mem != x1.Args[2] {
-                       break
-               }
-               y2 := v_1
-               if y2.Op != OpARM64MOVDnop {
-                       break
-               }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               _ = x2.Args[2]
-               if ptr != x2.Args[0] {
-                       break
-               }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64ADDconst || auxIntToInt64(x2_1.AuxInt) != 3 || idx != x2_1.Args[0] || mem != x2.Args[2] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, y1, y2, o0)) {
+               if v_0.Op != OpARM64XOR {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
-               v.copyOf(v0)
-               v0.AddArg3(ptr, idx, mem)
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               v.reset(OpARM64EON)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUloadidx2 ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADDshiftLL [1] ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && isSamePtr(p1, p) && clobber(x0, x1, x2, y1, y2, o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 (SLLconst <idx0.Type> [1] idx0) mem)
+       // match: (MVN (MOVDconst [c]))
+       // result: (MOVDconst [^c])
        for {
-               t := v.Type
-               if auxIntToInt64(v.AuxInt) != 24 {
-                       break
-               }
-               o0 := v_0
-               if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 16 {
-                       break
-               }
-               _ = o0.Args[1]
-               x0 := o0.Args[0]
-               if x0.Op != OpARM64MOVHUloadidx2 {
-                       break
-               }
-               mem := x0.Args[2]
-               ptr0 := x0.Args[0]
-               idx0 := x0.Args[1]
-               y1 := o0.Args[1]
-               if y1.Op != OpARM64MOVDnop {
-                       break
-               }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload || auxIntToInt32(x1.AuxInt) != 2 {
-                       break
-               }
-               s := auxToSym(x1.Aux)
-               _ = x1.Args[1]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADDshiftLL || auxIntToInt64(p1.AuxInt) != 1 {
-                       break
-               }
-               idx1 := p1.Args[1]
-               ptr1 := p1.Args[0]
-               if mem != x1.Args[1] {
-                       break
-               }
-               y2 := v_1
-               if y2.Op != OpARM64MOVDnop {
-                       break
-               }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload || auxIntToInt32(x2.AuxInt) != 3 || auxToSym(x2.Aux) != s {
-                       break
-               }
-               _ = x2.Args[1]
-               p := x2.Args[0]
-               if mem != x2.Args[1] || !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && isSamePtr(p1, p) && clobber(x0, x1, x2, y1, y2, o0)) {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(x2.Pos, OpARM64MOVWUloadidx, t)
-               v.copyOf(v0)
-               v1 := b.NewValue0(x2.Pos, OpARM64SLLconst, idx0.Type)
-               v1.AuxInt = int64ToAuxInt(1)
-               v1.AddArg(idx0)
-               v0.AddArg3(ptr0, v1, mem)
+               c := auxIntToInt64(v_0.AuxInt)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(^c)
                return true
        }
-       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUload [i0] {s} p mem) y1:(MOVDnop x1:(MOVBUload [i4] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i7] {s} p mem)))
-       // cond: i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
+       // match: (MVN x:(SLLconst [c] y))
+       // cond: clobberIfDead(x)
+       // result: (MVNshiftLL [c] y)
        for {
-               t := v.Type
-               if auxIntToInt64(v.AuxInt) != 56 {
-                       break
-               }
-               o0 := v_0
-               if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 48 {
-                       break
-               }
-               _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL || auxIntToInt64(o1.AuxInt) != 40 {
-                       break
-               }
-               _ = o1.Args[1]
-               o2 := o1.Args[0]
-               if o2.Op != OpARM64ORshiftLL || auxIntToInt64(o2.AuxInt) != 32 {
-                       break
-               }
-               _ = o2.Args[1]
-               x0 := o2.Args[0]
-               if x0.Op != OpARM64MOVWUload {
-                       break
-               }
-               i0 := auxIntToInt32(x0.AuxInt)
-               s := auxToSym(x0.Aux)
-               mem := x0.Args[1]
-               p := x0.Args[0]
-               y1 := o2.Args[1]
-               if y1.Op != OpARM64MOVDnop {
-                       break
-               }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
-                       break
-               }
-               i4 := auxIntToInt32(x1.AuxInt)
-               if auxToSym(x1.Aux) != s {
+               x := v_0
+               if x.Op != OpARM64SLLconst {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] || mem != x1.Args[1] {
+               c := auxIntToInt64(x.AuxInt)
+               y := x.Args[0]
+               if !(clobberIfDead(x)) {
                        break
                }
-               y2 := o1.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               v.reset(OpARM64MVNshiftLL)
+               v.AuxInt = int64ToAuxInt(c)
+               v.AddArg(y)
+               return true
+       }
+       // match: (MVN x:(SRLconst [c] y))
+       // cond: clobberIfDead(x)
+       // result: (MVNshiftRL [c] y)
+       for {
+               x := v_0
+               if x.Op != OpARM64SRLconst {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               c := auxIntToInt64(x.AuxInt)
+               y := x.Args[0]
+               if !(clobberIfDead(x)) {
                        break
                }
-               i5 := auxIntToInt32(x2.AuxInt)
-               if auxToSym(x2.Aux) != s {
+               v.reset(OpARM64MVNshiftRL)
+               v.AuxInt = int64ToAuxInt(c)
+               v.AddArg(y)
+               return true
+       }
+       // match: (MVN x:(SRAconst [c] y))
+       // cond: clobberIfDead(x)
+       // result: (MVNshiftRA [c] y)
+       for {
+               x := v_0
+               if x.Op != OpARM64SRAconst {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] || mem != x2.Args[1] {
+               c := auxIntToInt64(x.AuxInt)
+               y := x.Args[0]
+               if !(clobberIfDead(x)) {
                        break
                }
-               y3 := o0.Args[1]
-               if y3.Op != OpARM64MOVDnop {
+               v.reset(OpARM64MVNshiftRA)
+               v.AuxInt = int64ToAuxInt(c)
+               v.AddArg(y)
+               return true
+       }
+       // match: (MVN x:(RORconst [c] y))
+       // cond: clobberIfDead(x)
+       // result: (MVNshiftRO [c] y)
+       for {
+               x := v_0
+               if x.Op != OpARM64RORconst {
                        break
                }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
+               c := auxIntToInt64(x.AuxInt)
+               y := x.Args[0]
+               if !(clobberIfDead(x)) {
                        break
                }
-               i6 := auxIntToInt32(x3.AuxInt)
-               if auxToSym(x3.Aux) != s {
+               v.reset(OpARM64MVNshiftRO)
+               v.AuxInt = int64ToAuxInt(c)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MVNshiftLL(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (MVNshiftLL (MOVDconst [c]) [d])
+       // result: (MOVDconst [^int64(uint64(c)<<uint64(d))])
+       for {
+               d := auxIntToInt64(v.AuxInt)
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x3.Args[1]
-               if p != x3.Args[0] || mem != x3.Args[1] {
+               c := auxIntToInt64(v_0.AuxInt)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(^int64(uint64(c) << uint64(d)))
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MVNshiftRA(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (MVNshiftRA (MOVDconst [c]) [d])
+       // result: (MOVDconst [^(c>>uint64(d))])
+       for {
+               d := auxIntToInt64(v.AuxInt)
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               y4 := v_1
-               if y4.Op != OpARM64MOVDnop {
+               c := auxIntToInt64(v_0.AuxInt)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(^(c >> uint64(d)))
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MVNshiftRL(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (MVNshiftRL (MOVDconst [c]) [d])
+       // result: (MOVDconst [^int64(uint64(c)>>uint64(d))])
+       for {
+               d := auxIntToInt64(v.AuxInt)
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUload {
+               c := auxIntToInt64(v_0.AuxInt)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(^int64(uint64(c) >> uint64(d)))
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MVNshiftRO(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (MVNshiftRO (MOVDconst [c]) [d])
+       // result: (MOVDconst [^rotateRight64(c, d)])
+       for {
+               d := auxIntToInt64(v.AuxInt)
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               i7 := auxIntToInt32(x4.AuxInt)
-               if auxToSym(x4.Aux) != s {
+               c := auxIntToInt64(v_0.AuxInt)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(^rotateRight64(c, d))
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64NEG(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (NEG (MUL x y))
+       // result: (MNEG x y)
+       for {
+               if v_0.Op != OpARM64MUL {
                        break
                }
-               _ = x4.Args[1]
-               if p != x4.Args[0] || mem != x4.Args[1] || !(i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)) {
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               v.reset(OpARM64MNEG)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (NEG (MULW x y))
+       // result: (MNEGW x y)
+       for {
+               if v_0.Op != OpARM64MULW {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4)
-               v0 := b.NewValue0(x4.Pos, OpARM64MOVDload, t)
-               v.copyOf(v0)
-               v0.Aux = symToAux(s)
-               v1 := b.NewValue0(x4.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = int64ToAuxInt(int64(i0))
-               v1.AddArg(p)
-               v0.AddArg2(v1, mem)
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               v.reset(OpARM64MNEGW)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUloadidx ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 idx0 mem)
+       // match: (NEG (NEG x))
+       // result: x
        for {
-               t := v.Type
-               if auxIntToInt64(v.AuxInt) != 56 {
+               if v_0.Op != OpARM64NEG {
                        break
                }
-               o0 := v_0
-               if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 48 {
+               x := v_0.Args[0]
+               v.copyOf(x)
+               return true
+       }
+       // match: (NEG (MOVDconst [c]))
+       // result: (MOVDconst [-c])
+       for {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL || auxIntToInt64(o1.AuxInt) != 40 {
+               c := auxIntToInt64(v_0.AuxInt)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(-c)
+               return true
+       }
+       // match: (NEG x:(SLLconst [c] y))
+       // cond: clobberIfDead(x)
+       // result: (NEGshiftLL [c] y)
+       for {
+               x := v_0
+               if x.Op != OpARM64SLLconst {
                        break
                }
-               _ = o1.Args[1]
-               o2 := o1.Args[0]
-               if o2.Op != OpARM64ORshiftLL || auxIntToInt64(o2.AuxInt) != 32 {
+               c := auxIntToInt64(x.AuxInt)
+               y := x.Args[0]
+               if !(clobberIfDead(x)) {
                        break
                }
-               _ = o2.Args[1]
-               x0 := o2.Args[0]
-               if x0.Op != OpARM64MOVWUloadidx {
+               v.reset(OpARM64NEGshiftLL)
+               v.AuxInt = int64ToAuxInt(c)
+               v.AddArg(y)
+               return true
+       }
+       // match: (NEG x:(SRLconst [c] y))
+       // cond: clobberIfDead(x)
+       // result: (NEGshiftRL [c] y)
+       for {
+               x := v_0
+               if x.Op != OpARM64SRLconst {
                        break
                }
-               mem := x0.Args[2]
-               ptr0 := x0.Args[0]
-               idx0 := x0.Args[1]
-               y1 := o2.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               c := auxIntToInt64(x.AuxInt)
+               y := x.Args[0]
+               if !(clobberIfDead(x)) {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload || auxIntToInt32(x1.AuxInt) != 4 {
+               v.reset(OpARM64NEGshiftRL)
+               v.AuxInt = int64ToAuxInt(c)
+               v.AddArg(y)
+               return true
+       }
+       // match: (NEG x:(SRAconst [c] y))
+       // cond: clobberIfDead(x)
+       // result: (NEGshiftRA [c] y)
+       for {
+               x := v_0
+               if x.Op != OpARM64SRAconst {
                        break
                }
-               s := auxToSym(x1.Aux)
-               _ = x1.Args[1]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
+               c := auxIntToInt64(x.AuxInt)
+               y := x.Args[0]
+               if !(clobberIfDead(x)) {
                        break
                }
-               _ = p1.Args[1]
-               p1_0 := p1.Args[0]
-               p1_1 := p1.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, p1_0, p1_1 = _i0+1, p1_1, p1_0 {
-                       ptr1 := p1_0
-                       idx1 := p1_1
-                       if mem != x1.Args[1] {
-                               continue
-                       }
-                       y2 := o1.Args[1]
-                       if y2.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x2 := y2.Args[0]
-                       if x2.Op != OpARM64MOVBUload || auxIntToInt32(x2.AuxInt) != 5 || auxToSym(x2.Aux) != s {
-                               continue
-                       }
-                       _ = x2.Args[1]
-                       p := x2.Args[0]
-                       if mem != x2.Args[1] {
-                               continue
-                       }
-                       y3 := o0.Args[1]
-                       if y3.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x3 := y3.Args[0]
-                       if x3.Op != OpARM64MOVBUload || auxIntToInt32(x3.AuxInt) != 6 || auxToSym(x3.Aux) != s {
-                               continue
-                       }
-                       _ = x3.Args[1]
-                       if p != x3.Args[0] || mem != x3.Args[1] {
-                               continue
-                       }
-                       y4 := v_1
-                       if y4.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x4 := y4.Args[0]
-                       if x4.Op != OpARM64MOVBUload || auxIntToInt32(x4.AuxInt) != 7 || auxToSym(x4.Aux) != s {
-                               continue
-                       }
-                       _ = x4.Args[1]
-                       if p != x4.Args[0] || mem != x4.Args[1] || !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1, x2, x3, x4)
-                       v0 := b.NewValue0(x4.Pos, OpARM64MOVDloadidx, t)
-                       v.copyOf(v0)
-                       v0.AddArg3(ptr0, idx0, mem)
-                       return true
-               }
-               break
+               v.reset(OpARM64NEGshiftRA)
+               v.AuxInt = int64ToAuxInt(c)
+               v.AddArg(y)
+               return true
        }
-       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUloadidx4 ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADDshiftLL [2] ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && isSamePtr(p1, p) && clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 (SLLconst <idx0.Type> [2] idx0) mem)
+       return false
+}
+func rewriteValueARM64_OpARM64NEGshiftLL(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (NEGshiftLL (MOVDconst [c]) [d])
+       // result: (MOVDconst [-int64(uint64(c)<<uint64(d))])
        for {
-               t := v.Type
-               if auxIntToInt64(v.AuxInt) != 56 {
+               d := auxIntToInt64(v.AuxInt)
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               o0 := v_0
-               if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 48 {
+               c := auxIntToInt64(v_0.AuxInt)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(-int64(uint64(c) << uint64(d)))
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64NEGshiftRA(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (NEGshiftRA (MOVDconst [c]) [d])
+       // result: (MOVDconst [-(c>>uint64(d))])
+       for {
+               d := auxIntToInt64(v.AuxInt)
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL || auxIntToInt64(o1.AuxInt) != 40 {
+               c := auxIntToInt64(v_0.AuxInt)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(-(c >> uint64(d)))
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64NEGshiftRL(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (NEGshiftRL (MOVDconst [c]) [d])
+       // result: (MOVDconst [-int64(uint64(c)>>uint64(d))])
+       for {
+               d := auxIntToInt64(v.AuxInt)
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = o1.Args[1]
-               o2 := o1.Args[0]
-               if o2.Op != OpARM64ORshiftLL || auxIntToInt64(o2.AuxInt) != 32 {
+               c := auxIntToInt64(v_0.AuxInt)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(-int64(uint64(c) >> uint64(d)))
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64NotEqual(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (NotEqual (CMPconst [0] z:(AND x y)))
+       // cond: z.Uses == 1
+       // result: (NotEqual (TST x y))
+       for {
+               if v_0.Op != OpARM64CMPconst || auxIntToInt64(v_0.AuxInt) != 0 {
                        break
                }
-               _ = o2.Args[1]
-               x0 := o2.Args[0]
-               if x0.Op != OpARM64MOVWUloadidx4 {
+               z := v_0.Args[0]
+               if z.Op != OpARM64AND {
                        break
                }
-               mem := x0.Args[2]
-               ptr0 := x0.Args[0]
-               idx0 := x0.Args[1]
-               y1 := o2.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               y := z.Args[1]
+               x := z.Args[0]
+               if !(z.Uses == 1) {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload || auxIntToInt32(x1.AuxInt) != 4 {
+               v.reset(OpARM64NotEqual)
+               v0 := b.NewValue0(v.Pos, OpARM64TST, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (NotEqual (CMPWconst [0] x:(ANDconst [c] y)))
+       // cond: x.Uses == 1
+       // result: (NotEqual (TSTWconst [int32(c)] y))
+       for {
+               if v_0.Op != OpARM64CMPWconst || auxIntToInt32(v_0.AuxInt) != 0 {
                        break
                }
-               s := auxToSym(x1.Aux)
-               _ = x1.Args[1]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADDshiftLL || auxIntToInt64(p1.AuxInt) != 2 {
+               x := v_0.Args[0]
+               if x.Op != OpARM64ANDconst {
                        break
                }
-               idx1 := p1.Args[1]
-               ptr1 := p1.Args[0]
-               if mem != x1.Args[1] {
+               c := auxIntToInt64(x.AuxInt)
+               y := x.Args[0]
+               if !(x.Uses == 1) {
                        break
                }
-               y2 := o1.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               v.reset(OpARM64NotEqual)
+               v0 := b.NewValue0(v.Pos, OpARM64TSTWconst, types.TypeFlags)
+               v0.AuxInt = int32ToAuxInt(int32(c))
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (NotEqual (CMPWconst [0] z:(AND x y)))
+       // cond: z.Uses == 1
+       // result: (NotEqual (TSTW x y))
+       for {
+               if v_0.Op != OpARM64CMPWconst || auxIntToInt32(v_0.AuxInt) != 0 {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload || auxIntToInt32(x2.AuxInt) != 5 || auxToSym(x2.Aux) != s {
+               z := v_0.Args[0]
+               if z.Op != OpARM64AND {
                        break
                }
-               _ = x2.Args[1]
-               p := x2.Args[0]
-               if mem != x2.Args[1] {
+               y := z.Args[1]
+               x := z.Args[0]
+               if !(z.Uses == 1) {
                        break
                }
-               y3 := o0.Args[1]
-               if y3.Op != OpARM64MOVDnop {
+               v.reset(OpARM64NotEqual)
+               v0 := b.NewValue0(v.Pos, OpARM64TSTW, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (NotEqual (CMPconst [0] x:(ANDconst [c] y)))
+       // cond: x.Uses == 1
+       // result: (NotEqual (TSTconst [c] y))
+       for {
+               if v_0.Op != OpARM64CMPconst || auxIntToInt64(v_0.AuxInt) != 0 {
                        break
                }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload || auxIntToInt32(x3.AuxInt) != 6 || auxToSym(x3.Aux) != s {
+               x := v_0.Args[0]
+               if x.Op != OpARM64ANDconst {
                        break
                }
-               _ = x3.Args[1]
-               if p != x3.Args[0] || mem != x3.Args[1] {
+               c := auxIntToInt64(x.AuxInt)
+               y := x.Args[0]
+               if !(x.Uses == 1) {
                        break
                }
-               y4 := v_1
-               if y4.Op != OpARM64MOVDnop {
+               v.reset(OpARM64NotEqual)
+               v0 := b.NewValue0(v.Pos, OpARM64TSTconst, types.TypeFlags)
+               v0.AuxInt = int64ToAuxInt(c)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (NotEqual (CMP x z:(NEG y)))
+       // cond: z.Uses == 1
+       // result: (NotEqual (CMN x y))
+       for {
+               if v_0.Op != OpARM64CMP {
                        break
                }
-               x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUload || auxIntToInt32(x4.AuxInt) != 7 || auxToSym(x4.Aux) != s {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               z := v_0.Args[1]
+               if z.Op != OpARM64NEG {
                        break
                }
-               _ = x4.Args[1]
-               if p != x4.Args[0] || mem != x4.Args[1] || !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && isSamePtr(p1, p) && clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)) {
+               y := z.Args[0]
+               if !(z.Uses == 1) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4)
-               v0 := b.NewValue0(x4.Pos, OpARM64MOVDloadidx, t)
-               v.copyOf(v0)
-               v1 := b.NewValue0(x4.Pos, OpARM64SLLconst, idx0.Type)
-               v1.AuxInt = int64ToAuxInt(2)
-               v1.AddArg(idx0)
-               v0.AddArg3(ptr0, v1, mem)
+               v.reset(OpARM64NotEqual)
+               v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUloadidx ptr idx mem) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [4] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [6] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr idx mem)
+       // match: (NotEqual (CMPW x z:(NEG y)))
+       // cond: z.Uses == 1
+       // result: (NotEqual (CMNW x y))
        for {
-               t := v.Type
-               if auxIntToInt64(v.AuxInt) != 56 {
+               if v_0.Op != OpARM64CMPW {
                        break
                }
-               o0 := v_0
-               if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 48 {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               z := v_0.Args[1]
+               if z.Op != OpARM64NEG {
                        break
                }
-               _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL || auxIntToInt64(o1.AuxInt) != 40 {
+               y := z.Args[0]
+               if !(z.Uses == 1) {
                        break
                }
-               _ = o1.Args[1]
-               o2 := o1.Args[0]
-               if o2.Op != OpARM64ORshiftLL || auxIntToInt64(o2.AuxInt) != 32 {
+               v.reset(OpARM64NotEqual)
+               v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (NotEqual (CMPconst [0] x:(ADDconst [c] y)))
+       // cond: x.Uses == 1
+       // result: (NotEqual (CMNconst [c] y))
+       for {
+               if v_0.Op != OpARM64CMPconst || auxIntToInt64(v_0.AuxInt) != 0 {
                        break
                }
-               _ = o2.Args[1]
-               x0 := o2.Args[0]
-               if x0.Op != OpARM64MOVWUloadidx {
+               x := v_0.Args[0]
+               if x.Op != OpARM64ADDconst {
                        break
                }
-               mem := x0.Args[2]
-               ptr := x0.Args[0]
-               idx := x0.Args[1]
-               y1 := o2.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               c := auxIntToInt64(x.AuxInt)
+               y := x.Args[0]
+               if !(x.Uses == 1) {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUloadidx {
+               v.reset(OpARM64NotEqual)
+               v0 := b.NewValue0(v.Pos, OpARM64CMNconst, types.TypeFlags)
+               v0.AuxInt = int64ToAuxInt(c)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (NotEqual (CMPWconst [0] x:(ADDconst [c] y)))
+       // cond: x.Uses == 1
+       // result: (NotEqual (CMNWconst [int32(c)] y))
+       for {
+               if v_0.Op != OpARM64CMPWconst || auxIntToInt32(v_0.AuxInt) != 0 {
                        break
                }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] {
+               x := v_0.Args[0]
+               if x.Op != OpARM64ADDconst {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64ADDconst || auxIntToInt64(x1_1.AuxInt) != 4 || idx != x1_1.Args[0] || mem != x1.Args[2] {
+               c := auxIntToInt64(x.AuxInt)
+               y := x.Args[0]
+               if !(x.Uses == 1) {
                        break
                }
-               y2 := o1.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               v.reset(OpARM64NotEqual)
+               v0 := b.NewValue0(v.Pos, OpARM64CMNWconst, types.TypeFlags)
+               v0.AuxInt = int32ToAuxInt(int32(c))
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (NotEqual (CMPconst [0] z:(ADD x y)))
+       // cond: z.Uses == 1
+       // result: (NotEqual (CMN x y))
+       for {
+               if v_0.Op != OpARM64CMPconst || auxIntToInt64(v_0.AuxInt) != 0 {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUloadidx {
+               z := v_0.Args[0]
+               if z.Op != OpARM64ADD {
                        break
                }
-               _ = x2.Args[2]
-               if ptr != x2.Args[0] {
+               y := z.Args[1]
+               x := z.Args[0]
+               if !(z.Uses == 1) {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64ADDconst || auxIntToInt64(x2_1.AuxInt) != 5 || idx != x2_1.Args[0] || mem != x2.Args[2] {
+               v.reset(OpARM64NotEqual)
+               v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (NotEqual (CMPWconst [0] z:(ADD x y)))
+       // cond: z.Uses == 1
+       // result: (NotEqual (CMNW x y))
+       for {
+               if v_0.Op != OpARM64CMPWconst || auxIntToInt32(v_0.AuxInt) != 0 {
                        break
                }
-               y3 := o0.Args[1]
-               if y3.Op != OpARM64MOVDnop {
+               z := v_0.Args[0]
+               if z.Op != OpARM64ADD {
                        break
                }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUloadidx {
+               y := z.Args[1]
+               x := z.Args[0]
+               if !(z.Uses == 1) {
                        break
                }
-               _ = x3.Args[2]
-               if ptr != x3.Args[0] {
+               v.reset(OpARM64NotEqual)
+               v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (NotEqual (CMPconst [0] z:(MADD a x y)))
+       // cond: z.Uses == 1
+       // result: (NotEqual (CMN a (MUL <x.Type> x y)))
+       for {
+               if v_0.Op != OpARM64CMPconst || auxIntToInt64(v_0.AuxInt) != 0 {
                        break
                }
-               x3_1 := x3.Args[1]
-               if x3_1.Op != OpARM64ADDconst || auxIntToInt64(x3_1.AuxInt) != 6 || idx != x3_1.Args[0] || mem != x3.Args[2] {
+               z := v_0.Args[0]
+               if z.Op != OpARM64MADD {
                        break
                }
-               y4 := v_1
-               if y4.Op != OpARM64MOVDnop {
+               y := z.Args[2]
+               a := z.Args[0]
+               x := z.Args[1]
+               if !(z.Uses == 1) {
                        break
                }
-               x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUloadidx {
+               v.reset(OpARM64NotEqual)
+               v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
+               v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+               v1.AddArg2(x, y)
+               v0.AddArg2(a, v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (NotEqual (CMPconst [0] z:(MSUB a x y)))
+       // cond: z.Uses == 1
+       // result: (NotEqual (CMP a (MUL <x.Type> x y)))
+       for {
+               if v_0.Op != OpARM64CMPconst || auxIntToInt64(v_0.AuxInt) != 0 {
                        break
                }
-               _ = x4.Args[2]
-               if ptr != x4.Args[0] {
+               z := v_0.Args[0]
+               if z.Op != OpARM64MSUB {
                        break
                }
-               x4_1 := x4.Args[1]
-               if x4_1.Op != OpARM64ADDconst || auxIntToInt64(x4_1.AuxInt) != 7 || idx != x4_1.Args[0] || mem != x4.Args[2] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)) {
+               y := z.Args[2]
+               a := z.Args[0]
+               x := z.Args[1]
+               if !(z.Uses == 1) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
-               v.copyOf(v0)
-               v0.AddArg3(ptr, idx, mem)
+               v.reset(OpARM64NotEqual)
+               v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags)
+               v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+               v1.AddArg2(x, y)
+               v0.AddArg2(a, v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [i1] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i0] {s} p mem)))
-       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, y0, y1)
-       // result: @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i0] {s} p mem))
+       // match: (NotEqual (CMPWconst [0] z:(MADDW a x y)))
+       // cond: z.Uses == 1
+       // result: (NotEqual (CMNW a (MULW <x.Type> x y)))
        for {
-               t := v.Type
-               if auxIntToInt64(v.AuxInt) != 8 {
-                       break
-               }
-               y0 := v_0
-               if y0.Op != OpARM64MOVDnop {
+               if v_0.Op != OpARM64CMPWconst || auxIntToInt32(v_0.AuxInt) != 0 {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
+               z := v_0.Args[0]
+               if z.Op != OpARM64MADDW {
                        break
                }
-               i1 := auxIntToInt32(x0.AuxInt)
-               s := auxToSym(x0.Aux)
-               mem := x0.Args[1]
-               p := x0.Args[0]
-               y1 := v_1
-               if y1.Op != OpARM64MOVDnop {
+               y := z.Args[2]
+               a := z.Args[0]
+               x := z.Args[1]
+               if !(z.Uses == 1) {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               v.reset(OpARM64NotEqual)
+               v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags)
+               v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+               v1.AddArg2(x, y)
+               v0.AddArg2(a, v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (NotEqual (CMPWconst [0] z:(MSUBW a x y)))
+       // cond: z.Uses == 1
+       // result: (NotEqual (CMPW a (MULW <x.Type> x y)))
+       for {
+               if v_0.Op != OpARM64CMPWconst || auxIntToInt32(v_0.AuxInt) != 0 {
                        break
                }
-               i0 := auxIntToInt32(x1.AuxInt)
-               if auxToSym(x1.Aux) != s {
+               z := v_0.Args[0]
+               if z.Op != OpARM64MSUBW {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, y0, y1)) {
+               y := z.Args[2]
+               a := z.Args[0]
+               x := z.Args[1]
+               if !(z.Uses == 1) {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(x1.Pos, OpARM64REV16W, t)
-               v.copyOf(v0)
-               v1 := b.NewValue0(x1.Pos, OpARM64MOVHUload, t)
-               v1.AuxInt = int32ToAuxInt(i0)
-               v1.Aux = symToAux(s)
-               v1.AddArg2(p, mem)
-               v0.AddArg(v1)
+               v.reset(OpARM64NotEqual)
+               v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags)
+               v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+               v1.AddArg2(x, y)
+               v0.AddArg2(a, v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr0 idx0 mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0, x1, y0, y1)
-       // result: @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr0 idx0 mem))
+       // match: (NotEqual (FlagConstant [fc]))
+       // result: (MOVDconst [b2i(fc.ne())])
        for {
-               t := v.Type
-               if auxIntToInt64(v.AuxInt) != 8 {
+               if v_0.Op != OpARM64FlagConstant {
                        break
                }
-               y0 := v_0
-               if y0.Op != OpARM64MOVDnop {
+               fc := auxIntToFlagConstant(v_0.AuxInt)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(b2i(fc.ne()))
+               return true
+       }
+       // match: (NotEqual (InvertFlags x))
+       // result: (NotEqual x)
+       for {
+               if v_0.Op != OpARM64InvertFlags {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload || auxIntToInt32(x0.AuxInt) != 1 {
-                       break
+               x := v_0.Args[0]
+               v.reset(OpARM64NotEqual)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64OR(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (OR x (MOVDconst [c]))
+       // result: (ORconst [c] x)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_1.AuxInt)
+                       v.reset(OpARM64ORconst)
+                       v.AuxInt = int64ToAuxInt(c)
+                       v.AddArg(x)
+                       return true
                }
-               s := auxToSym(x0.Aux)
-               mem := x0.Args[1]
-               p1 := x0.Args[0]
-               if p1.Op != OpARM64ADD {
+               break
+       }
+       // match: (OR x x)
+       // result: x
+       for {
+               x := v_0
+               if x != v_1 {
                        break
                }
-               _ = p1.Args[1]
-               p1_0 := p1.Args[0]
-               p1_1 := p1.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, p1_0, p1_1 = _i0+1, p1_1, p1_0 {
-                       ptr1 := p1_0
-                       idx1 := p1_1
-                       y1 := v_1
-                       if y1.Op != OpARM64MOVDnop {
+               v.copyOf(x)
+               return true
+       }
+       // match: (OR x (MVN y))
+       // result: (ORN x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MVN {
                                continue
                        }
-                       x1 := y1.Args[0]
-                       if x1.Op != OpARM64MOVBUloadidx {
+                       y := v_1.Args[0]
+                       v.reset(OpARM64ORN)
+                       v.AddArg2(x, y)
+                       return true
+               }
+               break
+       }
+       // match: (OR x0 x1:(SLLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ORshiftLL x0 y [c])
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x0 := v_0
+                       x1 := v_1
+                       if x1.Op != OpARM64SLLconst {
                                continue
                        }
-                       _ = x1.Args[2]
-                       ptr0 := x1.Args[0]
-                       idx0 := x1.Args[1]
-                       if mem != x1.Args[2] || !(s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0, x1, y0, y1)) {
+                       c := auxIntToInt64(x1.AuxInt)
+                       y := x1.Args[0]
+                       if !(clobberIfDead(x1)) {
                                continue
                        }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x0.Pos, OpARM64REV16W, t)
-                       v.copyOf(v0)
-                       v1 := b.NewValue0(x0.Pos, OpARM64MOVHUloadidx, t)
-                       v1.AddArg3(ptr0, idx0, mem)
-                       v0.AddArg(v1)
+                       v.reset(OpARM64ORshiftLL)
+                       v.AuxInt = int64ToAuxInt(c)
+                       v.AddArg2(x0, y)
                        return true
                }
                break
        }
-       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [1] idx) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr idx mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, y0, y1)
-       // result: @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr idx mem))
+       // match: (OR x0 x1:(SRLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ORshiftRL x0 y [c])
        for {
-               t := v.Type
-               if auxIntToInt64(v.AuxInt) != 8 {
-                       break
-               }
-               y0 := v_0
-               if y0.Op != OpARM64MOVDnop {
-                       break
-               }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               mem := x0.Args[2]
-               ptr := x0.Args[0]
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64ADDconst || auxIntToInt64(x0_1.AuxInt) != 1 {
-                       break
-               }
-               idx := x0_1.Args[0]
-               y1 := v_1
-               if y1.Op != OpARM64MOVDnop {
-                       break
-               }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] || idx != x1.Args[1] || mem != x1.Args[2] || !(x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, y0, y1)) {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x0 := v_0
+                       x1 := v_1
+                       if x1.Op != OpARM64SRLconst {
+                               continue
+                       }
+                       c := auxIntToInt64(x1.AuxInt)
+                       y := x1.Args[0]
+                       if !(clobberIfDead(x1)) {
+                               continue
+                       }
+                       v.reset(OpARM64ORshiftRL)
+                       v.AuxInt = int64ToAuxInt(c)
+                       v.AddArg2(x0, y)
+                       return true
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpARM64REV16W, t)
-               v.copyOf(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVHUloadidx, t)
-               v1.AddArg3(ptr, idx, mem)
-               v0.AddArg(v1)
-               return true
+               break
        }
-       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] y0:(REV16W x0:(MOVHUload [i2] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i0] {s} p mem)))
-       // cond: i1 == i0+1 && i2 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, y0, y1, y2, o0)
-       // result: @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem))
+       // match: (OR x0 x1:(SRAconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ORshiftRA x0 y [c])
        for {
-               t := v.Type
-               if auxIntToInt64(v.AuxInt) != 24 {
-                       break
-               }
-               o0 := v_0
-               if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 16 {
-                       break
-               }
-               _ = o0.Args[1]
-               y0 := o0.Args[0]
-               if y0.Op != OpARM64REV16W {
-                       break
-               }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVHUload {
-                       break
-               }
-               i2 := auxIntToInt32(x0.AuxInt)
-               s := auxToSym(x0.Aux)
-               mem := x0.Args[1]
-               p := x0.Args[0]
-               y1 := o0.Args[1]
-               if y1.Op != OpARM64MOVDnop {
-                       break
-               }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
-                       break
-               }
-               i1 := auxIntToInt32(x1.AuxInt)
-               if auxToSym(x1.Aux) != s {
-                       break
-               }
-               _ = x1.Args[1]
-               if p != x1.Args[0] || mem != x1.Args[1] {
-                       break
-               }
-               y2 := v_1
-               if y2.Op != OpARM64MOVDnop {
-                       break
-               }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
-                       break
-               }
-               i0 := auxIntToInt32(x2.AuxInt)
-               if auxToSym(x2.Aux) != s {
-                       break
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x0 := v_0
+                       x1 := v_1
+                       if x1.Op != OpARM64SRAconst {
+                               continue
+                       }
+                       c := auxIntToInt64(x1.AuxInt)
+                       y := x1.Args[0]
+                       if !(clobberIfDead(x1)) {
+                               continue
+                       }
+                       v.reset(OpARM64ORshiftRA)
+                       v.AuxInt = int64ToAuxInt(c)
+                       v.AddArg2(x0, y)
+                       return true
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] || mem != x2.Args[1] || !(i1 == i0+1 && i2 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, y0, y1, y2, o0)) {
-                       break
+               break
+       }
+       // match: (OR x0 x1:(RORconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ORshiftRO x0 y [c])
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x0 := v_0
+                       x1 := v_1
+                       if x1.Op != OpARM64RORconst {
+                               continue
+                       }
+                       c := auxIntToInt64(x1.AuxInt)
+                       y := x1.Args[0]
+                       if !(clobberIfDead(x1)) {
+                               continue
+                       }
+                       v.reset(OpARM64ORshiftRO)
+                       v.AuxInt = int64ToAuxInt(c)
+                       v.AddArg2(x0, y)
+                       return true
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(x2.Pos, OpARM64REVW, t)
-               v.copyOf(v0)
-               v1 := b.NewValue0(x2.Pos, OpARM64MOVWUload, t)
-               v1.Aux = symToAux(s)
-               v2 := b.NewValue0(x2.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = int64ToAuxInt(int64(i0))
-               v2.AddArg(p)
-               v1.AddArg2(v2, mem)
-               v0.AddArg(v1)
-               return true
+               break
        }
-       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] y0:(REV16W x0:(MOVHUload [2] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr0 idx0 mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2, y0, y1, y2, o0)
-       // result: @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
+       // match: (OR (UBFIZ [bfc] x) (ANDconst [ac] y))
+       // cond: ac == ^((1<<uint(bfc.getARM64BFwidth())-1) << uint(bfc.getARM64BFlsb()))
+       // result: (BFI [bfc] y x)
        for {
-               t := v.Type
-               if auxIntToInt64(v.AuxInt) != 24 {
-                       break
-               }
-               o0 := v_0
-               if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 16 {
-                       break
-               }
-               _ = o0.Args[1]
-               y0 := o0.Args[0]
-               if y0.Op != OpARM64REV16W {
-                       break
-               }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVHUload || auxIntToInt32(x0.AuxInt) != 2 {
-                       break
-               }
-               s := auxToSym(x0.Aux)
-               mem := x0.Args[1]
-               p := x0.Args[0]
-               y1 := o0.Args[1]
-               if y1.Op != OpARM64MOVDnop {
-                       break
-               }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload || auxIntToInt32(x1.AuxInt) != 1 || auxToSym(x1.Aux) != s {
-                       break
-               }
-               _ = x1.Args[1]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
-                       break
-               }
-               _ = p1.Args[1]
-               p1_0 := p1.Args[0]
-               p1_1 := p1.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, p1_0, p1_1 = _i0+1, p1_1, p1_0 {
-                       ptr1 := p1_0
-                       idx1 := p1_1
-                       if mem != x1.Args[1] {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpARM64UBFIZ {
+                               continue
+                       }
+                       bfc := auxIntToArm64BitField(v_0.AuxInt)
+                       x := v_0.Args[0]
+                       if v_1.Op != OpARM64ANDconst {
                                continue
                        }
-                       y2 := v_1
-                       if y2.Op != OpARM64MOVDnop {
+                       ac := auxIntToInt64(v_1.AuxInt)
+                       y := v_1.Args[0]
+                       if !(ac == ^((1<<uint(bfc.getARM64BFwidth()) - 1) << uint(bfc.getARM64BFlsb()))) {
+                               continue
+                       }
+                       v.reset(OpARM64BFI)
+                       v.AuxInt = arm64BitFieldToAuxInt(bfc)
+                       v.AddArg2(y, x)
+                       return true
+               }
+               break
+       }
+       // match: (OR (UBFX [bfc] x) (ANDconst [ac] y))
+       // cond: ac == ^(1<<uint(bfc.getARM64BFwidth())-1)
+       // result: (BFXIL [bfc] y x)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpARM64UBFX {
                                continue
                        }
-                       x2 := y2.Args[0]
-                       if x2.Op != OpARM64MOVBUloadidx {
+                       bfc := auxIntToArm64BitField(v_0.AuxInt)
+                       x := v_0.Args[0]
+                       if v_1.Op != OpARM64ANDconst {
                                continue
                        }
-                       _ = x2.Args[2]
-                       ptr0 := x2.Args[0]
-                       idx0 := x2.Args[1]
-                       if mem != x2.Args[2] || !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2, y0, y1, y2, o0)) {
+                       ac := auxIntToInt64(v_1.AuxInt)
+                       y := v_1.Args[0]
+                       if !(ac == ^(1<<uint(bfc.getARM64BFwidth()) - 1)) {
                                continue
                        }
-                       b = mergePoint(b, x0, x1, x2)
-                       v0 := b.NewValue0(x1.Pos, OpARM64REVW, t)
-                       v.copyOf(v0)
-                       v1 := b.NewValue0(x1.Pos, OpARM64MOVWUloadidx, t)
-                       v1.AddArg3(ptr0, idx0, mem)
-                       v0.AddArg(v1)
+                       v.reset(OpARM64BFXIL)
+                       v.AuxInt = arm64BitFieldToAuxInt(bfc)
+                       v.AddArg2(y, x)
                        return true
                }
                break
        }
-       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] y0:(REV16W x0:(MOVHUloadidx ptr (ADDconst [2] idx) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr idx mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, y0, y1, y2, o0)
-       // result: @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
+       return false
+}
+func rewriteValueARM64_OpARM64ORN(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ORN x (MOVDconst [c]))
+       // result: (ORconst [^c] x)
        for {
-               t := v.Type
-               if auxIntToInt64(v.AuxInt) != 24 {
-                       break
-               }
-               o0 := v_0
-               if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 16 {
-                       break
-               }
-               _ = o0.Args[1]
-               y0 := o0.Args[0]
-               if y0.Op != OpARM64REV16W {
-                       break
-               }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVHUloadidx {
-                       break
-               }
-               mem := x0.Args[2]
-               ptr := x0.Args[0]
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64ADDconst || auxIntToInt64(x0_1.AuxInt) != 2 {
-                       break
-               }
-               idx := x0_1.Args[0]
-               y1 := o0.Args[1]
-               if y1.Op != OpARM64MOVDnop {
-                       break
-               }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] {
-                       break
-               }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64ADDconst || auxIntToInt64(x1_1.AuxInt) != 1 || idx != x1_1.Args[0] || mem != x1.Args[2] {
-                       break
-               }
-               y2 := v_1
-               if y2.Op != OpARM64MOVDnop {
-                       break
-               }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               _ = x2.Args[2]
-               if ptr != x2.Args[0] || idx != x2.Args[1] || mem != x2.Args[2] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, y0, y1, y2, o0)) {
+               x := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
-               v.copyOf(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
-               v1.AddArg3(ptr, idx, mem)
-               v0.AddArg(v1)
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpARM64ORconst)
+               v.AuxInt = int64ToAuxInt(^c)
+               v.AddArg(x)
                return true
        }
-       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] y0:(REVW x0:(MOVWUload [i4] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i3] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i1] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i0] {s} p mem)))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, o0, o1, o2)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem))
+       // match: (ORN x x)
+       // result: (MOVDconst [-1])
        for {
-               t := v.Type
-               if auxIntToInt64(v.AuxInt) != 56 {
-                       break
-               }
-               o0 := v_0
-               if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 48 {
-                       break
-               }
-               _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL || auxIntToInt64(o1.AuxInt) != 40 {
-                       break
-               }
-               _ = o1.Args[1]
-               o2 := o1.Args[0]
-               if o2.Op != OpARM64ORshiftLL || auxIntToInt64(o2.AuxInt) != 32 {
-                       break
-               }
-               _ = o2.Args[1]
-               y0 := o2.Args[0]
-               if y0.Op != OpARM64REVW {
-                       break
-               }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVWUload {
-                       break
-               }
-               i4 := auxIntToInt32(x0.AuxInt)
-               s := auxToSym(x0.Aux)
-               mem := x0.Args[1]
-               p := x0.Args[0]
-               y1 := o2.Args[1]
-               if y1.Op != OpARM64MOVDnop {
-                       break
-               }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               x := v_0
+               if x != v_1 {
                        break
                }
-               i3 := auxIntToInt32(x1.AuxInt)
-               if auxToSym(x1.Aux) != s {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(-1)
+               return true
+       }
+       // match: (ORN x0 x1:(SLLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ORNshiftLL x0 y [c])
+       for {
+               x0 := v_0
+               x1 := v_1
+               if x1.Op != OpARM64SLLconst {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] || mem != x1.Args[1] {
+               c := auxIntToInt64(x1.AuxInt)
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               y2 := o1.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               v.reset(OpARM64ORNshiftLL)
+               v.AuxInt = int64ToAuxInt(c)
+               v.AddArg2(x0, y)
+               return true
+       }
+       // match: (ORN x0 x1:(SRLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ORNshiftRL x0 y [c])
+       for {
+               x0 := v_0
+               x1 := v_1
+               if x1.Op != OpARM64SRLconst {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               c := auxIntToInt64(x1.AuxInt)
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               i2 := auxIntToInt32(x2.AuxInt)
-               if auxToSym(x2.Aux) != s {
+               v.reset(OpARM64ORNshiftRL)
+               v.AuxInt = int64ToAuxInt(c)
+               v.AddArg2(x0, y)
+               return true
+       }
+       // match: (ORN x0 x1:(SRAconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ORNshiftRA x0 y [c])
+       for {
+               x0 := v_0
+               x1 := v_1
+               if x1.Op != OpARM64SRAconst {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] || mem != x2.Args[1] {
+               c := auxIntToInt64(x1.AuxInt)
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               y3 := o0.Args[1]
-               if y3.Op != OpARM64MOVDnop {
+               v.reset(OpARM64ORNshiftRA)
+               v.AuxInt = int64ToAuxInt(c)
+               v.AddArg2(x0, y)
+               return true
+       }
+       // match: (ORN x0 x1:(RORconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ORNshiftRO x0 y [c])
+       for {
+               x0 := v_0
+               x1 := v_1
+               if x1.Op != OpARM64RORconst {
                        break
                }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
+               c := auxIntToInt64(x1.AuxInt)
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               i1 := auxIntToInt32(x3.AuxInt)
-               if auxToSym(x3.Aux) != s {
+               v.reset(OpARM64ORNshiftRO)
+               v.AuxInt = int64ToAuxInt(c)
+               v.AddArg2(x0, y)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ORNshiftLL(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ORNshiftLL x (MOVDconst [c]) [d])
+       // result: (ORconst x [^int64(uint64(c)<<uint64(d))])
+       for {
+               d := auxIntToInt64(v.AuxInt)
+               x := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x3.Args[1]
-               if p != x3.Args[0] || mem != x3.Args[1] {
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpARM64ORconst)
+               v.AuxInt = int64ToAuxInt(^int64(uint64(c) << uint64(d)))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORNshiftLL (SLLconst x [c]) x [c])
+       // result: (MOVDconst [-1])
+       for {
+               c := auxIntToInt64(v.AuxInt)
+               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != c {
                        break
                }
-               y4 := v_1
-               if y4.Op != OpARM64MOVDnop {
+               x := v_0.Args[0]
+               if x != v_1 {
                        break
                }
-               x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUload {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(-1)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ORNshiftRA(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ORNshiftRA x (MOVDconst [c]) [d])
+       // result: (ORconst x [^(c>>uint64(d))])
+       for {
+               d := auxIntToInt64(v.AuxInt)
+               x := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               i0 := auxIntToInt32(x4.AuxInt)
-               if auxToSym(x4.Aux) != s {
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpARM64ORconst)
+               v.AuxInt = int64ToAuxInt(^(c >> uint64(d)))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORNshiftRA (SRAconst x [c]) x [c])
+       // result: (MOVDconst [-1])
+       for {
+               c := auxIntToInt64(v.AuxInt)
+               if v_0.Op != OpARM64SRAconst || auxIntToInt64(v_0.AuxInt) != c {
                        break
                }
-               _ = x4.Args[1]
-               if p != x4.Args[0] || mem != x4.Args[1] || !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, o0, o1, o2)) {
+               x := v_0.Args[0]
+               if x != v_1 {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4)
-               v0 := b.NewValue0(x4.Pos, OpARM64REV, t)
-               v.copyOf(v0)
-               v1 := b.NewValue0(x4.Pos, OpARM64MOVDload, t)
-               v1.Aux = symToAux(s)
-               v2 := b.NewValue0(x4.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = int64ToAuxInt(int64(i0))
-               v2.AddArg(p)
-               v1.AddArg2(v2, mem)
-               v0.AddArg(v1)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(-1)
                return true
        }
-       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] y0:(REVW x0:(MOVWUload [4] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [3] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr0 idx0 mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, o0, o1, o2)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
+       return false
+}
+func rewriteValueARM64_OpARM64ORNshiftRL(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ORNshiftRL x (MOVDconst [c]) [d])
+       // result: (ORconst x [^int64(uint64(c)>>uint64(d))])
        for {
-               t := v.Type
-               if auxIntToInt64(v.AuxInt) != 56 {
-                       break
-               }
-               o0 := v_0
-               if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 48 {
+               d := auxIntToInt64(v.AuxInt)
+               x := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL || auxIntToInt64(o1.AuxInt) != 40 {
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpARM64ORconst)
+               v.AuxInt = int64ToAuxInt(^int64(uint64(c) >> uint64(d)))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORNshiftRL (SRLconst x [c]) x [c])
+       // result: (MOVDconst [-1])
+       for {
+               c := auxIntToInt64(v.AuxInt)
+               if v_0.Op != OpARM64SRLconst || auxIntToInt64(v_0.AuxInt) != c {
                        break
                }
-               _ = o1.Args[1]
-               o2 := o1.Args[0]
-               if o2.Op != OpARM64ORshiftLL || auxIntToInt64(o2.AuxInt) != 32 {
+               x := v_0.Args[0]
+               if x != v_1 {
                        break
                }
-               _ = o2.Args[1]
-               y0 := o2.Args[0]
-               if y0.Op != OpARM64REVW {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(-1)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ORNshiftRO(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ORNshiftRO x (MOVDconst [c]) [d])
+       // result: (ORconst x [^rotateRight64(c, d)])
+       for {
+               d := auxIntToInt64(v.AuxInt)
+               x := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVWUload || auxIntToInt32(x0.AuxInt) != 4 {
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpARM64ORconst)
+               v.AuxInt = int64ToAuxInt(^rotateRight64(c, d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORNshiftRO (RORconst x [c]) x [c])
+       // result: (MOVDconst [-1])
+       for {
+               c := auxIntToInt64(v.AuxInt)
+               if v_0.Op != OpARM64RORconst || auxIntToInt64(v_0.AuxInt) != c {
                        break
                }
-               s := auxToSym(x0.Aux)
-               mem := x0.Args[1]
-               p := x0.Args[0]
-               y1 := o2.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               x := v_0.Args[0]
+               if x != v_1 {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload || auxIntToInt32(x1.AuxInt) != 3 || auxToSym(x1.Aux) != s {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(-1)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ORconst(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (ORconst [0] x)
+       // result: x
+       for {
+               if auxIntToInt64(v.AuxInt) != 0 {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] || mem != x1.Args[1] {
+               x := v_0
+               v.copyOf(x)
+               return true
+       }
+       // match: (ORconst [-1] _)
+       // result: (MOVDconst [-1])
+       for {
+               if auxIntToInt64(v.AuxInt) != -1 {
                        break
                }
-               y2 := o1.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(-1)
+               return true
+       }
+       // match: (ORconst [c] (MOVDconst [d]))
+       // result: (MOVDconst [c|d])
+       for {
+               c := auxIntToInt64(v.AuxInt)
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload || auxIntToInt32(x2.AuxInt) != 2 || auxToSym(x2.Aux) != s {
+               d := auxIntToInt64(v_0.AuxInt)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64ToAuxInt(c | d)
+               return true
+       }
+       // match: (ORconst [c] (ORconst [d] x))
+       // result: (ORconst [c|d] x)
+       for {
+               c := auxIntToInt64(v.AuxInt)
+               if v_0.Op != OpARM64ORconst {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] || mem != x2.Args[1] {
+               d := auxIntToInt64(v_0.AuxInt)
+               x := v_0.Args[0]
+               v.reset(OpARM64ORconst)
+               v.AuxInt = int64ToAuxInt(c | d)
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORconst [c1] (ANDconst [c2] x))
+       // cond: c2|c1 == ^0
+       // result: (ORconst [c1] x)
+       for {
+               c1 := auxIntToInt64(v.AuxInt)
+               if v_0.Op != OpARM64ANDconst {
                        break
                }
-               y3 := o0.Args[1]
-               if y3.Op != OpARM64MOVDnop {
+               c2 := auxIntToInt64(v_0.AuxInt)
+               x := v_0.Args[0]
+               if !(c2|c1 == ^0) {
                        break
                }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload || auxIntToInt32(x3.AuxInt) != 1 || auxToSym(x3.Aux) != s {
+               v.reset(OpARM64ORconst)
+               v.AuxInt = int64ToAuxInt(c1)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (ORshiftLL (MOVDconst [c]) x [d])
+       // result: (ORconst [c] (SLLconst <x.Type> x [d]))
+       for {
+               d := auxIntToInt64(v.AuxInt)
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x3.Args[1]
-               p1 := x3.Args[0]
-               if p1.Op != OpARM64ADD {
+               c := auxIntToInt64(v_0.AuxInt)
+               x := v_1
+               v.reset(OpARM64ORconst)
+               v.AuxInt = int64ToAuxInt(c)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = int64ToAuxInt(d)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (ORshiftLL x (MOVDconst [c]) [d])
+       // result: (ORconst x [int64(uint64(c)<<uint64(d))])
+       for {
+               d := auxIntToInt64(v.AuxInt)
+               x := v_0
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = p1.Args[1]
-               p1_0 := p1.Args[0]
-               p1_1 := p1.Args[1]
-               for _i0 := 0; _i0 <= 1; _i0, p1_0, p1_1 = _i0+1, p1_1, p1_0 {
-                       ptr1 := p1_0
-                       idx1 := p1_1
-                       if mem != x3.Args[1] {
-                               continue
-                       }
-                       y4 := v_1
-                       if y4.Op != OpARM64MOVDnop {
-                               continue
-                       }
-                       x4 := y4.Args[0]
-                       if x4.Op != OpARM64MOVBUloadidx {
-                               continue
-                       }
-                       _ = x4.Args[2]
-                       ptr0 := x4.Args[0]
-                       idx0 := x4.Args[1]
-                       if mem != x4.Args[2] || !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, o0, o1, o2)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1, x2, x3, x4)
-                       v0 := b.NewValue0(x3.Pos, OpARM64REV, t)
-                       v.copyOf(v0)
-                       v1 := b.NewValue0(x3.Pos, OpARM64MOVDloadidx, t)
-                       v1.AddArg3(ptr0, idx0, mem)
-                       v0.AddArg(v1)
-                       return true
-               }
-               break
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpARM64ORconst)
+               v.AuxInt = int64ToAuxInt(int64(uint64(c) << uint64(d)))
+               v.AddArg(x)
+               return true
        }
-       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] y0:(REVW x0:(MOVWUloadidx ptr (ADDconst [4] idx) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr idx mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, o0, o1, o2)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr idx mem))
+       // match: (ORshiftLL y:(SLLconst x [c]) x [c])
+       // result: y
        for {
-               t := v.Type
-               if auxIntToInt64(v.AuxInt) != 56 {
+               c := auxIntToInt64(v.AuxInt)
+               y := v_0
+               if y.Op != OpARM64SLLconst || auxIntToInt64(y.AuxInt) != c {
                        break
                }
-               o0 := v_0
-               if o0.Op != OpARM64ORshiftLL || auxIntToInt64(o0.AuxInt) != 48 {
+               x := y.Args[0]
+               if x != v_1 {
                        break
                }
-               _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL || auxIntToInt64(o1.AuxInt) != 40 {
+               v.copyOf(y)
+               return true
+       }
+       // match: (ORshiftLL <typ.UInt16> [8] (UBFX <typ.UInt16> [armBFAuxInt(8, 8)] x) x)
+       // result: (REV16W x)
+       for {
+               if v.Type != typ.UInt16 || auxIntToInt64(v.AuxInt) != 8 || v_0.Op != OpARM64UBFX || v_0.Type != typ.UInt16 || auxIntToArm64BitField(v_0.AuxInt) != armBFAuxInt(8, 8) {
                        break
                }
-               _ = o1.Args[1]
-               o2 := o1.Args[0]
-               if o2.Op != OpARM64ORshiftLL || auxIntToInt64(o2.AuxInt) != 32 {
+               x := v_0.Args[0]
+               if x != v_1 {
                        break
                }
-               _ = o2.Args[1]
-               y0 := o2.Args[0]
-               if y0.Op != OpARM64REVW {
+               v.reset(OpARM64REV16W)
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORshiftLL [8] (UBFX [armBFAuxInt(8, 24)] (ANDconst [c1] x)) (ANDconst [c2] x))
+       // cond: uint32(c1) == 0xff00ff00 && uint32(c2) == 0x00ff00ff
+       // result: (REV16W x)
+       for {
+               if auxIntToInt64(v.AuxInt) != 8 || v_0.Op != OpARM64UBFX || auxIntToArm64BitField(v_0.AuxInt) != armBFAuxInt(8, 24) {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVWUloadidx {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64ANDconst {
                        break
                }
-               mem := x0.Args[2]
-               ptr := x0.Args[0]
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64ADDconst || auxIntToInt64(x0_1.AuxInt) != 4 {
+               c1 := auxIntToInt64(v_0_0.AuxInt)
+               x := v_0_0.Args[0]
+               if v_1.Op != OpARM64ANDconst {
                        break
                }
-               idx := x0_1.Args[0]
-               y1 := o2.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               c2 := auxIntToInt64(v_1.AuxInt)
+               if x != v_1.Args[0] || !(uint32(c1) == 0xff00ff00 && uint32(c2) == 0x00ff00ff) {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUloadidx {
+               v.reset(OpARM64REV16W)
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORshiftLL [8] (SRLconst [8] (ANDconst [c1] x)) (ANDconst [c2] x))
+       // cond: (uint64(c1) == 0xff00ff00ff00ff00 && uint64(c2) == 0x00ff00ff00ff00ff)
+       // result: (REV16 x)
+       for {
+               if auxIntToInt64(v.AuxInt) != 8 || v_0.Op != OpARM64SRLconst || auxIntToInt64(v_0.AuxInt) != 8 {
                        break
                }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64ANDconst {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64ADDconst || auxIntToInt64(x1_1.AuxInt) != 3 || idx != x1_1.Args[0] || mem != x1.Args[2] {
+               c1 := auxIntToInt64(v_0_0.AuxInt)
+               x := v_0_0.Args[0]
+               if v_1.Op != OpARM64ANDconst {
                        break
                }
-               y2 := o1.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               c2 := auxIntToInt64(v_1.AuxInt)
+               if x != v_1.Args[0] || !(uint64(c1) == 0xff00ff00ff00ff00 && uint64(c2) == 0x00ff00ff00ff00ff) {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUloadidx {
+               v.reset(OpARM64REV16)
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORshiftLL [8] (SRLconst [8] (ANDconst [c1] x)) (ANDconst [c2] x))
+       // cond: (uint64(c1) == 0xff00ff00 && uint64(c2) == 0x00ff00ff)
+       // result: (REV16 (ANDconst <x.Type> [0xffffffff] x))
+       for {
+               if auxIntToInt64(v.AuxInt) != 8 || v_0.Op != OpARM64SRLconst || auxIntToInt64(v_0.AuxInt) != 8 {
                        break
                }
-               _ = x2.Args[2]
-               if ptr != x2.Args[0] {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64ANDconst {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64ADDconst || auxIntToInt64(x2_1.AuxInt) != 2 || idx != x2_1.Args[0] || mem != x2.Args[2] {
+               c1 := auxIntToInt64(v_0_0.AuxInt)
+               x := v_0_0.Args[0]
+               if v_1.Op != OpARM64ANDconst {
                        break
                }
-               y3 := o0.Args[1]
-               if y3.Op != OpARM64MOVDnop {
+               c2 := auxIntToInt64(v_1.AuxInt)
+               if x != v_1.Args[0] || !(uint64(c1) == 0xff00ff00 && uint64(c2) == 0x00ff00ff) {
                        break
                }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUloadidx {
+               v.reset(OpARM64REV16)
+               v0 := b.NewValue0(v.Pos, OpARM64ANDconst, x.Type)
+               v0.AuxInt = int64ToAuxInt(0xffffffff)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: ( ORshiftLL [c] (SRLconst x [64-c]) x2)
+       // result: (EXTRconst [64-c] x2 x)
+       for {
+               c := auxIntToInt64(v.AuxInt)
+               if v_0.Op != OpARM64SRLconst || auxIntToInt64(v_0.AuxInt) != 64-c {
                        break
                }
-               _ = x3.Args[2]
-               if ptr != x3.Args[0] {
+               x := v_0.Args[0]
+               x2 := v_1
+               v.reset(OpARM64EXTRconst)
+               v.AuxInt = int64ToAuxInt(64 - c)
+               v.AddArg2(x2, x)
+               return true
+       }
+       // match: ( ORshiftLL <t> [c] (UBFX [bfc] x) x2)
+       // cond: c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
+       // result: (EXTRWconst [32-c] x2 x)
+       for {
+               t := v.Type
+               c := auxIntToInt64(v.AuxInt)
+               if v_0.Op != OpARM64UBFX {
                        break
                }
-               x3_1 := x3.Args[1]
-               if x3_1.Op != OpARM64ADDconst || auxIntToInt64(x3_1.AuxInt) != 1 || idx != x3_1.Args[0] || mem != x3.Args[2] {
+               bfc := auxIntToArm64BitField(v_0.AuxInt)
+               x := v_0.Args[0]
+               x2 := v_1
+               if !(c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)) {
                        break
                }
-               y4 := v_1
-               if y4.Op != OpARM64MOVDnop {
+               v.reset(OpARM64EXTRWconst)
+               v.AuxInt = int64ToAuxInt(32 - c)
+               v.AddArg2(x2, x)
+               return true
+       }
+       // match: (ORshiftLL [sc] (UBFX [bfc] x) (SRLconst [sc] y))
+       // cond: sc == bfc.getARM64BFwidth()
+       // result: (BFXIL [bfc] y x)
+       for {
+               sc := auxIntToInt64(v.AuxInt)
+               if v_0.Op != OpARM64UBFX {
                        break
                }
-               x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUloadidx {
+               bfc := auxIntToArm64BitField(v_0.AuxInt)
+               x := v_0.Args[0]
+               if v_1.Op != OpARM64SRLconst || auxIntToInt64(v_1.AuxInt) != sc {
                        break
                }
-               _ = x4.Args[2]
-               if ptr != x4.Args[0] || idx != x4.Args[1] || mem != x4.Args[2] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, o0, o1, o2)) {
+               y := v_1.Args[0]
+               if !(sc == bfc.getARM64BFwidth()) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4)
-               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
-               v.copyOf(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
-               v1.AddArg3(ptr, idx, mem)
-               v0.AddArg(v1)
+               v.reset(OpARM64BFXIL)
+               v.AuxInt = arm64BitFieldToAuxInt(bfc)
+               v.AddArg2(y, x)
                return true
        }
        return false
index c9930516bc4e805c501100864c4e8d4e4451a7e4..b5e3901f9fdefebd8298678ee069a867a732e34b 100644 (file)
@@ -445,6 +445,12 @@ func rewriteValuePPC64(v *Value) bool {
                return rewriteValuePPC64_OpPPC64ANDCCconst(v)
        case OpPPC64ANDN:
                return rewriteValuePPC64_OpPPC64ANDN(v)
+       case OpPPC64BRD:
+               return rewriteValuePPC64_OpPPC64BRD(v)
+       case OpPPC64BRH:
+               return rewriteValuePPC64_OpPPC64BRH(v)
+       case OpPPC64BRW:
+               return rewriteValuePPC64_OpPPC64BRW(v)
        case OpPPC64CLRLSLDI:
                return rewriteValuePPC64_OpPPC64CLRLSLDI(v)
        case OpPPC64CMP:
@@ -1134,6 +1140,8 @@ func rewriteValuePPC64_OpBitLen64(v *Value) bool {
 }
 func rewriteValuePPC64_OpBswap16(v *Value) bool {
        v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
        // match: (Bswap16 x)
        // cond: buildcfg.GOPPC64>=10
        // result: (BRH x)
@@ -1146,10 +1154,51 @@ func rewriteValuePPC64_OpBswap16(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Bswap16 x:(MOVHZload [off] {sym} ptr mem))
+       // result: @x.Block (MOVHBRload (MOVDaddr <ptr.Type> [off] {sym} ptr) mem)
+       for {
+               x := v_0
+               if x.Op != OpPPC64MOVHZload {
+                       break
+               }
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpPPC64MOVHBRload, typ.UInt16)
+               v.copyOf(v0)
+               v1 := b.NewValue0(x.Pos, OpPPC64MOVDaddr, ptr.Type)
+               v1.AuxInt = int32ToAuxInt(off)
+               v1.Aux = symToAux(sym)
+               v1.AddArg(ptr)
+               v0.AddArg2(v1, mem)
+               return true
+       }
+       // match: (Bswap16 x:(MOVHZloadidx ptr idx mem))
+       // result: @x.Block (MOVHZreg (MOVHBRloadidx ptr idx mem))
+       for {
+               x := v_0
+               if x.Op != OpPPC64MOVHZloadidx {
+                       break
+               }
+               mem := x.Args[2]
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, typ.Int64)
+               v.copyOf(v0)
+               v1 := b.NewValue0(v.Pos, OpPPC64MOVHBRloadidx, typ.Int16)
+               v1.AddArg3(ptr, idx, mem)
+               v0.AddArg(v1)
+               return true
+       }
        return false
 }
 func rewriteValuePPC64_OpBswap32(v *Value) bool {
        v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
        // match: (Bswap32 x)
        // cond: buildcfg.GOPPC64>=10
        // result: (BRW x)
@@ -1162,10 +1211,51 @@ func rewriteValuePPC64_OpBswap32(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Bswap32 x:(MOVWZload [off] {sym} ptr mem))
+       // result: @x.Block (MOVWBRload (MOVDaddr <ptr.Type> [off] {sym} ptr) mem)
+       for {
+               x := v_0
+               if x.Op != OpPPC64MOVWZload {
+                       break
+               }
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpPPC64MOVWBRload, typ.UInt32)
+               v.copyOf(v0)
+               v1 := b.NewValue0(x.Pos, OpPPC64MOVDaddr, ptr.Type)
+               v1.AuxInt = int32ToAuxInt(off)
+               v1.Aux = symToAux(sym)
+               v1.AddArg(ptr)
+               v0.AddArg2(v1, mem)
+               return true
+       }
+       // match: (Bswap32 x:(MOVWZloadidx ptr idx mem))
+       // result: @x.Block (MOVWZreg (MOVWBRloadidx ptr idx mem))
+       for {
+               x := v_0
+               if x.Op != OpPPC64MOVWZloadidx {
+                       break
+               }
+               mem := x.Args[2]
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVWZreg, typ.Int64)
+               v.copyOf(v0)
+               v1 := b.NewValue0(v.Pos, OpPPC64MOVWBRloadidx, typ.Int32)
+               v1.AddArg3(ptr, idx, mem)
+               v0.AddArg(v1)
+               return true
+       }
        return false
 }
 func rewriteValuePPC64_OpBswap64(v *Value) bool {
        v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
        // match: (Bswap64 x)
        // cond: buildcfg.GOPPC64>=10
        // result: (BRD x)
@@ -1178,6 +1268,43 @@ func rewriteValuePPC64_OpBswap64(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Bswap64 x:(MOVDload [off] {sym} ptr mem))
+       // result: @x.Block (MOVDBRload (MOVDaddr <ptr.Type> [off] {sym} ptr) mem)
+       for {
+               x := v_0
+               if x.Op != OpPPC64MOVDload {
+                       break
+               }
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpPPC64MOVDBRload, typ.UInt64)
+               v.copyOf(v0)
+               v1 := b.NewValue0(x.Pos, OpPPC64MOVDaddr, ptr.Type)
+               v1.AuxInt = int32ToAuxInt(off)
+               v1.Aux = symToAux(sym)
+               v1.AddArg(ptr)
+               v0.AddArg2(v1, mem)
+               return true
+       }
+       // match: (Bswap64 x:(MOVDloadidx ptr idx mem))
+       // result: @x.Block (MOVDBRloadidx ptr idx mem)
+       for {
+               x := v_0
+               if x.Op != OpPPC64MOVDloadidx {
+                       break
+               }
+               mem := x.Args[2]
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVDBRloadidx, typ.Int64)
+               v.copyOf(v0)
+               v0.AddArg3(ptr, idx, mem)
+               return true
+       }
        return false
 }
 func rewriteValuePPC64_OpCom16(v *Value) bool {
@@ -4222,6 +4349,163 @@ func rewriteValuePPC64_OpPPC64ANDN(v *Value) bool {
        }
        return false
 }
+func rewriteValuePPC64_OpPPC64BRD(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (BRD x:(MOVDload [off] {sym} ptr mem))
+       // cond: x.Uses == 1
+       // result: @x.Block (MOVDBRload (MOVDaddr <ptr.Type> [off] {sym} ptr) mem)
+       for {
+               x := v_0
+               if x.Op != OpPPC64MOVDload {
+                       break
+               }
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpPPC64MOVDBRload, typ.UInt64)
+               v.copyOf(v0)
+               v1 := b.NewValue0(x.Pos, OpPPC64MOVDaddr, ptr.Type)
+               v1.AuxInt = int32ToAuxInt(off)
+               v1.Aux = symToAux(sym)
+               v1.AddArg(ptr)
+               v0.AddArg2(v1, mem)
+               return true
+       }
+       // match: (BRD x:(MOVDloadidx ptr idx mem))
+       // cond: x.Uses == 1
+       // result: @x.Block (MOVDBRloadidx ptr idx mem)
+       for {
+               x := v_0
+               if x.Op != OpPPC64MOVDloadidx {
+                       break
+               }
+               mem := x.Args[2]
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               if !(x.Uses == 1) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVDBRloadidx, typ.Int64)
+               v.copyOf(v0)
+               v0.AddArg3(ptr, idx, mem)
+               return true
+       }
+       return false
+}
+func rewriteValuePPC64_OpPPC64BRH(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (BRH x:(MOVHZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1
+       // result: @x.Block (MOVHBRload (MOVDaddr <ptr.Type> [off] {sym} ptr) mem)
+       for {
+               x := v_0
+               if x.Op != OpPPC64MOVHZload {
+                       break
+               }
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpPPC64MOVHBRload, typ.UInt16)
+               v.copyOf(v0)
+               v1 := b.NewValue0(x.Pos, OpPPC64MOVDaddr, ptr.Type)
+               v1.AuxInt = int32ToAuxInt(off)
+               v1.Aux = symToAux(sym)
+               v1.AddArg(ptr)
+               v0.AddArg2(v1, mem)
+               return true
+       }
+       // match: (BRH x:(MOVHZloadidx ptr idx mem))
+       // cond: x.Uses == 1
+       // result: @x.Block (MOVHZreg (MOVHBRloadidx ptr idx mem))
+       for {
+               x := v_0
+               if x.Op != OpPPC64MOVHZloadidx {
+                       break
+               }
+               mem := x.Args[2]
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               if !(x.Uses == 1) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, typ.Int64)
+               v.copyOf(v0)
+               v1 := b.NewValue0(v.Pos, OpPPC64MOVHBRloadidx, typ.Int16)
+               v1.AddArg3(ptr, idx, mem)
+               v0.AddArg(v1)
+               return true
+       }
+       return false
+}
+func rewriteValuePPC64_OpPPC64BRW(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (BRW x:(MOVWZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1
+       // result: @x.Block (MOVWBRload (MOVDaddr <ptr.Type> [off] {sym} ptr) mem)
+       for {
+               x := v_0
+               if x.Op != OpPPC64MOVWZload {
+                       break
+               }
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpPPC64MOVWBRload, typ.UInt32)
+               v.copyOf(v0)
+               v1 := b.NewValue0(x.Pos, OpPPC64MOVDaddr, ptr.Type)
+               v1.AuxInt = int32ToAuxInt(off)
+               v1.Aux = symToAux(sym)
+               v1.AddArg(ptr)
+               v0.AddArg2(v1, mem)
+               return true
+       }
+       // match: (BRW x:(MOVWZloadidx ptr idx mem))
+       // cond: x.Uses == 1
+       // result: @x.Block (MOVWZreg (MOVWBRloadidx ptr idx mem))
+       for {
+               x := v_0
+               if x.Op != OpPPC64MOVWZloadidx {
+                       break
+               }
+               mem := x.Args[2]
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               if !(x.Uses == 1) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVWZreg, typ.Int64)
+               v.copyOf(v0)
+               v1 := b.NewValue0(v.Pos, OpPPC64MOVWBRloadidx, typ.Int32)
+               v1.AddArg3(ptr, idx, mem)
+               v0.AddArg(v1)
+               return true
+       }
+       return false
+}
 func rewriteValuePPC64_OpPPC64CLRLSLDI(v *Value) bool {
        v_0 := v.Args[0]
        // match: (CLRLSLDI [c] (SRWconst [s] x))
@@ -6891,7 +7175,6 @@ func rewriteValuePPC64_OpPPC64MOVBstore(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       config := b.Func.Config
        typ := &b.Func.Config.Types
        // match: (MOVBstore [off1] {sym} (ADDconst [off2] x) val mem)
        // cond: is16Bit(int64(off1)+off2)
@@ -7196,504 +7479,73 @@ func rewriteValuePPC64_OpPPC64MOVBstore(v *Value) bool {
                v.AddArg3(ptr, v0, mem)
                return true
        }
-       // match: (MOVBstore [i1] {s} p (SRWconst w [24]) x0:(MOVBstore [i0] {s} p (SRWconst w [16]) mem))
-       // cond: !config.BigEndian && x0.Uses == 1 && i1 == i0+1 && clobber(x0)
-       // result: (MOVHstore [i0] {s} p (SRWconst <typ.UInt16> w [16]) mem)
+       return false
+}
+func rewriteValuePPC64_OpPPC64MOVBstoreidx(v *Value) bool {
+       v_3 := v.Args[3]
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (MOVBstoreidx ptr (MOVDconst [c]) val mem)
+       // cond: is16Bit(c)
+       // result: (MOVBstore [int32(c)] ptr val mem)
        for {
-               i1 := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpPPC64SRWconst || auxIntToInt64(v_1.AuxInt) != 24 {
-                       break
-               }
-               w := v_1.Args[0]
-               x0 := v_2
-               if x0.Op != OpPPC64MOVBstore {
-                       break
-               }
-               i0 := auxIntToInt32(x0.AuxInt)
-               if auxToSym(x0.Aux) != s {
-                       break
-               }
-               mem := x0.Args[2]
-               if p != x0.Args[0] {
+               ptr := v_0
+               if v_1.Op != OpPPC64MOVDconst {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpPPC64SRWconst || auxIntToInt64(x0_1.AuxInt) != 16 || w != x0_1.Args[0] || !(!config.BigEndian && x0.Uses == 1 && i1 == i0+1 && clobber(x0)) {
+               c := auxIntToInt64(v_1.AuxInt)
+               val := v_2
+               mem := v_3
+               if !(is16Bit(c)) {
                        break
                }
-               v.reset(OpPPC64MOVHstore)
-               v.AuxInt = int32ToAuxInt(i0)
-               v.Aux = symToAux(s)
-               v0 := b.NewValue0(x0.Pos, OpPPC64SRWconst, typ.UInt16)
-               v0.AuxInt = int64ToAuxInt(16)
-               v0.AddArg(w)
-               v.AddArg3(p, v0, mem)
+               v.reset(OpPPC64MOVBstore)
+               v.AuxInt = int32ToAuxInt(int32(c))
+               v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVBstore [i1] {s} p (SRDconst w [24]) x0:(MOVBstore [i0] {s} p (SRDconst w [16]) mem))
-       // cond: !config.BigEndian && x0.Uses == 1 && i1 == i0+1 && clobber(x0)
-       // result: (MOVHstore [i0] {s} p (SRWconst <typ.UInt16> w [16]) mem)
+       // match: (MOVBstoreidx (MOVDconst [c]) ptr val mem)
+       // cond: is16Bit(c)
+       // result: (MOVBstore [int32(c)] ptr val mem)
        for {
-               i1 := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpPPC64SRDconst || auxIntToInt64(v_1.AuxInt) != 24 {
-                       break
-               }
-               w := v_1.Args[0]
-               x0 := v_2
-               if x0.Op != OpPPC64MOVBstore {
-                       break
-               }
-               i0 := auxIntToInt32(x0.AuxInt)
-               if auxToSym(x0.Aux) != s {
-                       break
-               }
-               mem := x0.Args[2]
-               if p != x0.Args[0] {
+               if v_0.Op != OpPPC64MOVDconst {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpPPC64SRDconst || auxIntToInt64(x0_1.AuxInt) != 16 || w != x0_1.Args[0] || !(!config.BigEndian && x0.Uses == 1 && i1 == i0+1 && clobber(x0)) {
+               c := auxIntToInt64(v_0.AuxInt)
+               ptr := v_1
+               val := v_2
+               mem := v_3
+               if !(is16Bit(c)) {
                        break
                }
-               v.reset(OpPPC64MOVHstore)
-               v.AuxInt = int32ToAuxInt(i0)
-               v.Aux = symToAux(s)
-               v0 := b.NewValue0(x0.Pos, OpPPC64SRWconst, typ.UInt16)
-               v0.AuxInt = int64ToAuxInt(16)
-               v0.AddArg(w)
-               v.AddArg3(p, v0, mem)
+               v.reset(OpPPC64MOVBstore)
+               v.AuxInt = int32ToAuxInt(int32(c))
+               v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (MOVBstore [i1] {s} p (SRWconst w [8]) x0:(MOVBstore [i0] {s} p w mem))
-       // cond: !config.BigEndian && x0.Uses == 1 && i1 == i0+1 && clobber(x0)
-       // result: (MOVHstore [i0] {s} p w mem)
+       // match: (MOVBstoreidx ptr idx (MOVBreg x) mem)
+       // result: (MOVBstoreidx ptr idx x mem)
        for {
-               i1 := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpPPC64SRWconst || auxIntToInt64(v_1.AuxInt) != 8 {
-                       break
-               }
-               w := v_1.Args[0]
-               x0 := v_2
-               if x0.Op != OpPPC64MOVBstore {
-                       break
-               }
-               i0 := auxIntToInt32(x0.AuxInt)
-               if auxToSym(x0.Aux) != s {
-                       break
-               }
-               mem := x0.Args[2]
-               if p != x0.Args[0] || w != x0.Args[1] || !(!config.BigEndian && x0.Uses == 1 && i1 == i0+1 && clobber(x0)) {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpPPC64MOVBreg {
                        break
                }
-               v.reset(OpPPC64MOVHstore)
-               v.AuxInt = int32ToAuxInt(i0)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
+               x := v_2.Args[0]
+               mem := v_3
+               v.reset(OpPPC64MOVBstoreidx)
+               v.AddArg4(ptr, idx, x, mem)
                return true
        }
-       // match: (MOVBstore [i1] {s} p (SRDconst w [8]) x0:(MOVBstore [i0] {s} p w mem))
-       // cond: !config.BigEndian && x0.Uses == 1 && i1 == i0+1 && clobber(x0)
-       // result: (MOVHstore [i0] {s} p w mem)
+       // match: (MOVBstoreidx ptr idx (MOVBZreg x) mem)
+       // result: (MOVBstoreidx ptr idx x mem)
        for {
-               i1 := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpPPC64SRDconst || auxIntToInt64(v_1.AuxInt) != 8 {
-                       break
-               }
-               w := v_1.Args[0]
-               x0 := v_2
-               if x0.Op != OpPPC64MOVBstore {
-                       break
-               }
-               i0 := auxIntToInt32(x0.AuxInt)
-               if auxToSym(x0.Aux) != s {
-                       break
-               }
-               mem := x0.Args[2]
-               if p != x0.Args[0] || w != x0.Args[1] || !(!config.BigEndian && x0.Uses == 1 && i1 == i0+1 && clobber(x0)) {
-                       break
-               }
-               v.reset(OpPPC64MOVHstore)
-               v.AuxInt = int32ToAuxInt(i0)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       // match: (MOVBstore [i3] {s} p w x0:(MOVBstore [i2] {s} p (SRWconst w [8]) x1:(MOVBstore [i1] {s} p (SRWconst w [16]) x2:(MOVBstore [i0] {s} p (SRWconst w [24]) mem))))
-       // cond: !config.BigEndian && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && clobber(x0, x1, x2)
-       // result: (MOVWBRstore (MOVDaddr <typ.Uintptr> [i0] {s} p) w mem)
-       for {
-               i3 := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               w := v_1
-               x0 := v_2
-               if x0.Op != OpPPC64MOVBstore {
-                       break
-               }
-               i2 := auxIntToInt32(x0.AuxInt)
-               if auxToSym(x0.Aux) != s {
-                       break
-               }
-               _ = x0.Args[2]
-               if p != x0.Args[0] {
-                       break
-               }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpPPC64SRWconst || auxIntToInt64(x0_1.AuxInt) != 8 || w != x0_1.Args[0] {
-                       break
-               }
-               x1 := x0.Args[2]
-               if x1.Op != OpPPC64MOVBstore {
-                       break
-               }
-               i1 := auxIntToInt32(x1.AuxInt)
-               if auxToSym(x1.Aux) != s {
-                       break
-               }
-               _ = x1.Args[2]
-               if p != x1.Args[0] {
-                       break
-               }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpPPC64SRWconst || auxIntToInt64(x1_1.AuxInt) != 16 || w != x1_1.Args[0] {
-                       break
-               }
-               x2 := x1.Args[2]
-               if x2.Op != OpPPC64MOVBstore {
-                       break
-               }
-               i0 := auxIntToInt32(x2.AuxInt)
-               if auxToSym(x2.Aux) != s {
-                       break
-               }
-               mem := x2.Args[2]
-               if p != x2.Args[0] {
-                       break
-               }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpPPC64SRWconst || auxIntToInt64(x2_1.AuxInt) != 24 || w != x2_1.Args[0] || !(!config.BigEndian && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && clobber(x0, x1, x2)) {
-                       break
-               }
-               v.reset(OpPPC64MOVWBRstore)
-               v0 := b.NewValue0(x2.Pos, OpPPC64MOVDaddr, typ.Uintptr)
-               v0.AuxInt = int32ToAuxInt(i0)
-               v0.Aux = symToAux(s)
-               v0.AddArg(p)
-               v.AddArg3(v0, w, mem)
-               return true
-       }
-       // match: (MOVBstore [i1] {s} p w x0:(MOVBstore [i0] {s} p (SRWconst w [8]) mem))
-       // cond: !config.BigEndian && x0.Uses == 1 && i1 == i0+1 && clobber(x0)
-       // result: (MOVHBRstore (MOVDaddr <typ.Uintptr> [i0] {s} p) w mem)
-       for {
-               i1 := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               w := v_1
-               x0 := v_2
-               if x0.Op != OpPPC64MOVBstore {
-                       break
-               }
-               i0 := auxIntToInt32(x0.AuxInt)
-               if auxToSym(x0.Aux) != s {
-                       break
-               }
-               mem := x0.Args[2]
-               if p != x0.Args[0] {
-                       break
-               }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpPPC64SRWconst || auxIntToInt64(x0_1.AuxInt) != 8 || w != x0_1.Args[0] || !(!config.BigEndian && x0.Uses == 1 && i1 == i0+1 && clobber(x0)) {
-                       break
-               }
-               v.reset(OpPPC64MOVHBRstore)
-               v0 := b.NewValue0(x0.Pos, OpPPC64MOVDaddr, typ.Uintptr)
-               v0.AuxInt = int32ToAuxInt(i0)
-               v0.Aux = symToAux(s)
-               v0.AddArg(p)
-               v.AddArg3(v0, w, mem)
-               return true
-       }
-       // match: (MOVBstore [i7] {s} p (SRDconst w [56]) x0:(MOVBstore [i6] {s} p (SRDconst w [48]) x1:(MOVBstore [i5] {s} p (SRDconst w [40]) x2:(MOVBstore [i4] {s} p (SRDconst w [32]) x3:(MOVWstore [i0] {s} p w mem)))))
-       // cond: !config.BigEndian && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && clobber(x0, x1, x2, x3)
-       // result: (MOVDstore [i0] {s} p w mem)
-       for {
-               i7 := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpPPC64SRDconst || auxIntToInt64(v_1.AuxInt) != 56 {
-                       break
-               }
-               w := v_1.Args[0]
-               x0 := v_2
-               if x0.Op != OpPPC64MOVBstore {
-                       break
-               }
-               i6 := auxIntToInt32(x0.AuxInt)
-               if auxToSym(x0.Aux) != s {
-                       break
-               }
-               _ = x0.Args[2]
-               if p != x0.Args[0] {
-                       break
-               }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpPPC64SRDconst || auxIntToInt64(x0_1.AuxInt) != 48 || w != x0_1.Args[0] {
-                       break
-               }
-               x1 := x0.Args[2]
-               if x1.Op != OpPPC64MOVBstore {
-                       break
-               }
-               i5 := auxIntToInt32(x1.AuxInt)
-               if auxToSym(x1.Aux) != s {
-                       break
-               }
-               _ = x1.Args[2]
-               if p != x1.Args[0] {
-                       break
-               }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpPPC64SRDconst || auxIntToInt64(x1_1.AuxInt) != 40 || w != x1_1.Args[0] {
-                       break
-               }
-               x2 := x1.Args[2]
-               if x2.Op != OpPPC64MOVBstore {
-                       break
-               }
-               i4 := auxIntToInt32(x2.AuxInt)
-               if auxToSym(x2.Aux) != s {
-                       break
-               }
-               _ = x2.Args[2]
-               if p != x2.Args[0] {
-                       break
-               }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpPPC64SRDconst || auxIntToInt64(x2_1.AuxInt) != 32 || w != x2_1.Args[0] {
-                       break
-               }
-               x3 := x2.Args[2]
-               if x3.Op != OpPPC64MOVWstore {
-                       break
-               }
-               i0 := auxIntToInt32(x3.AuxInt)
-               if auxToSym(x3.Aux) != s {
-                       break
-               }
-               mem := x3.Args[2]
-               if p != x3.Args[0] || w != x3.Args[1] || !(!config.BigEndian && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && clobber(x0, x1, x2, x3)) {
-                       break
-               }
-               v.reset(OpPPC64MOVDstore)
-               v.AuxInt = int32ToAuxInt(i0)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       // match: (MOVBstore [i7] {s} p w x0:(MOVBstore [i6] {s} p (SRDconst w [8]) x1:(MOVBstore [i5] {s} p (SRDconst w [16]) x2:(MOVBstore [i4] {s} p (SRDconst w [24]) x3:(MOVBstore [i3] {s} p (SRDconst w [32]) x4:(MOVBstore [i2] {s} p (SRDconst w [40]) x5:(MOVBstore [i1] {s} p (SRDconst w [48]) x6:(MOVBstore [i0] {s} p (SRDconst w [56]) mem))))))))
-       // cond: !config.BigEndian && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && clobber(x0, x1, x2, x3, x4, x5, x6)
-       // result: (MOVDBRstore (MOVDaddr <typ.Uintptr> [i0] {s} p) w mem)
-       for {
-               i7 := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               w := v_1
-               x0 := v_2
-               if x0.Op != OpPPC64MOVBstore {
-                       break
-               }
-               i6 := auxIntToInt32(x0.AuxInt)
-               if auxToSym(x0.Aux) != s {
-                       break
-               }
-               _ = x0.Args[2]
-               if p != x0.Args[0] {
-                       break
-               }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpPPC64SRDconst || auxIntToInt64(x0_1.AuxInt) != 8 || w != x0_1.Args[0] {
-                       break
-               }
-               x1 := x0.Args[2]
-               if x1.Op != OpPPC64MOVBstore {
-                       break
-               }
-               i5 := auxIntToInt32(x1.AuxInt)
-               if auxToSym(x1.Aux) != s {
-                       break
-               }
-               _ = x1.Args[2]
-               if p != x1.Args[0] {
-                       break
-               }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpPPC64SRDconst || auxIntToInt64(x1_1.AuxInt) != 16 || w != x1_1.Args[0] {
-                       break
-               }
-               x2 := x1.Args[2]
-               if x2.Op != OpPPC64MOVBstore {
-                       break
-               }
-               i4 := auxIntToInt32(x2.AuxInt)
-               if auxToSym(x2.Aux) != s {
-                       break
-               }
-               _ = x2.Args[2]
-               if p != x2.Args[0] {
-                       break
-               }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpPPC64SRDconst || auxIntToInt64(x2_1.AuxInt) != 24 || w != x2_1.Args[0] {
-                       break
-               }
-               x3 := x2.Args[2]
-               if x3.Op != OpPPC64MOVBstore {
-                       break
-               }
-               i3 := auxIntToInt32(x3.AuxInt)
-               if auxToSym(x3.Aux) != s {
-                       break
-               }
-               _ = x3.Args[2]
-               if p != x3.Args[0] {
-                       break
-               }
-               x3_1 := x3.Args[1]
-               if x3_1.Op != OpPPC64SRDconst || auxIntToInt64(x3_1.AuxInt) != 32 || w != x3_1.Args[0] {
-                       break
-               }
-               x4 := x3.Args[2]
-               if x4.Op != OpPPC64MOVBstore {
-                       break
-               }
-               i2 := auxIntToInt32(x4.AuxInt)
-               if auxToSym(x4.Aux) != s {
-                       break
-               }
-               _ = x4.Args[2]
-               if p != x4.Args[0] {
-                       break
-               }
-               x4_1 := x4.Args[1]
-               if x4_1.Op != OpPPC64SRDconst || auxIntToInt64(x4_1.AuxInt) != 40 || w != x4_1.Args[0] {
-                       break
-               }
-               x5 := x4.Args[2]
-               if x5.Op != OpPPC64MOVBstore {
-                       break
-               }
-               i1 := auxIntToInt32(x5.AuxInt)
-               if auxToSym(x5.Aux) != s {
-                       break
-               }
-               _ = x5.Args[2]
-               if p != x5.Args[0] {
-                       break
-               }
-               x5_1 := x5.Args[1]
-               if x5_1.Op != OpPPC64SRDconst || auxIntToInt64(x5_1.AuxInt) != 48 || w != x5_1.Args[0] {
-                       break
-               }
-               x6 := x5.Args[2]
-               if x6.Op != OpPPC64MOVBstore {
-                       break
-               }
-               i0 := auxIntToInt32(x6.AuxInt)
-               if auxToSym(x6.Aux) != s {
-                       break
-               }
-               mem := x6.Args[2]
-               if p != x6.Args[0] {
-                       break
-               }
-               x6_1 := x6.Args[1]
-               if x6_1.Op != OpPPC64SRDconst || auxIntToInt64(x6_1.AuxInt) != 56 || w != x6_1.Args[0] || !(!config.BigEndian && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && clobber(x0, x1, x2, x3, x4, x5, x6)) {
-                       break
-               }
-               v.reset(OpPPC64MOVDBRstore)
-               v0 := b.NewValue0(x6.Pos, OpPPC64MOVDaddr, typ.Uintptr)
-               v0.AuxInt = int32ToAuxInt(i0)
-               v0.Aux = symToAux(s)
-               v0.AddArg(p)
-               v.AddArg3(v0, w, mem)
-               return true
-       }
-       return false
-}
-func rewriteValuePPC64_OpPPC64MOVBstoreidx(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (MOVBstoreidx ptr (MOVDconst [c]) val mem)
-       // cond: is16Bit(c)
-       // result: (MOVBstore [int32(c)] ptr val mem)
-       for {
-               ptr := v_0
-               if v_1.Op != OpPPC64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               val := v_2
-               mem := v_3
-               if !(is16Bit(c)) {
-                       break
-               }
-               v.reset(OpPPC64MOVBstore)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       // match: (MOVBstoreidx (MOVDconst [c]) ptr val mem)
-       // cond: is16Bit(c)
-       // result: (MOVBstore [int32(c)] ptr val mem)
-       for {
-               if v_0.Op != OpPPC64MOVDconst {
-                       break
-               }
-               c := auxIntToInt64(v_0.AuxInt)
-               ptr := v_1
-               val := v_2
-               mem := v_3
-               if !(is16Bit(c)) {
-                       break
-               }
-               v.reset(OpPPC64MOVBstore)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       // match: (MOVBstoreidx ptr idx (MOVBreg x) mem)
-       // result: (MOVBstoreidx ptr idx x mem)
-       for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpPPC64MOVBreg {
-                       break
-               }
-               x := v_2.Args[0]
-               mem := v_3
-               v.reset(OpPPC64MOVBstoreidx)
-               v.AddArg4(ptr, idx, x, mem)
-               return true
-       }
-       // match: (MOVBstoreidx ptr idx (MOVBZreg x) mem)
-       // result: (MOVBstoreidx ptr idx x mem)
-       for {
-               ptr := v_0
-               idx := v_1
-               if v_2.Op != OpPPC64MOVBZreg {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpPPC64MOVBZreg {
                        break
                }
                x := v_2.Args[0]
@@ -8047,6 +7899,7 @@ func rewriteValuePPC64_OpPPC64MOVDstore(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
+       b := v.Block
        // match: (MOVDstore [off] {sym} ptr (MFVSRD x) mem)
        // result: (FMOVDstore [off] {sym} ptr x mem)
        for {
@@ -8149,6 +8002,49 @@ func rewriteValuePPC64_OpPPC64MOVDstore(v *Value) bool {
                v.AddArg4(ptr, idx, val, mem)
                return true
        }
+       // match: (MOVDstore [off] {sym} ptr r:(BRD val) mem)
+       // cond: r.Uses == 1
+       // result: (MOVDBRstore (MOVDaddr <ptr.Type> [off] {sym} ptr) val mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               r := v_1
+               if r.Op != OpPPC64BRD {
+                       break
+               }
+               val := r.Args[0]
+               mem := v_2
+               if !(r.Uses == 1) {
+                       break
+               }
+               v.reset(OpPPC64MOVDBRstore)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVDaddr, ptr.Type)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg(ptr)
+               v.AddArg3(v0, val, mem)
+               return true
+       }
+       // match: (MOVDstore [off] {sym} ptr (Bswap64 val) mem)
+       // result: (MOVDBRstore (MOVDaddr <ptr.Type> [off] {sym} ptr) val mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpBswap64 {
+                       break
+               }
+               val := v_1.Args[0]
+               mem := v_2
+               v.reset(OpPPC64MOVDBRstore)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVDaddr, ptr.Type)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg(ptr)
+               v.AddArg3(v0, val, mem)
+               return true
+       }
        return false
 }
 func rewriteValuePPC64_OpPPC64MOVDstoreidx(v *Value) bool {
@@ -8194,22 +8090,55 @@ func rewriteValuePPC64_OpPPC64MOVDstoreidx(v *Value) bool {
                v.AddArg3(ptr, val, mem)
                return true
        }
-       return false
-}
-func rewriteValuePPC64_OpPPC64MOVDstorezero(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (MOVDstorezero [off1] {sym} (ADDconst [off2] x) mem)
-       // cond: is16Bit(int64(off1)+off2)
-       // result: (MOVDstorezero [off1+int32(off2)] {sym} x mem)
+       // match: (MOVDstoreidx ptr idx r:(BRD val) mem)
+       // cond: r.Uses == 1
+       // result: (MOVDBRstoreidx ptr idx val mem)
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               if v_0.Op != OpPPC64ADDconst {
+               ptr := v_0
+               idx := v_1
+               r := v_2
+               if r.Op != OpPPC64BRD {
                        break
                }
-               off2 := auxIntToInt64(v_0.AuxInt)
-               x := v_0.Args[0]
+               val := r.Args[0]
+               mem := v_3
+               if !(r.Uses == 1) {
+                       break
+               }
+               v.reset(OpPPC64MOVDBRstoreidx)
+               v.AddArg4(ptr, idx, val, mem)
+               return true
+       }
+       // match: (MOVDstoreidx ptr idx (Bswap64 val) mem)
+       // result: (MOVDBRstoreidx ptr idx val mem)
+       for {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpBswap64 {
+                       break
+               }
+               val := v_2.Args[0]
+               mem := v_3
+               v.reset(OpPPC64MOVDBRstoreidx)
+               v.AddArg4(ptr, idx, val, mem)
+               return true
+       }
+       return false
+}
+func rewriteValuePPC64_OpPPC64MOVDstorezero(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVDstorezero [off1] {sym} (ADDconst [off2] x) mem)
+       // cond: is16Bit(int64(off1)+off2)
+       // result: (MOVDstorezero [off1+int32(off2)] {sym} x mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpPPC64ADDconst {
+                       break
+               }
+               off2 := auxIntToInt64(v_0.AuxInt)
+               x := v_0.Args[0]
                mem := v_1
                if !(is16Bit(int64(off1) + off2)) {
                        break
@@ -8249,10 +8178,9 @@ func rewriteValuePPC64_OpPPC64MOVHBRstore(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVHBRstore {sym} ptr (MOVHreg x) mem)
-       // result: (MOVHBRstore {sym} ptr x mem)
+       // match: (MOVHBRstore ptr (MOVHreg x) mem)
+       // result: (MOVHBRstore ptr x mem)
        for {
-               sym := auxToSym(v.Aux)
                ptr := v_0
                if v_1.Op != OpPPC64MOVHreg {
                        break
@@ -8260,14 +8188,12 @@ func rewriteValuePPC64_OpPPC64MOVHBRstore(v *Value) bool {
                x := v_1.Args[0]
                mem := v_2
                v.reset(OpPPC64MOVHBRstore)
-               v.Aux = symToAux(sym)
                v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVHBRstore {sym} ptr (MOVHZreg x) mem)
-       // result: (MOVHBRstore {sym} ptr x mem)
+       // match: (MOVHBRstore ptr (MOVHZreg x) mem)
+       // result: (MOVHBRstore ptr x mem)
        for {
-               sym := auxToSym(v.Aux)
                ptr := v_0
                if v_1.Op != OpPPC64MOVHZreg {
                        break
@@ -8275,14 +8201,12 @@ func rewriteValuePPC64_OpPPC64MOVHBRstore(v *Value) bool {
                x := v_1.Args[0]
                mem := v_2
                v.reset(OpPPC64MOVHBRstore)
-               v.Aux = symToAux(sym)
                v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVHBRstore {sym} ptr (MOVWreg x) mem)
-       // result: (MOVHBRstore {sym} ptr x mem)
+       // match: (MOVHBRstore ptr (MOVWreg x) mem)
+       // result: (MOVHBRstore ptr x mem)
        for {
-               sym := auxToSym(v.Aux)
                ptr := v_0
                if v_1.Op != OpPPC64MOVWreg {
                        break
@@ -8290,14 +8214,12 @@ func rewriteValuePPC64_OpPPC64MOVHBRstore(v *Value) bool {
                x := v_1.Args[0]
                mem := v_2
                v.reset(OpPPC64MOVHBRstore)
-               v.Aux = symToAux(sym)
                v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVHBRstore {sym} ptr (MOVWZreg x) mem)
-       // result: (MOVHBRstore {sym} ptr x mem)
+       // match: (MOVHBRstore ptr (MOVWZreg x) mem)
+       // result: (MOVHBRstore ptr x mem)
        for {
-               sym := auxToSym(v.Aux)
                ptr := v_0
                if v_1.Op != OpPPC64MOVWZreg {
                        break
@@ -8305,7 +8227,6 @@ func rewriteValuePPC64_OpPPC64MOVHBRstore(v *Value) bool {
                x := v_1.Args[0]
                mem := v_2
                v.reset(OpPPC64MOVHBRstore)
-               v.Aux = symToAux(sym)
                v.AddArg3(ptr, x, mem)
                return true
        }
@@ -9223,7 +9144,6 @@ func rewriteValuePPC64_OpPPC64MOVHstore(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       config := b.Func.Config
        // match: (MOVHstore [off1] {sym} (ADDconst [off2] x) val mem)
        // cond: is16Bit(int64(off1)+off2)
        // result: (MOVHstore [off1+int32(off2)] {sym} x val mem)
@@ -9377,62 +9297,47 @@ func rewriteValuePPC64_OpPPC64MOVHstore(v *Value) bool {
                v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVHstore [i1] {s} p (SRWconst w [16]) x0:(MOVHstore [i0] {s} p w mem))
-       // cond: !config.BigEndian && x0.Uses == 1 && i1 == i0+2 && clobber(x0)
-       // result: (MOVWstore [i0] {s} p w mem)
+       // match: (MOVHstore [off] {sym} ptr r:(BRH val) mem)
+       // cond: r.Uses == 1
+       // result: (MOVHBRstore (MOVDaddr <ptr.Type> [off] {sym} ptr) val mem)
        for {
-               i1 := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpPPC64SRWconst || auxIntToInt64(v_1.AuxInt) != 16 {
-                       break
-               }
-               w := v_1.Args[0]
-               x0 := v_2
-               if x0.Op != OpPPC64MOVHstore {
-                       break
-               }
-               i0 := auxIntToInt32(x0.AuxInt)
-               if auxToSym(x0.Aux) != s {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               r := v_1
+               if r.Op != OpPPC64BRH {
                        break
                }
-               mem := x0.Args[2]
-               if p != x0.Args[0] || w != x0.Args[1] || !(!config.BigEndian && x0.Uses == 1 && i1 == i0+2 && clobber(x0)) {
+               val := r.Args[0]
+               mem := v_2
+               if !(r.Uses == 1) {
                        break
                }
-               v.reset(OpPPC64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i0)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
+               v.reset(OpPPC64MOVHBRstore)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVDaddr, ptr.Type)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg(ptr)
+               v.AddArg3(v0, val, mem)
                return true
        }
-       // match: (MOVHstore [i1] {s} p (SRDconst w [16]) x0:(MOVHstore [i0] {s} p w mem))
-       // cond: !config.BigEndian && x0.Uses == 1 && i1 == i0+2 && clobber(x0)
-       // result: (MOVWstore [i0] {s} p w mem)
+       // match: (MOVHstore [off] {sym} ptr (Bswap16 val) mem)
+       // result: (MOVHBRstore (MOVDaddr <ptr.Type> [off] {sym} ptr) val mem)
        for {
-               i1 := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpPPC64SRDconst || auxIntToInt64(v_1.AuxInt) != 16 {
-                       break
-               }
-               w := v_1.Args[0]
-               x0 := v_2
-               if x0.Op != OpPPC64MOVHstore {
-                       break
-               }
-               i0 := auxIntToInt32(x0.AuxInt)
-               if auxToSym(x0.Aux) != s {
-                       break
-               }
-               mem := x0.Args[2]
-               if p != x0.Args[0] || w != x0.Args[1] || !(!config.BigEndian && x0.Uses == 1 && i1 == i0+2 && clobber(x0)) {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpBswap16 {
                        break
                }
-               v.reset(OpPPC64MOVWstore)
-               v.AuxInt = int32ToAuxInt(i0)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
+               val := v_1.Args[0]
+               mem := v_2
+               v.reset(OpPPC64MOVHBRstore)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVDaddr, ptr.Type)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg(ptr)
+               v.AddArg3(v0, val, mem)
                return true
        }
        return false
@@ -9536,6 +9441,39 @@ func rewriteValuePPC64_OpPPC64MOVHstoreidx(v *Value) bool {
                v.AddArg4(ptr, idx, x, mem)
                return true
        }
+       // match: (MOVHstoreidx ptr idx r:(BRH val) mem)
+       // cond: r.Uses == 1
+       // result: (MOVHBRstoreidx ptr idx val mem)
+       for {
+               ptr := v_0
+               idx := v_1
+               r := v_2
+               if r.Op != OpPPC64BRH {
+                       break
+               }
+               val := r.Args[0]
+               mem := v_3
+               if !(r.Uses == 1) {
+                       break
+               }
+               v.reset(OpPPC64MOVHBRstoreidx)
+               v.AddArg4(ptr, idx, val, mem)
+               return true
+       }
+       // match: (MOVHstoreidx ptr idx (Bswap16 val) mem)
+       // result: (MOVHBRstoreidx ptr idx val mem)
+       for {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpBswap16 {
+                       break
+               }
+               val := v_2.Args[0]
+               mem := v_3
+               v.reset(OpPPC64MOVHBRstoreidx)
+               v.AddArg4(ptr, idx, val, mem)
+               return true
+       }
        return false
 }
 func rewriteValuePPC64_OpPPC64MOVHstorezero(v *Value) bool {
@@ -9591,10 +9529,9 @@ func rewriteValuePPC64_OpPPC64MOVWBRstore(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVWBRstore {sym} ptr (MOVWreg x) mem)
-       // result: (MOVWBRstore {sym} ptr x mem)
+       // match: (MOVWBRstore ptr (MOVWreg x) mem)
+       // result: (MOVWBRstore ptr x mem)
        for {
-               sym := auxToSym(v.Aux)
                ptr := v_0
                if v_1.Op != OpPPC64MOVWreg {
                        break
@@ -9602,14 +9539,12 @@ func rewriteValuePPC64_OpPPC64MOVWBRstore(v *Value) bool {
                x := v_1.Args[0]
                mem := v_2
                v.reset(OpPPC64MOVWBRstore)
-               v.Aux = symToAux(sym)
                v.AddArg3(ptr, x, mem)
                return true
        }
-       // match: (MOVWBRstore {sym} ptr (MOVWZreg x) mem)
-       // result: (MOVWBRstore {sym} ptr x mem)
+       // match: (MOVWBRstore ptr (MOVWZreg x) mem)
+       // result: (MOVWBRstore ptr x mem)
        for {
-               sym := auxToSym(v.Aux)
                ptr := v_0
                if v_1.Op != OpPPC64MOVWZreg {
                        break
@@ -9617,7 +9552,6 @@ func rewriteValuePPC64_OpPPC64MOVWBRstore(v *Value) bool {
                x := v_1.Args[0]
                mem := v_2
                v.reset(OpPPC64MOVWBRstore)
-               v.Aux = symToAux(sym)
                v.AddArg3(ptr, x, mem)
                return true
        }
@@ -10580,6 +10514,7 @@ func rewriteValuePPC64_OpPPC64MOVWstore(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
+       b := v.Block
        // match: (MOVWstore [off1] {sym} (ADDconst [off2] x) val mem)
        // cond: is16Bit(int64(off1)+off2)
        // result: (MOVWstore [off1+int32(off2)] {sym} x val mem)
@@ -10699,6 +10634,49 @@ func rewriteValuePPC64_OpPPC64MOVWstore(v *Value) bool {
                v.AddArg3(ptr, x, mem)
                return true
        }
+       // match: (MOVWstore [off] {sym} ptr r:(BRW val) mem)
+       // cond: r.Uses == 1
+       // result: (MOVWBRstore (MOVDaddr <ptr.Type> [off] {sym} ptr) val mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               r := v_1
+               if r.Op != OpPPC64BRW {
+                       break
+               }
+               val := r.Args[0]
+               mem := v_2
+               if !(r.Uses == 1) {
+                       break
+               }
+               v.reset(OpPPC64MOVWBRstore)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVDaddr, ptr.Type)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg(ptr)
+               v.AddArg3(v0, val, mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (Bswap32 val) mem)
+       // result: (MOVWBRstore (MOVDaddr <ptr.Type> [off] {sym} ptr) val mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpBswap32 {
+                       break
+               }
+               val := v_1.Args[0]
+               mem := v_2
+               v.reset(OpPPC64MOVWBRstore)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVDaddr, ptr.Type)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg(ptr)
+               v.AddArg3(v0, val, mem)
+               return true
+       }
        return false
 }
 func rewriteValuePPC64_OpPPC64MOVWstoreidx(v *Value) bool {
@@ -10772,6 +10750,39 @@ func rewriteValuePPC64_OpPPC64MOVWstoreidx(v *Value) bool {
                v.AddArg4(ptr, idx, x, mem)
                return true
        }
+       // match: (MOVWstoreidx ptr idx r:(BRW val) mem)
+       // cond: r.Uses == 1
+       // result: (MOVWBRstoreidx ptr idx val mem)
+       for {
+               ptr := v_0
+               idx := v_1
+               r := v_2
+               if r.Op != OpPPC64BRW {
+                       break
+               }
+               val := r.Args[0]
+               mem := v_3
+               if !(r.Uses == 1) {
+                       break
+               }
+               v.reset(OpPPC64MOVWBRstoreidx)
+               v.AddArg4(ptr, idx, val, mem)
+               return true
+       }
+       // match: (MOVWstoreidx ptr idx (Bswap32 val) mem)
+       // result: (MOVWBRstoreidx ptr idx val mem)
+       for {
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpBswap32 {
+                       break
+               }
+               val := v_2.Args[0]
+               mem := v_3
+               v.reset(OpPPC64MOVWBRstoreidx)
+               v.AddArg4(ptr, idx, val, mem)
+               return true
+       }
        return false
 }
 func rewriteValuePPC64_OpPPC64MOVWstorezero(v *Value) bool {
@@ -11056,9 +11067,6 @@ func rewriteValuePPC64_OpPPC64NotEqual(v *Value) bool {
 func rewriteValuePPC64_OpPPC64OR(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       config := b.Func.Config
-       typ := &b.Func.Config.Types
        // match: (OR x (NOR y y))
        // result: (ORN x y)
        for {
@@ -11115,1293 +11123,6 @@ func rewriteValuePPC64_OpPPC64OR(v *Value) bool {
                }
                break
        }
-       // match: (OR <t> x0:(MOVBZload [i0] {s} p mem) o1:(SLWconst x1:(MOVBZload [i1] {s} p mem) [8]))
-       // cond: !config.BigEndian && i1 == i0+1 && x0.Uses ==1 && x1.Uses == 1 && o1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, o1)
-       // result: @mergePoint(b,x0,x1) (MOVHZload <t> {s} [i0] p mem)
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       o1 := v_1
-                       if o1.Op != OpPPC64SLWconst || auxIntToInt64(o1.AuxInt) != 8 {
-                               continue
-                       }
-                       x1 := o1.Args[0]
-                       if x1.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(!config.BigEndian && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && o1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, o1)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpPPC64MOVHZload, t)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i0)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p, mem)
-                       return true
-               }
-               break
-       }
-       // match: (OR <t> x0:(MOVBZload [i0] {s} p mem) o1:(SLDconst x1:(MOVBZload [i1] {s} p mem) [8]))
-       // cond: !config.BigEndian && i1 == i0+1 && x0.Uses ==1 && x1.Uses == 1 && o1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, o1)
-       // result: @mergePoint(b,x0,x1) (MOVHZload <t> {s} [i0] p mem)
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       o1 := v_1
-                       if o1.Op != OpPPC64SLDconst || auxIntToInt64(o1.AuxInt) != 8 {
-                               continue
-                       }
-                       x1 := o1.Args[0]
-                       if x1.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(!config.BigEndian && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && o1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, o1)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpPPC64MOVHZload, t)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i0)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p, mem)
-                       return true
-               }
-               break
-       }
-       // match: (OR <t> x0:(MOVBZload [i1] {s} p mem) o1:(SLWconst x1:(MOVBZload [i0] {s} p mem) [8]))
-       // cond: !config.BigEndian && i1 == i0+1 && x0.Uses ==1 && x1.Uses == 1 && o1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, o1)
-       // result: @mergePoint(b,x0,x1) (MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       o1 := v_1
-                       if o1.Op != OpPPC64SLWconst || auxIntToInt64(o1.AuxInt) != 8 {
-                               continue
-                       }
-                       x1 := o1.Args[0]
-                       if x1.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(!config.BigEndian && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && o1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, o1)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpPPC64MOVHBRload, t)
-                       v.copyOf(v0)
-                       v1 := b.NewValue0(x1.Pos, OpPPC64MOVDaddr, typ.Uintptr)
-                       v1.AuxInt = int32ToAuxInt(i0)
-                       v1.Aux = symToAux(s)
-                       v1.AddArg(p)
-                       v0.AddArg2(v1, mem)
-                       return true
-               }
-               break
-       }
-       // match: (OR <t> x0:(MOVBZload [i1] {s} p mem) o1:(SLDconst x1:(MOVBZload [i0] {s} p mem) [8]))
-       // cond: !config.BigEndian && i1 == i0+1 && x0.Uses ==1 && x1.Uses == 1 && o1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, o1)
-       // result: @mergePoint(b,x0,x1) (MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       o1 := v_1
-                       if o1.Op != OpPPC64SLDconst || auxIntToInt64(o1.AuxInt) != 8 {
-                               continue
-                       }
-                       x1 := o1.Args[0]
-                       if x1.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(!config.BigEndian && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && o1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, o1)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpPPC64MOVHBRload, t)
-                       v.copyOf(v0)
-                       v1 := b.NewValue0(x1.Pos, OpPPC64MOVDaddr, typ.Uintptr)
-                       v1.AuxInt = int32ToAuxInt(i0)
-                       v1.Aux = symToAux(s)
-                       v1.AddArg(p)
-                       v0.AddArg2(v1, mem)
-                       return true
-               }
-               break
-       }
-       // match: (OR <t> s0:(SLWconst x0:(MOVBZload [i1] {s} p mem) [n1]) s1:(SLWconst x1:(MOVBZload [i0] {s} p mem) [n2]))
-       // cond: !config.BigEndian && i1 == i0+1 && n1%8 == 0 && n2 == n1+8 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, s0, s1)
-       // result: @mergePoint(b,x0,x1) (SLDconst <t> (MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [n1])
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s0 := v_0
-                       if s0.Op != OpPPC64SLWconst {
-                               continue
-                       }
-                       n1 := auxIntToInt64(s0.AuxInt)
-                       x0 := s0.Args[0]
-                       if x0.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       s1 := v_1
-                       if s1.Op != OpPPC64SLWconst {
-                               continue
-                       }
-                       n2 := auxIntToInt64(s1.AuxInt)
-                       x1 := s1.Args[0]
-                       if x1.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(!config.BigEndian && i1 == i0+1 && n1%8 == 0 && n2 == n1+8 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, s0, s1)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpPPC64SLDconst, t)
-                       v.copyOf(v0)
-                       v0.AuxInt = int64ToAuxInt(n1)
-                       v1 := b.NewValue0(x1.Pos, OpPPC64MOVHBRload, t)
-                       v2 := b.NewValue0(x1.Pos, OpPPC64MOVDaddr, typ.Uintptr)
-                       v2.AuxInt = int32ToAuxInt(i0)
-                       v2.Aux = symToAux(s)
-                       v2.AddArg(p)
-                       v1.AddArg2(v2, mem)
-                       v0.AddArg(v1)
-                       return true
-               }
-               break
-       }
-       // match: (OR <t> s0:(SLDconst x0:(MOVBZload [i1] {s} p mem) [n1]) s1:(SLDconst x1:(MOVBZload [i0] {s} p mem) [n2]))
-       // cond: !config.BigEndian && i1 == i0+1 && n1%8 == 0 && n2 == n1+8 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, s0, s1)
-       // result: @mergePoint(b,x0,x1) (SLDconst <t> (MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [n1])
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s0 := v_0
-                       if s0.Op != OpPPC64SLDconst {
-                               continue
-                       }
-                       n1 := auxIntToInt64(s0.AuxInt)
-                       x0 := s0.Args[0]
-                       if x0.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       s1 := v_1
-                       if s1.Op != OpPPC64SLDconst {
-                               continue
-                       }
-                       n2 := auxIntToInt64(s1.AuxInt)
-                       x1 := s1.Args[0]
-                       if x1.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(!config.BigEndian && i1 == i0+1 && n1%8 == 0 && n2 == n1+8 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, s0, s1)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpPPC64SLDconst, t)
-                       v.copyOf(v0)
-                       v0.AuxInt = int64ToAuxInt(n1)
-                       v1 := b.NewValue0(x1.Pos, OpPPC64MOVHBRload, t)
-                       v2 := b.NewValue0(x1.Pos, OpPPC64MOVDaddr, typ.Uintptr)
-                       v2.AuxInt = int32ToAuxInt(i0)
-                       v2.Aux = symToAux(s)
-                       v2.AddArg(p)
-                       v1.AddArg2(v2, mem)
-                       v0.AddArg(v1)
-                       return true
-               }
-               break
-       }
-       // match: (OR <t> s1:(SLWconst x2:(MOVBZload [i3] {s} p mem) [24]) o0:(OR <t> s0:(SLWconst x1:(MOVBZload [i2] {s} p mem) [16]) x0:(MOVHZload [i0] {s} p mem)))
-       // cond: !config.BigEndian && i2 == i0+2 && i3 == i0+3 && x0.Uses ==1 && x1.Uses == 1 && x2.Uses == 1 && o0.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWZload <t> {s} [i0] p mem)
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s1 := v_0
-                       if s1.Op != OpPPC64SLWconst || auxIntToInt64(s1.AuxInt) != 24 {
-                               continue
-                       }
-                       x2 := s1.Args[0]
-                       if x2.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i3 := auxIntToInt32(x2.AuxInt)
-                       s := auxToSym(x2.Aux)
-                       mem := x2.Args[1]
-                       p := x2.Args[0]
-                       o0 := v_1
-                       if o0.Op != OpPPC64OR || o0.Type != t {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o0_0 := o0.Args[0]
-                       o0_1 := o0.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, o0_0, o0_1 = _i1+1, o0_1, o0_0 {
-                               s0 := o0_0
-                               if s0.Op != OpPPC64SLWconst || auxIntToInt64(s0.AuxInt) != 16 {
-                                       continue
-                               }
-                               x1 := s0.Args[0]
-                               if x1.Op != OpPPC64MOVBZload {
-                                       continue
-                               }
-                               i2 := auxIntToInt32(x1.AuxInt)
-                               if auxToSym(x1.Aux) != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               if p != x1.Args[0] || mem != x1.Args[1] {
-                                       continue
-                               }
-                               x0 := o0_1
-                               if x0.Op != OpPPC64MOVHZload {
-                                       continue
-                               }
-                               i0 := auxIntToInt32(x0.AuxInt)
-                               if auxToSym(x0.Aux) != s {
-                                       continue
-                               }
-                               _ = x0.Args[1]
-                               if p != x0.Args[0] || mem != x0.Args[1] || !(!config.BigEndian && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && o0.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, x2)
-                               v0 := b.NewValue0(x0.Pos, OpPPC64MOVWZload, t)
-                               v.copyOf(v0)
-                               v0.AuxInt = int32ToAuxInt(i0)
-                               v0.Aux = symToAux(s)
-                               v0.AddArg2(p, mem)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (OR <t> s1:(SLDconst x2:(MOVBZload [i3] {s} p mem) [24]) o0:(OR <t> s0:(SLDconst x1:(MOVBZload [i2] {s} p mem) [16]) x0:(MOVHZload [i0] {s} p mem)))
-       // cond: !config.BigEndian && i2 == i0+2 && i3 == i0+3 && x0.Uses ==1 && x1.Uses == 1 && x2.Uses == 1 && o0.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWZload <t> {s} [i0] p mem)
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s1 := v_0
-                       if s1.Op != OpPPC64SLDconst || auxIntToInt64(s1.AuxInt) != 24 {
-                               continue
-                       }
-                       x2 := s1.Args[0]
-                       if x2.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i3 := auxIntToInt32(x2.AuxInt)
-                       s := auxToSym(x2.Aux)
-                       mem := x2.Args[1]
-                       p := x2.Args[0]
-                       o0 := v_1
-                       if o0.Op != OpPPC64OR || o0.Type != t {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o0_0 := o0.Args[0]
-                       o0_1 := o0.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, o0_0, o0_1 = _i1+1, o0_1, o0_0 {
-                               s0 := o0_0
-                               if s0.Op != OpPPC64SLDconst || auxIntToInt64(s0.AuxInt) != 16 {
-                                       continue
-                               }
-                               x1 := s0.Args[0]
-                               if x1.Op != OpPPC64MOVBZload {
-                                       continue
-                               }
-                               i2 := auxIntToInt32(x1.AuxInt)
-                               if auxToSym(x1.Aux) != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               if p != x1.Args[0] || mem != x1.Args[1] {
-                                       continue
-                               }
-                               x0 := o0_1
-                               if x0.Op != OpPPC64MOVHZload {
-                                       continue
-                               }
-                               i0 := auxIntToInt32(x0.AuxInt)
-                               if auxToSym(x0.Aux) != s {
-                                       continue
-                               }
-                               _ = x0.Args[1]
-                               if p != x0.Args[0] || mem != x0.Args[1] || !(!config.BigEndian && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && o0.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, x2)
-                               v0 := b.NewValue0(x0.Pos, OpPPC64MOVWZload, t)
-                               v.copyOf(v0)
-                               v0.AuxInt = int32ToAuxInt(i0)
-                               v0.Aux = symToAux(s)
-                               v0.AddArg2(p, mem)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (OR <t> s1:(SLWconst x2:(MOVBZload [i0] {s} p mem) [24]) o0:(OR <t> s0:(SLWconst x1:(MOVBZload [i1] {s} p mem) [16]) x0:(MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i2] {s} p) mem)))
-       // cond: !config.BigEndian && i1 == i0+1 && i2 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && o0.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s1 := v_0
-                       if s1.Op != OpPPC64SLWconst || auxIntToInt64(s1.AuxInt) != 24 {
-                               continue
-                       }
-                       x2 := s1.Args[0]
-                       if x2.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x2.AuxInt)
-                       s := auxToSym(x2.Aux)
-                       mem := x2.Args[1]
-                       p := x2.Args[0]
-                       o0 := v_1
-                       if o0.Op != OpPPC64OR || o0.Type != t {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o0_0 := o0.Args[0]
-                       o0_1 := o0.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, o0_0, o0_1 = _i1+1, o0_1, o0_0 {
-                               s0 := o0_0
-                               if s0.Op != OpPPC64SLWconst || auxIntToInt64(s0.AuxInt) != 16 {
-                                       continue
-                               }
-                               x1 := s0.Args[0]
-                               if x1.Op != OpPPC64MOVBZload {
-                                       continue
-                               }
-                               i1 := auxIntToInt32(x1.AuxInt)
-                               if auxToSym(x1.Aux) != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               if p != x1.Args[0] || mem != x1.Args[1] {
-                                       continue
-                               }
-                               x0 := o0_1
-                               if x0.Op != OpPPC64MOVHBRload || x0.Type != t {
-                                       continue
-                               }
-                               _ = x0.Args[1]
-                               x0_0 := x0.Args[0]
-                               if x0_0.Op != OpPPC64MOVDaddr || x0_0.Type != typ.Uintptr {
-                                       continue
-                               }
-                               i2 := auxIntToInt32(x0_0.AuxInt)
-                               if auxToSym(x0_0.Aux) != s || p != x0_0.Args[0] || mem != x0.Args[1] || !(!config.BigEndian && i1 == i0+1 && i2 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && o0.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, x2)
-                               v0 := b.NewValue0(x0.Pos, OpPPC64MOVWBRload, t)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x0.Pos, OpPPC64MOVDaddr, typ.Uintptr)
-                               v1.AuxInt = int32ToAuxInt(i0)
-                               v1.Aux = symToAux(s)
-                               v1.AddArg(p)
-                               v0.AddArg2(v1, mem)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (OR <t> s1:(SLDconst x2:(MOVBZload [i0] {s} p mem) [24]) o0:(OR <t> s0:(SLDconst x1:(MOVBZload [i1] {s} p mem) [16]) x0:(MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i2] {s} p) mem)))
-       // cond: !config.BigEndian && i1 == i0+1 && i2 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && o0.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s1 := v_0
-                       if s1.Op != OpPPC64SLDconst || auxIntToInt64(s1.AuxInt) != 24 {
-                               continue
-                       }
-                       x2 := s1.Args[0]
-                       if x2.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x2.AuxInt)
-                       s := auxToSym(x2.Aux)
-                       mem := x2.Args[1]
-                       p := x2.Args[0]
-                       o0 := v_1
-                       if o0.Op != OpPPC64OR || o0.Type != t {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o0_0 := o0.Args[0]
-                       o0_1 := o0.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, o0_0, o0_1 = _i1+1, o0_1, o0_0 {
-                               s0 := o0_0
-                               if s0.Op != OpPPC64SLDconst || auxIntToInt64(s0.AuxInt) != 16 {
-                                       continue
-                               }
-                               x1 := s0.Args[0]
-                               if x1.Op != OpPPC64MOVBZload {
-                                       continue
-                               }
-                               i1 := auxIntToInt32(x1.AuxInt)
-                               if auxToSym(x1.Aux) != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               if p != x1.Args[0] || mem != x1.Args[1] {
-                                       continue
-                               }
-                               x0 := o0_1
-                               if x0.Op != OpPPC64MOVHBRload || x0.Type != t {
-                                       continue
-                               }
-                               _ = x0.Args[1]
-                               x0_0 := x0.Args[0]
-                               if x0_0.Op != OpPPC64MOVDaddr || x0_0.Type != typ.Uintptr {
-                                       continue
-                               }
-                               i2 := auxIntToInt32(x0_0.AuxInt)
-                               if auxToSym(x0_0.Aux) != s || p != x0_0.Args[0] || mem != x0.Args[1] || !(!config.BigEndian && i1 == i0+1 && i2 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && o0.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, x2)
-                               v0 := b.NewValue0(x0.Pos, OpPPC64MOVWBRload, t)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x0.Pos, OpPPC64MOVDaddr, typ.Uintptr)
-                               v1.AuxInt = int32ToAuxInt(i0)
-                               v1.Aux = symToAux(s)
-                               v1.AddArg(p)
-                               v0.AddArg2(v1, mem)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (OR <t> x0:(MOVBZload [i3] {s} p mem) o0:(OR <t> s0:(SLWconst x1:(MOVBZload [i2] {s} p mem) [8]) s1:(SLWconst x2:(MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [16])))
-       // cond: !config.BigEndian && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && o0.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i3 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       o0 := v_1
-                       if o0.Op != OpPPC64OR || o0.Type != t {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o0_0 := o0.Args[0]
-                       o0_1 := o0.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, o0_0, o0_1 = _i1+1, o0_1, o0_0 {
-                               s0 := o0_0
-                               if s0.Op != OpPPC64SLWconst || auxIntToInt64(s0.AuxInt) != 8 {
-                                       continue
-                               }
-                               x1 := s0.Args[0]
-                               if x1.Op != OpPPC64MOVBZload {
-                                       continue
-                               }
-                               i2 := auxIntToInt32(x1.AuxInt)
-                               if auxToSym(x1.Aux) != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               if p != x1.Args[0] || mem != x1.Args[1] {
-                                       continue
-                               }
-                               s1 := o0_1
-                               if s1.Op != OpPPC64SLWconst || auxIntToInt64(s1.AuxInt) != 16 {
-                                       continue
-                               }
-                               x2 := s1.Args[0]
-                               if x2.Op != OpPPC64MOVHBRload || x2.Type != t {
-                                       continue
-                               }
-                               _ = x2.Args[1]
-                               x2_0 := x2.Args[0]
-                               if x2_0.Op != OpPPC64MOVDaddr || x2_0.Type != typ.Uintptr {
-                                       continue
-                               }
-                               i0 := auxIntToInt32(x2_0.AuxInt)
-                               if auxToSym(x2_0.Aux) != s || p != x2_0.Args[0] || mem != x2.Args[1] || !(!config.BigEndian && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && o0.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, x2)
-                               v0 := b.NewValue0(x2.Pos, OpPPC64MOVWBRload, t)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x2.Pos, OpPPC64MOVDaddr, typ.Uintptr)
-                               v1.AuxInt = int32ToAuxInt(i0)
-                               v1.Aux = symToAux(s)
-                               v1.AddArg(p)
-                               v0.AddArg2(v1, mem)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (OR <t> x0:(MOVBZload [i3] {s} p mem) o0:(OR <t> s0:(SLDconst x1:(MOVBZload [i2] {s} p mem) [8]) s1:(SLDconst x2:(MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [16])))
-       // cond: !config.BigEndian && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && o0.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i3 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       o0 := v_1
-                       if o0.Op != OpPPC64OR || o0.Type != t {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o0_0 := o0.Args[0]
-                       o0_1 := o0.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, o0_0, o0_1 = _i1+1, o0_1, o0_0 {
-                               s0 := o0_0
-                               if s0.Op != OpPPC64SLDconst || auxIntToInt64(s0.AuxInt) != 8 {
-                                       continue
-                               }
-                               x1 := s0.Args[0]
-                               if x1.Op != OpPPC64MOVBZload {
-                                       continue
-                               }
-                               i2 := auxIntToInt32(x1.AuxInt)
-                               if auxToSym(x1.Aux) != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               if p != x1.Args[0] || mem != x1.Args[1] {
-                                       continue
-                               }
-                               s1 := o0_1
-                               if s1.Op != OpPPC64SLDconst || auxIntToInt64(s1.AuxInt) != 16 {
-                                       continue
-                               }
-                               x2 := s1.Args[0]
-                               if x2.Op != OpPPC64MOVHBRload || x2.Type != t {
-                                       continue
-                               }
-                               _ = x2.Args[1]
-                               x2_0 := x2.Args[0]
-                               if x2_0.Op != OpPPC64MOVDaddr || x2_0.Type != typ.Uintptr {
-                                       continue
-                               }
-                               i0 := auxIntToInt32(x2_0.AuxInt)
-                               if auxToSym(x2_0.Aux) != s || p != x2_0.Args[0] || mem != x2.Args[1] || !(!config.BigEndian && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && o0.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, x2)
-                               v0 := b.NewValue0(x2.Pos, OpPPC64MOVWBRload, t)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x2.Pos, OpPPC64MOVDaddr, typ.Uintptr)
-                               v1.AuxInt = int32ToAuxInt(i0)
-                               v1.Aux = symToAux(s)
-                               v1.AddArg(p)
-                               v0.AddArg2(v1, mem)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (OR <t> s2:(SLDconst x2:(MOVBZload [i3] {s} p mem) [32]) o0:(OR <t> s1:(SLDconst x1:(MOVBZload [i2] {s} p mem) [40]) s0:(SLDconst x0:(MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [48])))
-       // cond: !config.BigEndian && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && o0.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, s2, o0)
-       // result: @mergePoint(b,x0,x1,x2) (SLDconst <t> (MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [32])
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s2 := v_0
-                       if s2.Op != OpPPC64SLDconst || auxIntToInt64(s2.AuxInt) != 32 {
-                               continue
-                       }
-                       x2 := s2.Args[0]
-                       if x2.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i3 := auxIntToInt32(x2.AuxInt)
-                       s := auxToSym(x2.Aux)
-                       mem := x2.Args[1]
-                       p := x2.Args[0]
-                       o0 := v_1
-                       if o0.Op != OpPPC64OR || o0.Type != t {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o0_0 := o0.Args[0]
-                       o0_1 := o0.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, o0_0, o0_1 = _i1+1, o0_1, o0_0 {
-                               s1 := o0_0
-                               if s1.Op != OpPPC64SLDconst || auxIntToInt64(s1.AuxInt) != 40 {
-                                       continue
-                               }
-                               x1 := s1.Args[0]
-                               if x1.Op != OpPPC64MOVBZload {
-                                       continue
-                               }
-                               i2 := auxIntToInt32(x1.AuxInt)
-                               if auxToSym(x1.Aux) != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               if p != x1.Args[0] || mem != x1.Args[1] {
-                                       continue
-                               }
-                               s0 := o0_1
-                               if s0.Op != OpPPC64SLDconst || auxIntToInt64(s0.AuxInt) != 48 {
-                                       continue
-                               }
-                               x0 := s0.Args[0]
-                               if x0.Op != OpPPC64MOVHBRload || x0.Type != t {
-                                       continue
-                               }
-                               _ = x0.Args[1]
-                               x0_0 := x0.Args[0]
-                               if x0_0.Op != OpPPC64MOVDaddr || x0_0.Type != typ.Uintptr {
-                                       continue
-                               }
-                               i0 := auxIntToInt32(x0_0.AuxInt)
-                               if auxToSym(x0_0.Aux) != s || p != x0_0.Args[0] || mem != x0.Args[1] || !(!config.BigEndian && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && o0.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, s2, o0)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, x2)
-                               v0 := b.NewValue0(x0.Pos, OpPPC64SLDconst, t)
-                               v.copyOf(v0)
-                               v0.AuxInt = int64ToAuxInt(32)
-                               v1 := b.NewValue0(x0.Pos, OpPPC64MOVWBRload, t)
-                               v2 := b.NewValue0(x0.Pos, OpPPC64MOVDaddr, typ.Uintptr)
-                               v2.AuxInt = int32ToAuxInt(i0)
-                               v2.Aux = symToAux(s)
-                               v2.AddArg(p)
-                               v1.AddArg2(v2, mem)
-                               v0.AddArg(v1)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (OR <t> s2:(SLDconst x2:(MOVBZload [i0] {s} p mem) [56]) o0:(OR <t> s1:(SLDconst x1:(MOVBZload [i1] {s} p mem) [48]) s0:(SLDconst x0:(MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i2] {s} p) mem) [32])))
-       // cond: !config.BigEndian && i1 == i0+1 && i2 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && o0.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, s2, o0)
-       // result: @mergePoint(b,x0,x1,x2) (SLDconst <t> (MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [32])
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s2 := v_0
-                       if s2.Op != OpPPC64SLDconst || auxIntToInt64(s2.AuxInt) != 56 {
-                               continue
-                       }
-                       x2 := s2.Args[0]
-                       if x2.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x2.AuxInt)
-                       s := auxToSym(x2.Aux)
-                       mem := x2.Args[1]
-                       p := x2.Args[0]
-                       o0 := v_1
-                       if o0.Op != OpPPC64OR || o0.Type != t {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o0_0 := o0.Args[0]
-                       o0_1 := o0.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, o0_0, o0_1 = _i1+1, o0_1, o0_0 {
-                               s1 := o0_0
-                               if s1.Op != OpPPC64SLDconst || auxIntToInt64(s1.AuxInt) != 48 {
-                                       continue
-                               }
-                               x1 := s1.Args[0]
-                               if x1.Op != OpPPC64MOVBZload {
-                                       continue
-                               }
-                               i1 := auxIntToInt32(x1.AuxInt)
-                               if auxToSym(x1.Aux) != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               if p != x1.Args[0] || mem != x1.Args[1] {
-                                       continue
-                               }
-                               s0 := o0_1
-                               if s0.Op != OpPPC64SLDconst || auxIntToInt64(s0.AuxInt) != 32 {
-                                       continue
-                               }
-                               x0 := s0.Args[0]
-                               if x0.Op != OpPPC64MOVHBRload || x0.Type != t {
-                                       continue
-                               }
-                               _ = x0.Args[1]
-                               x0_0 := x0.Args[0]
-                               if x0_0.Op != OpPPC64MOVDaddr || x0_0.Type != typ.Uintptr {
-                                       continue
-                               }
-                               i2 := auxIntToInt32(x0_0.AuxInt)
-                               if auxToSym(x0_0.Aux) != s || p != x0_0.Args[0] || mem != x0.Args[1] || !(!config.BigEndian && i1 == i0+1 && i2 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && o0.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, s2, o0)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, x2)
-                               v0 := b.NewValue0(x0.Pos, OpPPC64SLDconst, t)
-                               v.copyOf(v0)
-                               v0.AuxInt = int64ToAuxInt(32)
-                               v1 := b.NewValue0(x0.Pos, OpPPC64MOVWBRload, t)
-                               v2 := b.NewValue0(x0.Pos, OpPPC64MOVDaddr, typ.Uintptr)
-                               v2.AuxInt = int32ToAuxInt(i0)
-                               v2.Aux = symToAux(s)
-                               v2.AddArg(p)
-                               v1.AddArg2(v2, mem)
-                               v0.AddArg(v1)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (OR <t> s6:(SLDconst x7:(MOVBZload [i7] {s} p mem) [56]) o5:(OR <t> s5:(SLDconst x6:(MOVBZload [i6] {s} p mem) [48]) o4:(OR <t> s4:(SLDconst x5:(MOVBZload [i5] {s} p mem) [40]) o3:(OR <t> s3:(SLDconst x4:(MOVBZload [i4] {s} p mem) [32]) x0:(MOVWZload {s} [i0] p mem)))))
-       // cond: !config.BigEndian && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses ==1 && x7.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && mergePoint(b, x0, x4, x5, x6, x7) != nil && clobber(x0, x4, x5, x6, x7, s3, s4, s5, s6, o3, o4, o5)
-       // result: @mergePoint(b,x0,x4,x5,x6,x7) (MOVDload <t> {s} [i0] p mem)
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s6 := v_0
-                       if s6.Op != OpPPC64SLDconst || auxIntToInt64(s6.AuxInt) != 56 {
-                               continue
-                       }
-                       x7 := s6.Args[0]
-                       if x7.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i7 := auxIntToInt32(x7.AuxInt)
-                       s := auxToSym(x7.Aux)
-                       mem := x7.Args[1]
-                       p := x7.Args[0]
-                       o5 := v_1
-                       if o5.Op != OpPPC64OR || o5.Type != t {
-                               continue
-                       }
-                       _ = o5.Args[1]
-                       o5_0 := o5.Args[0]
-                       o5_1 := o5.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, o5_0, o5_1 = _i1+1, o5_1, o5_0 {
-                               s5 := o5_0
-                               if s5.Op != OpPPC64SLDconst || auxIntToInt64(s5.AuxInt) != 48 {
-                                       continue
-                               }
-                               x6 := s5.Args[0]
-                               if x6.Op != OpPPC64MOVBZload {
-                                       continue
-                               }
-                               i6 := auxIntToInt32(x6.AuxInt)
-                               if auxToSym(x6.Aux) != s {
-                                       continue
-                               }
-                               _ = x6.Args[1]
-                               if p != x6.Args[0] || mem != x6.Args[1] {
-                                       continue
-                               }
-                               o4 := o5_1
-                               if o4.Op != OpPPC64OR || o4.Type != t {
-                                       continue
-                               }
-                               _ = o4.Args[1]
-                               o4_0 := o4.Args[0]
-                               o4_1 := o4.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, o4_0, o4_1 = _i2+1, o4_1, o4_0 {
-                                       s4 := o4_0
-                                       if s4.Op != OpPPC64SLDconst || auxIntToInt64(s4.AuxInt) != 40 {
-                                               continue
-                                       }
-                                       x5 := s4.Args[0]
-                                       if x5.Op != OpPPC64MOVBZload {
-                                               continue
-                                       }
-                                       i5 := auxIntToInt32(x5.AuxInt)
-                                       if auxToSym(x5.Aux) != s {
-                                               continue
-                                       }
-                                       _ = x5.Args[1]
-                                       if p != x5.Args[0] || mem != x5.Args[1] {
-                                               continue
-                                       }
-                                       o3 := o4_1
-                                       if o3.Op != OpPPC64OR || o3.Type != t {
-                                               continue
-                                       }
-                                       _ = o3.Args[1]
-                                       o3_0 := o3.Args[0]
-                                       o3_1 := o3.Args[1]
-                                       for _i3 := 0; _i3 <= 1; _i3, o3_0, o3_1 = _i3+1, o3_1, o3_0 {
-                                               s3 := o3_0
-                                               if s3.Op != OpPPC64SLDconst || auxIntToInt64(s3.AuxInt) != 32 {
-                                                       continue
-                                               }
-                                               x4 := s3.Args[0]
-                                               if x4.Op != OpPPC64MOVBZload {
-                                                       continue
-                                               }
-                                               i4 := auxIntToInt32(x4.AuxInt)
-                                               if auxToSym(x4.Aux) != s {
-                                                       continue
-                                               }
-                                               _ = x4.Args[1]
-                                               if p != x4.Args[0] || mem != x4.Args[1] {
-                                                       continue
-                                               }
-                                               x0 := o3_1
-                                               if x0.Op != OpPPC64MOVWZload {
-                                                       continue
-                                               }
-                                               i0 := auxIntToInt32(x0.AuxInt)
-                                               if auxToSym(x0.Aux) != s {
-                                                       continue
-                                               }
-                                               _ = x0.Args[1]
-                                               if p != x0.Args[0] || mem != x0.Args[1] || !(!config.BigEndian && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && mergePoint(b, x0, x4, x5, x6, x7) != nil && clobber(x0, x4, x5, x6, x7, s3, s4, s5, s6, o3, o4, o5)) {
-                                                       continue
-                                               }
-                                               b = mergePoint(b, x0, x4, x5, x6, x7)
-                                               v0 := b.NewValue0(x0.Pos, OpPPC64MOVDload, t)
-                                               v.copyOf(v0)
-                                               v0.AuxInt = int32ToAuxInt(i0)
-                                               v0.Aux = symToAux(s)
-                                               v0.AddArg2(p, mem)
-                                               return true
-                                       }
-                               }
-                       }
-               }
-               break
-       }
-       // match: (OR <t> s0:(SLDconst x0:(MOVBZload [i0] {s} p mem) [56]) o0:(OR <t> s1:(SLDconst x1:(MOVBZload [i1] {s} p mem) [48]) o1:(OR <t> s2:(SLDconst x2:(MOVBZload [i2] {s} p mem) [40]) o2:(OR <t> s3:(SLDconst x3:(MOVBZload [i3] {s} p mem) [32]) x4:(MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i4] p) mem)))))
-       // cond: !config.BigEndian && i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0, x1, x2, x3, x4, o0, o1, o2, s0, s1, s2, s3)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s0 := v_0
-                       if s0.Op != OpPPC64SLDconst || auxIntToInt64(s0.AuxInt) != 56 {
-                               continue
-                       }
-                       x0 := s0.Args[0]
-                       if x0.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       o0 := v_1
-                       if o0.Op != OpPPC64OR || o0.Type != t {
-                               continue
-                       }
-                       _ = o0.Args[1]
-                       o0_0 := o0.Args[0]
-                       o0_1 := o0.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, o0_0, o0_1 = _i1+1, o0_1, o0_0 {
-                               s1 := o0_0
-                               if s1.Op != OpPPC64SLDconst || auxIntToInt64(s1.AuxInt) != 48 {
-                                       continue
-                               }
-                               x1 := s1.Args[0]
-                               if x1.Op != OpPPC64MOVBZload {
-                                       continue
-                               }
-                               i1 := auxIntToInt32(x1.AuxInt)
-                               if auxToSym(x1.Aux) != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               if p != x1.Args[0] || mem != x1.Args[1] {
-                                       continue
-                               }
-                               o1 := o0_1
-                               if o1.Op != OpPPC64OR || o1.Type != t {
-                                       continue
-                               }
-                               _ = o1.Args[1]
-                               o1_0 := o1.Args[0]
-                               o1_1 := o1.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, o1_0, o1_1 = _i2+1, o1_1, o1_0 {
-                                       s2 := o1_0
-                                       if s2.Op != OpPPC64SLDconst || auxIntToInt64(s2.AuxInt) != 40 {
-                                               continue
-                                       }
-                                       x2 := s2.Args[0]
-                                       if x2.Op != OpPPC64MOVBZload {
-                                               continue
-                                       }
-                                       i2 := auxIntToInt32(x2.AuxInt)
-                                       if auxToSym(x2.Aux) != s {
-                                               continue
-                                       }
-                                       _ = x2.Args[1]
-                                       if p != x2.Args[0] || mem != x2.Args[1] {
-                                               continue
-                                       }
-                                       o2 := o1_1
-                                       if o2.Op != OpPPC64OR || o2.Type != t {
-                                               continue
-                                       }
-                                       _ = o2.Args[1]
-                                       o2_0 := o2.Args[0]
-                                       o2_1 := o2.Args[1]
-                                       for _i3 := 0; _i3 <= 1; _i3, o2_0, o2_1 = _i3+1, o2_1, o2_0 {
-                                               s3 := o2_0
-                                               if s3.Op != OpPPC64SLDconst || auxIntToInt64(s3.AuxInt) != 32 {
-                                                       continue
-                                               }
-                                               x3 := s3.Args[0]
-                                               if x3.Op != OpPPC64MOVBZload {
-                                                       continue
-                                               }
-                                               i3 := auxIntToInt32(x3.AuxInt)
-                                               if auxToSym(x3.Aux) != s {
-                                                       continue
-                                               }
-                                               _ = x3.Args[1]
-                                               if p != x3.Args[0] || mem != x3.Args[1] {
-                                                       continue
-                                               }
-                                               x4 := o2_1
-                                               if x4.Op != OpPPC64MOVWBRload || x4.Type != t {
-                                                       continue
-                                               }
-                                               _ = x4.Args[1]
-                                               x4_0 := x4.Args[0]
-                                               if x4_0.Op != OpPPC64MOVDaddr || x4_0.Type != typ.Uintptr {
-                                                       continue
-                                               }
-                                               i4 := auxIntToInt32(x4_0.AuxInt)
-                                               if p != x4_0.Args[0] || mem != x4.Args[1] || !(!config.BigEndian && i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0, x1, x2, x3, x4, o0, o1, o2, s0, s1, s2, s3)) {
-                                                       continue
-                                               }
-                                               b = mergePoint(b, x0, x1, x2, x3, x4)
-                                               v0 := b.NewValue0(x4.Pos, OpPPC64MOVDBRload, t)
-                                               v.copyOf(v0)
-                                               v1 := b.NewValue0(x4.Pos, OpPPC64MOVDaddr, typ.Uintptr)
-                                               v1.AuxInt = int32ToAuxInt(i0)
-                                               v1.Aux = symToAux(s)
-                                               v1.AddArg(p)
-                                               v0.AddArg2(v1, mem)
-                                               return true
-                                       }
-                               }
-                       }
-               }
-               break
-       }
-       // match: (OR <t> x7:(MOVBZload [i7] {s} p mem) o5:(OR <t> s6:(SLDconst x6:(MOVBZload [i6] {s} p mem) [8]) o4:(OR <t> s5:(SLDconst x5:(MOVBZload [i5] {s} p mem) [16]) o3:(OR <t> s4:(SLDconst x4:(MOVBZload [i4] {s} p mem) [24]) s0:(SLWconst x3:(MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [32])))))
-       // cond: !config.BigEndian && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && mergePoint(b, x3, x4, x5, x6, x7) != nil && clobber(x3, x4, x5, x6, x7, o3, o4, o5, s0, s4, s5, s6)
-       // result: @mergePoint(b,x3,x4,x5,x6,x7) (MOVDBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x7 := v_0
-                       if x7.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i7 := auxIntToInt32(x7.AuxInt)
-                       s := auxToSym(x7.Aux)
-                       mem := x7.Args[1]
-                       p := x7.Args[0]
-                       o5 := v_1
-                       if o5.Op != OpPPC64OR || o5.Type != t {
-                               continue
-                       }
-                       _ = o5.Args[1]
-                       o5_0 := o5.Args[0]
-                       o5_1 := o5.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, o5_0, o5_1 = _i1+1, o5_1, o5_0 {
-                               s6 := o5_0
-                               if s6.Op != OpPPC64SLDconst || auxIntToInt64(s6.AuxInt) != 8 {
-                                       continue
-                               }
-                               x6 := s6.Args[0]
-                               if x6.Op != OpPPC64MOVBZload {
-                                       continue
-                               }
-                               i6 := auxIntToInt32(x6.AuxInt)
-                               if auxToSym(x6.Aux) != s {
-                                       continue
-                               }
-                               _ = x6.Args[1]
-                               if p != x6.Args[0] || mem != x6.Args[1] {
-                                       continue
-                               }
-                               o4 := o5_1
-                               if o4.Op != OpPPC64OR || o4.Type != t {
-                                       continue
-                               }
-                               _ = o4.Args[1]
-                               o4_0 := o4.Args[0]
-                               o4_1 := o4.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, o4_0, o4_1 = _i2+1, o4_1, o4_0 {
-                                       s5 := o4_0
-                                       if s5.Op != OpPPC64SLDconst || auxIntToInt64(s5.AuxInt) != 16 {
-                                               continue
-                                       }
-                                       x5 := s5.Args[0]
-                                       if x5.Op != OpPPC64MOVBZload {
-                                               continue
-                                       }
-                                       i5 := auxIntToInt32(x5.AuxInt)
-                                       if auxToSym(x5.Aux) != s {
-                                               continue
-                                       }
-                                       _ = x5.Args[1]
-                                       if p != x5.Args[0] || mem != x5.Args[1] {
-                                               continue
-                                       }
-                                       o3 := o4_1
-                                       if o3.Op != OpPPC64OR || o3.Type != t {
-                                               continue
-                                       }
-                                       _ = o3.Args[1]
-                                       o3_0 := o3.Args[0]
-                                       o3_1 := o3.Args[1]
-                                       for _i3 := 0; _i3 <= 1; _i3, o3_0, o3_1 = _i3+1, o3_1, o3_0 {
-                                               s4 := o3_0
-                                               if s4.Op != OpPPC64SLDconst || auxIntToInt64(s4.AuxInt) != 24 {
-                                                       continue
-                                               }
-                                               x4 := s4.Args[0]
-                                               if x4.Op != OpPPC64MOVBZload {
-                                                       continue
-                                               }
-                                               i4 := auxIntToInt32(x4.AuxInt)
-                                               if auxToSym(x4.Aux) != s {
-                                                       continue
-                                               }
-                                               _ = x4.Args[1]
-                                               if p != x4.Args[0] || mem != x4.Args[1] {
-                                                       continue
-                                               }
-                                               s0 := o3_1
-                                               if s0.Op != OpPPC64SLWconst || auxIntToInt64(s0.AuxInt) != 32 {
-                                                       continue
-                                               }
-                                               x3 := s0.Args[0]
-                                               if x3.Op != OpPPC64MOVWBRload || x3.Type != t {
-                                                       continue
-                                               }
-                                               _ = x3.Args[1]
-                                               x3_0 := x3.Args[0]
-                                               if x3_0.Op != OpPPC64MOVDaddr || x3_0.Type != typ.Uintptr {
-                                                       continue
-                                               }
-                                               i0 := auxIntToInt32(x3_0.AuxInt)
-                                               if auxToSym(x3_0.Aux) != s || p != x3_0.Args[0] || mem != x3.Args[1] || !(!config.BigEndian && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && mergePoint(b, x3, x4, x5, x6, x7) != nil && clobber(x3, x4, x5, x6, x7, o3, o4, o5, s0, s4, s5, s6)) {
-                                                       continue
-                                               }
-                                               b = mergePoint(b, x3, x4, x5, x6, x7)
-                                               v0 := b.NewValue0(x3.Pos, OpPPC64MOVDBRload, t)
-                                               v.copyOf(v0)
-                                               v1 := b.NewValue0(x3.Pos, OpPPC64MOVDaddr, typ.Uintptr)
-                                               v1.AuxInt = int32ToAuxInt(i0)
-                                               v1.Aux = symToAux(s)
-                                               v1.AddArg(p)
-                                               v0.AddArg2(v1, mem)
-                                               return true
-                                       }
-                               }
-                       }
-               }
-               break
-       }
-       // match: (OR <t> x7:(MOVBZload [i7] {s} p mem) o5:(OR <t> s6:(SLDconst x6:(MOVBZload [i6] {s} p mem) [8]) o4:(OR <t> s5:(SLDconst x5:(MOVBZload [i5] {s} p mem) [16]) o3:(OR <t> s4:(SLDconst x4:(MOVBZload [i4] {s} p mem) [24]) s0:(SLDconst x3:(MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [32])))))
-       // cond: !config.BigEndian && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && mergePoint(b, x3, x4, x5, x6, x7) != nil && clobber(x3, x4, x5, x6, x7, o3, o4, o5, s0, s4, s5, s6)
-       // result: @mergePoint(b,x3,x4,x5,x6,x7) (MOVDBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)
-       for {
-               t := v.Type
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x7 := v_0
-                       if x7.Op != OpPPC64MOVBZload {
-                               continue
-                       }
-                       i7 := auxIntToInt32(x7.AuxInt)
-                       s := auxToSym(x7.Aux)
-                       mem := x7.Args[1]
-                       p := x7.Args[0]
-                       o5 := v_1
-                       if o5.Op != OpPPC64OR || o5.Type != t {
-                               continue
-                       }
-                       _ = o5.Args[1]
-                       o5_0 := o5.Args[0]
-                       o5_1 := o5.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, o5_0, o5_1 = _i1+1, o5_1, o5_0 {
-                               s6 := o5_0
-                               if s6.Op != OpPPC64SLDconst || auxIntToInt64(s6.AuxInt) != 8 {
-                                       continue
-                               }
-                               x6 := s6.Args[0]
-                               if x6.Op != OpPPC64MOVBZload {
-                                       continue
-                               }
-                               i6 := auxIntToInt32(x6.AuxInt)
-                               if auxToSym(x6.Aux) != s {
-                                       continue
-                               }
-                               _ = x6.Args[1]
-                               if p != x6.Args[0] || mem != x6.Args[1] {
-                                       continue
-                               }
-                               o4 := o5_1
-                               if o4.Op != OpPPC64OR || o4.Type != t {
-                                       continue
-                               }
-                               _ = o4.Args[1]
-                               o4_0 := o4.Args[0]
-                               o4_1 := o4.Args[1]
-                               for _i2 := 0; _i2 <= 1; _i2, o4_0, o4_1 = _i2+1, o4_1, o4_0 {
-                                       s5 := o4_0
-                                       if s5.Op != OpPPC64SLDconst || auxIntToInt64(s5.AuxInt) != 16 {
-                                               continue
-                                       }
-                                       x5 := s5.Args[0]
-                                       if x5.Op != OpPPC64MOVBZload {
-                                               continue
-                                       }
-                                       i5 := auxIntToInt32(x5.AuxInt)
-                                       if auxToSym(x5.Aux) != s {
-                                               continue
-                                       }
-                                       _ = x5.Args[1]
-                                       if p != x5.Args[0] || mem != x5.Args[1] {
-                                               continue
-                                       }
-                                       o3 := o4_1
-                                       if o3.Op != OpPPC64OR || o3.Type != t {
-                                               continue
-                                       }
-                                       _ = o3.Args[1]
-                                       o3_0 := o3.Args[0]
-                                       o3_1 := o3.Args[1]
-                                       for _i3 := 0; _i3 <= 1; _i3, o3_0, o3_1 = _i3+1, o3_1, o3_0 {
-                                               s4 := o3_0
-                                               if s4.Op != OpPPC64SLDconst || auxIntToInt64(s4.AuxInt) != 24 {
-                                                       continue
-                                               }
-                                               x4 := s4.Args[0]
-                                               if x4.Op != OpPPC64MOVBZload {
-                                                       continue
-                                               }
-                                               i4 := auxIntToInt32(x4.AuxInt)
-                                               if auxToSym(x4.Aux) != s {
-                                                       continue
-                                               }
-                                               _ = x4.Args[1]
-                                               if p != x4.Args[0] || mem != x4.Args[1] {
-                                                       continue
-                                               }
-                                               s0 := o3_1
-                                               if s0.Op != OpPPC64SLDconst || auxIntToInt64(s0.AuxInt) != 32 {
-                                                       continue
-                                               }
-                                               x3 := s0.Args[0]
-                                               if x3.Op != OpPPC64MOVWBRload || x3.Type != t {
-                                                       continue
-                                               }
-                                               _ = x3.Args[1]
-                                               x3_0 := x3.Args[0]
-                                               if x3_0.Op != OpPPC64MOVDaddr || x3_0.Type != typ.Uintptr {
-                                                       continue
-                                               }
-                                               i0 := auxIntToInt32(x3_0.AuxInt)
-                                               if auxToSym(x3_0.Aux) != s || p != x3_0.Args[0] || mem != x3.Args[1] || !(!config.BigEndian && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && mergePoint(b, x3, x4, x5, x6, x7) != nil && clobber(x3, x4, x5, x6, x7, o3, o4, o5, s0, s4, s5, s6)) {
-                                                       continue
-                                               }
-                                               b = mergePoint(b, x3, x4, x5, x6, x7)
-                                               v0 := b.NewValue0(x3.Pos, OpPPC64MOVDBRload, t)
-                                               v.copyOf(v0)
-                                               v1 := b.NewValue0(x3.Pos, OpPPC64MOVDaddr, typ.Uintptr)
-                                               v1.AuxInt = int32ToAuxInt(i0)
-                                               v1.Aux = symToAux(s)
-                                               v1.AddArg(p)
-                                               v0.AddArg2(v1, mem)
-                                               return true
-                                       }
-                               }
-                       }
-               }
-               break
-       }
        return false
 }
 func rewriteValuePPC64_OpPPC64ORN(v *Value) bool {
index fdd15f31a4895c0834b2d8abd9e9f563f210103c..a3d621898fae61cd371098fa679e797a03d2d668 100644 (file)
@@ -90,6 +90,8 @@ func rewriteValueS390X(v *Value) bool {
                return rewriteValueS390X_OpAvg64u(v)
        case OpBitLen64:
                return rewriteValueS390X_OpBitLen64(v)
+       case OpBswap16:
+               return rewriteValueS390X_OpBswap16(v)
        case OpBswap32:
                v.Op = OpS390XMOVWBR
                return true
@@ -630,6 +632,8 @@ func rewriteValueS390X(v *Value) bool {
                return rewriteValueS390X_OpS390XMOVBstore(v)
        case OpS390XMOVBstoreconst:
                return rewriteValueS390X_OpS390XMOVBstoreconst(v)
+       case OpS390XMOVDBR:
+               return rewriteValueS390X_OpS390XMOVDBR(v)
        case OpS390XMOVDaddridx:
                return rewriteValueS390X_OpS390XMOVDaddridx(v)
        case OpS390XMOVDload:
@@ -638,8 +642,8 @@ func rewriteValueS390X(v *Value) bool {
                return rewriteValueS390X_OpS390XMOVDstore(v)
        case OpS390XMOVDstoreconst:
                return rewriteValueS390X_OpS390XMOVDstoreconst(v)
-       case OpS390XMOVHBRstore:
-               return rewriteValueS390X_OpS390XMOVHBRstore(v)
+       case OpS390XMOVDstoreidx:
+               return rewriteValueS390X_OpS390XMOVDstoreidx(v)
        case OpS390XMOVHZload:
                return rewriteValueS390X_OpS390XMOVHZload(v)
        case OpS390XMOVHZreg:
@@ -652,8 +656,10 @@ func rewriteValueS390X(v *Value) bool {
                return rewriteValueS390X_OpS390XMOVHstore(v)
        case OpS390XMOVHstoreconst:
                return rewriteValueS390X_OpS390XMOVHstoreconst(v)
-       case OpS390XMOVWBRstore:
-               return rewriteValueS390X_OpS390XMOVWBRstore(v)
+       case OpS390XMOVHstoreidx:
+               return rewriteValueS390X_OpS390XMOVHstoreidx(v)
+       case OpS390XMOVWBR:
+               return rewriteValueS390X_OpS390XMOVWBR(v)
        case OpS390XMOVWZload:
                return rewriteValueS390X_OpS390XMOVWZload(v)
        case OpS390XMOVWZreg:
@@ -666,6 +672,8 @@ func rewriteValueS390X(v *Value) bool {
                return rewriteValueS390X_OpS390XMOVWstore(v)
        case OpS390XMOVWstoreconst:
                return rewriteValueS390X_OpS390XMOVWstoreconst(v)
+       case OpS390XMOVWstoreidx:
+               return rewriteValueS390X_OpS390XMOVWstoreidx(v)
        case OpS390XMULLD:
                return rewriteValueS390X_OpS390XMULLD(v)
        case OpS390XMULLDconst:
@@ -1270,6 +1278,55 @@ func rewriteValueS390X_OpBitLen64(v *Value) bool {
                return true
        }
 }
+func rewriteValueS390X_OpBswap16(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Bswap16 x:(MOVHZload [off] {sym} ptr mem))
+       // result: @x.Block (MOVHZreg (MOVHBRload [off] {sym} ptr mem))
+       for {
+               x := v_0
+               if x.Op != OpS390XMOVHZload {
+                       break
+               }
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpS390XMOVHZreg, typ.UInt64)
+               v.copyOf(v0)
+               v1 := b.NewValue0(x.Pos, OpS390XMOVHBRload, typ.UInt16)
+               v1.AuxInt = int32ToAuxInt(off)
+               v1.Aux = symToAux(sym)
+               v1.AddArg2(ptr, mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (Bswap16 x:(MOVHZloadidx [off] {sym} ptr idx mem))
+       // result: @x.Block (MOVHZreg (MOVHBRloadidx [off] {sym} ptr idx mem))
+       for {
+               x := v_0
+               if x.Op != OpS390XMOVHZloadidx {
+                       break
+               }
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[2]
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVHZreg, typ.UInt64)
+               v.copyOf(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVHBRloadidx, typ.Int16)
+               v1.AuxInt = int32ToAuxInt(off)
+               v1.Aux = symToAux(sym)
+               v1.AddArg3(ptr, idx, mem)
+               v0.AddArg(v1)
+               return true
+       }
+       return false
+}
 func rewriteValueS390X_OpCeil(v *Value) bool {
        v_0 := v.Args[0]
        // match: (Ceil x)
@@ -8651,280 +8708,6 @@ func rewriteValueS390X_OpS390XMOVBstore(v *Value) bool {
                v.AddArg3(base, val, mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p w x:(MOVBstore [i-1] {s} p (SRDconst [8] w) mem))
-       // cond: p.Op != OpSB && x.Uses == 1 && clobber(x)
-       // result: (MOVHstore [i-1] {s} p w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               w := v_1
-               x := v_2
-               if x.Op != OpS390XMOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               x_1 := x.Args[1]
-               if x_1.Op != OpS390XSRDconst || auxIntToUint8(x_1.AuxInt) != 8 || w != x_1.Args[0] || !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpS390XMOVHstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} p w0:(SRDconst [j] w) x:(MOVBstore [i-1] {s} p (SRDconst [j+8] w) mem))
-       // cond: p.Op != OpSB && x.Uses == 1 && clobber(x)
-       // result: (MOVHstore [i-1] {s} p w0 mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               w0 := v_1
-               if w0.Op != OpS390XSRDconst {
-                       break
-               }
-               j := auxIntToUint8(w0.AuxInt)
-               w := w0.Args[0]
-               x := v_2
-               if x.Op != OpS390XMOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               x_1 := x.Args[1]
-               if x_1.Op != OpS390XSRDconst || auxIntToUint8(x_1.AuxInt) != j+8 || w != x_1.Args[0] || !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpS390XMOVHstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w0, mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} p w x:(MOVBstore [i-1] {s} p (SRWconst [8] w) mem))
-       // cond: p.Op != OpSB && x.Uses == 1 && clobber(x)
-       // result: (MOVHstore [i-1] {s} p w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               w := v_1
-               x := v_2
-               if x.Op != OpS390XMOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               x_1 := x.Args[1]
-               if x_1.Op != OpS390XSRWconst || auxIntToUint8(x_1.AuxInt) != 8 || w != x_1.Args[0] || !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpS390XMOVHstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} p w0:(SRWconst [j] w) x:(MOVBstore [i-1] {s} p (SRWconst [j+8] w) mem))
-       // cond: p.Op != OpSB && x.Uses == 1 && clobber(x)
-       // result: (MOVHstore [i-1] {s} p w0 mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               w0 := v_1
-               if w0.Op != OpS390XSRWconst {
-                       break
-               }
-               j := auxIntToUint8(w0.AuxInt)
-               w := w0.Args[0]
-               x := v_2
-               if x.Op != OpS390XMOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               x_1 := x.Args[1]
-               if x_1.Op != OpS390XSRWconst || auxIntToUint8(x_1.AuxInt) != j+8 || w != x_1.Args[0] || !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpS390XMOVHstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w0, mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} p (SRDconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
-       // cond: p.Op != OpSB && x.Uses == 1 && clobber(x)
-       // result: (MOVHBRstore [i-1] {s} p w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpS390XSRDconst || auxIntToUint8(v_1.AuxInt) != 8 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpS390XMOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpS390XMOVHBRstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} p (SRDconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SRDconst [j-8] w) mem))
-       // cond: p.Op != OpSB && x.Uses == 1 && clobber(x)
-       // result: (MOVHBRstore [i-1] {s} p w0 mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpS390XSRDconst {
-                       break
-               }
-               j := auxIntToUint8(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpS390XMOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               w0 := x.Args[1]
-               if w0.Op != OpS390XSRDconst || auxIntToUint8(w0.AuxInt) != j-8 || w != w0.Args[0] || !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpS390XMOVHBRstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w0, mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} p (SRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
-       // cond: p.Op != OpSB && x.Uses == 1 && clobber(x)
-       // result: (MOVHBRstore [i-1] {s} p w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpS390XSRWconst || auxIntToUint8(v_1.AuxInt) != 8 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpS390XMOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpS390XMOVHBRstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} p (SRWconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SRWconst [j-8] w) mem))
-       // cond: p.Op != OpSB && x.Uses == 1 && clobber(x)
-       // result: (MOVHBRstore [i-1] {s} p w0 mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpS390XSRWconst {
-                       break
-               }
-               j := auxIntToUint8(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpS390XMOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               w0 := x.Args[1]
-               if w0.Op != OpS390XSRWconst || auxIntToUint8(w0.AuxInt) != j-8 || w != w0.Args[0] || !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpS390XMOVHBRstore)
-               v.AuxInt = int32ToAuxInt(i - 1)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w0, mem)
-               return true
-       }
-       // match: (MOVBstore [7] {s} p1 (SRDconst w) x1:(MOVHBRstore [5] {s} p1 (SRDconst w) x2:(MOVWBRstore [1] {s} p1 (SRDconst w) x3:(MOVBstore [0] {s} p1 w mem))))
-       // cond: x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && clobber(x1, x2, x3)
-       // result: (MOVDBRstore {s} p1 w mem)
-       for {
-               if auxIntToInt32(v.AuxInt) != 7 {
-                       break
-               }
-               s := auxToSym(v.Aux)
-               p1 := v_0
-               if v_1.Op != OpS390XSRDconst {
-                       break
-               }
-               w := v_1.Args[0]
-               x1 := v_2
-               if x1.Op != OpS390XMOVHBRstore || auxIntToInt32(x1.AuxInt) != 5 || auxToSym(x1.Aux) != s {
-                       break
-               }
-               _ = x1.Args[2]
-               if p1 != x1.Args[0] {
-                       break
-               }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpS390XSRDconst || w != x1_1.Args[0] {
-                       break
-               }
-               x2 := x1.Args[2]
-               if x2.Op != OpS390XMOVWBRstore || auxIntToInt32(x2.AuxInt) != 1 || auxToSym(x2.Aux) != s {
-                       break
-               }
-               _ = x2.Args[2]
-               if p1 != x2.Args[0] {
-                       break
-               }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpS390XSRDconst || w != x2_1.Args[0] {
-                       break
-               }
-               x3 := x2.Args[2]
-               if x3.Op != OpS390XMOVBstore || auxIntToInt32(x3.AuxInt) != 0 || auxToSym(x3.Aux) != s {
-                       break
-               }
-               mem := x3.Args[2]
-               if p1 != x3.Args[0] || w != x3.Args[1] || !(x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && clobber(x1, x2, x3)) {
-                       break
-               }
-               v.reset(OpS390XMOVDBRstore)
-               v.Aux = symToAux(s)
-               v.AddArg3(p1, w, mem)
-               return true
-       }
        return false
 }
 func rewriteValueS390X_OpS390XMOVBstoreconst(v *Value) bool {
@@ -8973,29 +8756,57 @@ func rewriteValueS390X_OpS390XMOVBstoreconst(v *Value) bool {
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
-       // cond: p.Op != OpSB && x.Uses == 1 && a.Off() + 1 == c.Off() && clobber(x)
-       // result: (MOVHstoreconst [makeValAndOff(c.Val()&0xff | a.Val()<<8, a.Off())] {s} p mem)
+       return false
+}
+func rewriteValueS390X_OpS390XMOVDBR(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (MOVDBR x:(MOVDload [off] {sym} ptr mem))
+       // cond: x.Uses == 1
+       // result: @x.Block (MOVDBRload [off] {sym} ptr mem)
        for {
-               c := auxIntToValAndOff(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               x := v_1
-               if x.Op != OpS390XMOVBstoreconst {
+               x := v_0
+               if x.Op != OpS390XMOVDload {
                        break
                }
-               a := auxIntToValAndOff(x.AuxInt)
-               if auxToSym(x.Aux) != s {
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1) {
                        break
                }
-               mem := x.Args[1]
-               if p != x.Args[0] || !(p.Op != OpSB && x.Uses == 1 && a.Off()+1 == c.Off() && clobber(x)) {
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpS390XMOVDBRload, typ.UInt64)
+               v.copyOf(v0)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (MOVDBR x:(MOVDloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1
+       // result: @x.Block (MOVDBRloadidx [off] {sym} ptr idx mem)
+       for {
+               x := v_0
+               if x.Op != OpS390XMOVDloadidx {
                        break
                }
-               v.reset(OpS390XMOVHstoreconst)
-               v.AuxInt = valAndOffToAuxInt(makeValAndOff(c.Val()&0xff|a.Val()<<8, a.Off()))
-               v.Aux = symToAux(s)
-               v.AddArg2(p, mem)
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[2]
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               if !(x.Uses == 1) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVDBRloadidx, typ.Int64)
+               v.copyOf(v0)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg3(ptr, idx, mem)
                return true
        }
        return false
@@ -9329,6 +9140,28 @@ func rewriteValueS390X_OpS390XMOVDstore(v *Value) bool {
                v.AddArg6(p, w0, w1, w2, w3, mem)
                return true
        }
+       // match: (MOVDstore [off] {sym} ptr r:(MOVDBR x) mem)
+       // cond: r.Uses == 1
+       // result: (MOVDBRstore [off] {sym} ptr x mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               r := v_1
+               if r.Op != OpS390XMOVDBR {
+                       break
+               }
+               x := r.Args[0]
+               mem := v_2
+               if !(r.Uses == 1) {
+                       break
+               }
+               v.reset(OpS390XMOVDBRstore)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, x, mem)
+               return true
+       }
        return false
 }
 func rewriteValueS390X_OpS390XMOVDstoreconst(v *Value) bool {
@@ -9379,118 +9212,32 @@ func rewriteValueS390X_OpS390XMOVDstoreconst(v *Value) bool {
        }
        return false
 }
-func rewriteValueS390X_OpS390XMOVHBRstore(v *Value) bool {
+func rewriteValueS390X_OpS390XMOVDstoreidx(v *Value) bool {
+       v_3 := v.Args[3]
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (MOVHBRstore [i] {s} p (SRDconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWBRstore [i-2] {s} p w mem)
+       // match: (MOVDstoreidx [off] {sym} ptr idx r:(MOVDBR x) mem)
+       // cond: r.Uses == 1
+       // result: (MOVDBRstoreidx [off] {sym} ptr idx x mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpS390XSRDconst || auxIntToUint8(v_1.AuxInt) != 16 {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               idx := v_1
+               r := v_2
+               if r.Op != OpS390XMOVDBR {
                        break
                }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpS390XMOVHBRstore || auxIntToInt32(x.AuxInt) != i-2 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpS390XMOVWBRstore)
-               v.AuxInt = int32ToAuxInt(i - 2)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       // match: (MOVHBRstore [i] {s} p (SRDconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRDconst [j-16] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWBRstore [i-2] {s} p w0 mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpS390XSRDconst {
-                       break
-               }
-               j := auxIntToUint8(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpS390XMOVHBRstore || auxIntToInt32(x.AuxInt) != i-2 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               w0 := x.Args[1]
-               if w0.Op != OpS390XSRDconst || auxIntToUint8(w0.AuxInt) != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpS390XMOVWBRstore)
-               v.AuxInt = int32ToAuxInt(i - 2)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w0, mem)
-               return true
-       }
-       // match: (MOVHBRstore [i] {s} p (SRWconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWBRstore [i-2] {s} p w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpS390XSRWconst || auxIntToUint8(v_1.AuxInt) != 16 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpS390XMOVHBRstore || auxIntToInt32(x.AuxInt) != i-2 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpS390XMOVWBRstore)
-               v.AuxInt = int32ToAuxInt(i - 2)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       // match: (MOVHBRstore [i] {s} p (SRWconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRWconst [j-16] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWBRstore [i-2] {s} p w0 mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpS390XSRWconst {
-                       break
-               }
-               j := auxIntToUint8(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpS390XMOVHBRstore || auxIntToInt32(x.AuxInt) != i-2 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               w0 := x.Args[1]
-               if w0.Op != OpS390XSRWconst || auxIntToUint8(w0.AuxInt) != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               x := r.Args[0]
+               mem := v_3
+               if !(r.Uses == 1) {
                        break
                }
-               v.reset(OpS390XMOVWBRstore)
-               v.AuxInt = int32ToAuxInt(i - 2)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w0, mem)
+               v.reset(OpS390XMOVDBRstoreidx)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg4(ptr, idx, x, mem)
                return true
        }
        return false
@@ -10114,118 +9861,21 @@ func rewriteValueS390X_OpS390XMOVHstore(v *Value) bool {
                v.AddArg3(base, val, mem)
                return true
        }
-       // match: (MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRDconst [16] w) mem))
-       // cond: p.Op != OpSB && x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-2] {s} p w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               w := v_1
-               x := v_2
-               if x.Op != OpS390XMOVHstore || auxIntToInt32(x.AuxInt) != i-2 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               x_1 := x.Args[1]
-               if x_1.Op != OpS390XSRDconst || auxIntToUint8(x_1.AuxInt) != 16 || w != x_1.Args[0] || !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = int32ToAuxInt(i - 2)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       // match: (MOVHstore [i] {s} p w0:(SRDconst [j] w) x:(MOVHstore [i-2] {s} p (SRDconst [j+16] w) mem))
-       // cond: p.Op != OpSB && x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-2] {s} p w0 mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               w0 := v_1
-               if w0.Op != OpS390XSRDconst {
-                       break
-               }
-               j := auxIntToUint8(w0.AuxInt)
-               w := w0.Args[0]
-               x := v_2
-               if x.Op != OpS390XMOVHstore || auxIntToInt32(x.AuxInt) != i-2 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               x_1 := x.Args[1]
-               if x_1.Op != OpS390XSRDconst || auxIntToUint8(x_1.AuxInt) != j+16 || w != x_1.Args[0] || !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = int32ToAuxInt(i - 2)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w0, mem)
-               return true
-       }
-       // match: (MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRWconst [16] w) mem))
-       // cond: p.Op != OpSB && x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-2] {s} p w mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               w := v_1
-               x := v_2
-               if x.Op != OpS390XMOVHstore || auxIntToInt32(x.AuxInt) != i-2 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               x_1 := x.Args[1]
-               if x_1.Op != OpS390XSRWconst || auxIntToUint8(x_1.AuxInt) != 16 || w != x_1.Args[0] || !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = int32ToAuxInt(i - 2)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
-               return true
-       }
-       // match: (MOVHstore [i] {s} p w0:(SRWconst [j] w) x:(MOVHstore [i-2] {s} p (SRWconst [j+16] w) mem))
-       // cond: p.Op != OpSB && x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-2] {s} p w0 mem)
+       // match: (MOVHstore [off] {sym} ptr (Bswap16 val) mem)
+       // result: (MOVHBRstore [off] {sym} ptr val mem)
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               w0 := v_1
-               if w0.Op != OpS390XSRWconst {
-                       break
-               }
-               j := auxIntToUint8(w0.AuxInt)
-               w := w0.Args[0]
-               x := v_2
-               if x.Op != OpS390XMOVHstore || auxIntToInt32(x.AuxInt) != i-2 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               x_1 := x.Args[1]
-               if x_1.Op != OpS390XSRWconst || auxIntToUint8(x_1.AuxInt) != j+16 || w != x_1.Args[0] || !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpBswap16 {
                        break
                }
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = int32ToAuxInt(i - 2)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w0, mem)
+               val := v_1.Args[0]
+               mem := v_2
+               v.reset(OpS390XMOVHBRstore)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, val, mem)
                return true
        }
        return false
@@ -10233,8 +9883,6 @@ func rewriteValueS390X_OpS390XMOVHstore(v *Value) bool {
 func rewriteValueS390X_OpS390XMOVHstoreconst(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
        // match: (MOVHstoreconst [sc] {s} (ADDconst [off] ptr) mem)
        // cond: isU12Bit(sc.Off64()+int64(off))
        // result: (MOVHstoreconst [sc.addOffset32(off)] {s} ptr mem)
@@ -10278,92 +9926,86 @@ func rewriteValueS390X_OpS390XMOVHstoreconst(v *Value) bool {
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVHstoreconst [c] {s} p x:(MOVHstoreconst [a] {s} p mem))
-       // cond: p.Op != OpSB && x.Uses == 1 && a.Off() + 2 == c.Off() && clobber(x)
-       // result: (MOVWstore [a.Off()] {s} p (MOVDconst [int64(c.Val()&0xffff | a.Val()<<16)]) mem)
+       return false
+}
+func rewriteValueS390X_OpS390XMOVHstoreidx(v *Value) bool {
+       v_3 := v.Args[3]
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVHstoreidx [off] {sym} ptr idx (Bswap16 val) mem)
+       // result: (MOVHBRstoreidx [off] {sym} ptr idx val mem)
        for {
-               c := auxIntToValAndOff(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               x := v_1
-               if x.Op != OpS390XMOVHstoreconst {
-                       break
-               }
-               a := auxIntToValAndOff(x.AuxInt)
-               if auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[1]
-               if p != x.Args[0] || !(p.Op != OpSB && x.Uses == 1 && a.Off()+2 == c.Off() && clobber(x)) {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               idx := v_1
+               if v_2.Op != OpBswap16 {
                        break
                }
-               v.reset(OpS390XMOVWstore)
-               v.AuxInt = int32ToAuxInt(a.Off())
-               v.Aux = symToAux(s)
-               v0 := b.NewValue0(x.Pos, OpS390XMOVDconst, typ.UInt64)
-               v0.AuxInt = int64ToAuxInt(int64(c.Val()&0xffff | a.Val()<<16))
-               v.AddArg3(p, v0, mem)
+               val := v_2.Args[0]
+               mem := v_3
+               v.reset(OpS390XMOVHBRstoreidx)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg4(ptr, idx, val, mem)
                return true
        }
        return false
 }
-func rewriteValueS390X_OpS390XMOVWBRstore(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueS390X_OpS390XMOVWBR(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (MOVWBRstore [i] {s} p (SRDconst [32] w) x:(MOVWBRstore [i-4] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVDBRstore [i-4] {s} p w mem)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (MOVWBR x:(MOVWZload [off] {sym} ptr mem))
+       // cond: x.Uses == 1
+       // result: @x.Block (MOVWZreg (MOVWBRload [off] {sym} ptr mem))
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpS390XSRDconst || auxIntToUint8(v_1.AuxInt) != 32 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpS390XMOVWBRstore || auxIntToInt32(x.AuxInt) != i-4 || auxToSym(x.Aux) != s {
+               x := v_0
+               if x.Op != OpS390XMOVWZload {
                        break
                }
-               mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1) {
                        break
                }
-               v.reset(OpS390XMOVDBRstore)
-               v.AuxInt = int32ToAuxInt(i - 4)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpS390XMOVWZreg, typ.UInt64)
+               v.copyOf(v0)
+               v1 := b.NewValue0(x.Pos, OpS390XMOVWBRload, typ.UInt32)
+               v1.AuxInt = int32ToAuxInt(off)
+               v1.Aux = symToAux(sym)
+               v1.AddArg2(ptr, mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (MOVWBRstore [i] {s} p (SRDconst [j] w) x:(MOVWBRstore [i-4] {s} p w0:(SRDconst [j-32] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVDBRstore [i-4] {s} p w0 mem)
+       // match: (MOVWBR x:(MOVWZloadidx [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1
+       // result: @x.Block (MOVWZreg (MOVWBRloadidx [off] {sym} ptr idx mem))
        for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               if v_1.Op != OpS390XSRDconst {
-                       break
-               }
-               j := auxIntToUint8(v_1.AuxInt)
-               w := v_1.Args[0]
-               x := v_2
-               if x.Op != OpS390XMOVWBRstore || auxIntToInt32(x.AuxInt) != i-4 || auxToSym(x.Aux) != s {
+               x := v_0
+               if x.Op != OpS390XMOVWZloadidx {
                        break
                }
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
                mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               w0 := x.Args[1]
-               if w0.Op != OpS390XSRDconst || auxIntToUint8(w0.AuxInt) != j-32 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               if !(x.Uses == 1) {
                        break
                }
-               v.reset(OpS390XMOVDBRstore)
-               v.AuxInt = int32ToAuxInt(i - 4)
-               v.Aux = symToAux(s)
-               v.AddArg3(p, w0, mem)
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpS390XMOVWZreg, typ.UInt64)
+               v.copyOf(v0)
+               v1 := b.NewValue0(v.Pos, OpS390XMOVWBRloadidx, typ.Int32)
+               v1.AuxInt = int32ToAuxInt(off)
+               v1.Aux = symToAux(sym)
+               v1.AddArg3(ptr, idx, mem)
+               v0.AddArg(v1)
                return true
        }
        return false
@@ -10952,118 +10594,62 @@ func rewriteValueS390X_OpS390XMOVWstore(v *Value) bool {
                v.AddArg3(base, val, mem)
                return true
        }
-       // match: (MOVWstore [i] {s} p (SRDconst [32] w) x:(MOVWstore [i-4] {s} p w mem))
-       // cond: p.Op != OpSB && x.Uses == 1 && clobber(x)
-       // result: (MOVDstore [i-4] {s} p w mem)
+       // match: (MOVWstore [i] {s} p w1 x:(MOVWstore [i-4] {s} p w0 mem))
+       // cond: p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-4) && clobber(x)
+       // result: (STM2 [i-4] {s} p w0 w1 mem)
        for {
                i := auxIntToInt32(v.AuxInt)
                s := auxToSym(v.Aux)
                p := v_0
-               if v_1.Op != OpS390XSRDconst || auxIntToUint8(v_1.AuxInt) != 32 {
-                       break
-               }
-               w := v_1.Args[0]
+               w1 := v_1
                x := v_2
                if x.Op != OpS390XMOVWstore || auxIntToInt32(x.AuxInt) != i-4 || auxToSym(x.Aux) != s {
                        break
                }
                mem := x.Args[2]
-               if p != x.Args[0] || w != x.Args[1] || !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+               if p != x.Args[0] {
                        break
                }
-               v.reset(OpS390XMOVDstore)
+               w0 := x.Args[1]
+               if !(p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-4) && clobber(x)) {
+                       break
+               }
+               v.reset(OpS390XSTM2)
                v.AuxInt = int32ToAuxInt(i - 4)
                v.Aux = symToAux(s)
-               v.AddArg3(p, w, mem)
+               v.AddArg4(p, w0, w1, mem)
                return true
        }
-       // match: (MOVWstore [i] {s} p w0:(SRDconst [j] w) x:(MOVWstore [i-4] {s} p (SRDconst [j+32] w) mem))
-       // cond: p.Op != OpSB && x.Uses == 1 && clobber(x)
-       // result: (MOVDstore [i-4] {s} p w0 mem)
+       // match: (MOVWstore [i] {s} p w2 x:(STM2 [i-8] {s} p w0 w1 mem))
+       // cond: x.Uses == 1 && is20Bit(int64(i)-8) && clobber(x)
+       // result: (STM3 [i-8] {s} p w0 w1 w2 mem)
        for {
                i := auxIntToInt32(v.AuxInt)
                s := auxToSym(v.Aux)
                p := v_0
-               w0 := v_1
-               if w0.Op != OpS390XSRDconst {
-                       break
-               }
-               j := auxIntToUint8(w0.AuxInt)
-               w := w0.Args[0]
+               w2 := v_1
                x := v_2
-               if x.Op != OpS390XMOVWstore || auxIntToInt32(x.AuxInt) != i-4 || auxToSym(x.Aux) != s {
+               if x.Op != OpS390XSTM2 || auxIntToInt32(x.AuxInt) != i-8 || auxToSym(x.Aux) != s {
                        break
                }
-               mem := x.Args[2]
+               mem := x.Args[3]
                if p != x.Args[0] {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpS390XSRDconst || auxIntToUint8(x_1.AuxInt) != j+32 || w != x_1.Args[0] || !(p.Op != OpSB && x.Uses == 1 && clobber(x)) {
+               w0 := x.Args[1]
+               w1 := x.Args[2]
+               if !(x.Uses == 1 && is20Bit(int64(i)-8) && clobber(x)) {
                        break
                }
-               v.reset(OpS390XMOVDstore)
-               v.AuxInt = int32ToAuxInt(i - 4)
+               v.reset(OpS390XSTM3)
+               v.AuxInt = int32ToAuxInt(i - 8)
                v.Aux = symToAux(s)
-               v.AddArg3(p, w0, mem)
+               v.AddArg5(p, w0, w1, w2, mem)
                return true
        }
-       // match: (MOVWstore [i] {s} p w1 x:(MOVWstore [i-4] {s} p w0 mem))
-       // cond: p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-4) && clobber(x)
-       // result: (STM2 [i-4] {s} p w0 w1 mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               w1 := v_1
-               x := v_2
-               if x.Op != OpS390XMOVWstore || auxIntToInt32(x.AuxInt) != i-4 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               w0 := x.Args[1]
-               if !(p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-4) && clobber(x)) {
-                       break
-               }
-               v.reset(OpS390XSTM2)
-               v.AuxInt = int32ToAuxInt(i - 4)
-               v.Aux = symToAux(s)
-               v.AddArg4(p, w0, w1, mem)
-               return true
-       }
-       // match: (MOVWstore [i] {s} p w2 x:(STM2 [i-8] {s} p w0 w1 mem))
-       // cond: x.Uses == 1 && is20Bit(int64(i)-8) && clobber(x)
-       // result: (STM3 [i-8] {s} p w0 w1 w2 mem)
-       for {
-               i := auxIntToInt32(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               w2 := v_1
-               x := v_2
-               if x.Op != OpS390XSTM2 || auxIntToInt32(x.AuxInt) != i-8 || auxToSym(x.Aux) != s {
-                       break
-               }
-               mem := x.Args[3]
-               if p != x.Args[0] {
-                       break
-               }
-               w0 := x.Args[1]
-               w1 := x.Args[2]
-               if !(x.Uses == 1 && is20Bit(int64(i)-8) && clobber(x)) {
-                       break
-               }
-               v.reset(OpS390XSTM3)
-               v.AuxInt = int32ToAuxInt(i - 8)
-               v.Aux = symToAux(s)
-               v.AddArg5(p, w0, w1, w2, mem)
-               return true
-       }
-       // match: (MOVWstore [i] {s} p w3 x:(STM3 [i-12] {s} p w0 w1 w2 mem))
-       // cond: x.Uses == 1 && is20Bit(int64(i)-12) && clobber(x)
-       // result: (STM4 [i-12] {s} p w0 w1 w2 w3 mem)
+       // match: (MOVWstore [i] {s} p w3 x:(STM3 [i-12] {s} p w0 w1 w2 mem))
+       // cond: x.Uses == 1 && is20Bit(int64(i)-12) && clobber(x)
+       // result: (STM4 [i-12] {s} p w0 w1 w2 w3 mem)
        for {
                i := auxIntToInt32(v.AuxInt)
                s := auxToSym(v.Aux)
@@ -11089,13 +10675,33 @@ func rewriteValueS390X_OpS390XMOVWstore(v *Value) bool {
                v.AddArg6(p, w0, w1, w2, w3, mem)
                return true
        }
+       // match: (MOVWstore [off] {sym} ptr r:(MOVWBR x) mem)
+       // cond: r.Uses == 1
+       // result: (MOVWBRstore [off] {sym} ptr x mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               r := v_1
+               if r.Op != OpS390XMOVWBR {
+                       break
+               }
+               x := r.Args[0]
+               mem := v_2
+               if !(r.Uses == 1) {
+                       break
+               }
+               v.reset(OpS390XMOVWBRstore)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, x, mem)
+               return true
+       }
        return false
 }
 func rewriteValueS390X_OpS390XMOVWstoreconst(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
        // match: (MOVWstoreconst [sc] {s} (ADDconst [off] ptr) mem)
        // cond: isU12Bit(sc.Off64()+int64(off))
        // result: (MOVWstoreconst [sc.addOffset32(off)] {s} ptr mem)
@@ -11139,31 +10745,34 @@ func rewriteValueS390X_OpS390XMOVWstoreconst(v *Value) bool {
                v.AddArg2(ptr, mem)
                return true
        }
-       // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
-       // cond: p.Op != OpSB && x.Uses == 1 && a.Off() + 4 == c.Off() && clobber(x)
-       // result: (MOVDstore [a.Off()] {s} p (MOVDconst [c.Val64()&0xffffffff | a.Val64()<<32]) mem)
+       return false
+}
+func rewriteValueS390X_OpS390XMOVWstoreidx(v *Value) bool {
+       v_3 := v.Args[3]
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVWstoreidx [off] {sym} ptr idx r:(MOVWBR x) mem)
+       // cond: r.Uses == 1
+       // result: (MOVWBRstoreidx [off] {sym} ptr idx x mem)
        for {
-               c := auxIntToValAndOff(v.AuxInt)
-               s := auxToSym(v.Aux)
-               p := v_0
-               x := v_1
-               if x.Op != OpS390XMOVWstoreconst {
-                       break
-               }
-               a := auxIntToValAndOff(x.AuxInt)
-               if auxToSym(x.Aux) != s {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               idx := v_1
+               r := v_2
+               if r.Op != OpS390XMOVWBR {
                        break
                }
-               mem := x.Args[1]
-               if p != x.Args[0] || !(p.Op != OpSB && x.Uses == 1 && a.Off()+4 == c.Off() && clobber(x)) {
+               x := r.Args[0]
+               mem := v_3
+               if !(r.Uses == 1) {
                        break
                }
-               v.reset(OpS390XMOVDstore)
-               v.AuxInt = int32ToAuxInt(a.Off())
-               v.Aux = symToAux(s)
-               v0 := b.NewValue0(x.Pos, OpS390XMOVDconst, typ.UInt64)
-               v0.AuxInt = int64ToAuxInt(c.Val64()&0xffffffff | a.Val64()<<32)
-               v.AddArg3(p, v0, mem)
+               v.reset(OpS390XMOVWBRstoreidx)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg4(ptr, idx, x, mem)
                return true
        }
        return false
@@ -11657,7 +11266,6 @@ func rewriteValueS390X_OpS390XOR(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       typ := &b.Func.Config.Types
        // match: (OR x (MOVDconst [c]))
        // cond: isU32Bit(c)
        // result: (ORconst [c] x)
@@ -11816,531 +11424,11 @@ func rewriteValueS390X_OpS390XOR(v *Value) bool {
                }
                break
        }
-       // match: (OR x1:(MOVBZload [i1] {s} p mem) sh:(SLDconst [8] x0:(MOVBZload [i0] {s} p mem)))
-       // cond: i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x1 := v_0
-                       if x1.Op != OpS390XMOVBZload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p := x1.Args[0]
-                       sh := v_1
-                       if sh.Op != OpS390XSLDconst || auxIntToUint8(sh.AuxInt) != 8 {
-                               continue
-                       }
-                       x0 := sh.Args[0]
-                       if x0.Op != OpS390XMOVBZload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       if auxToSym(x0.Aux) != s {
-                               continue
-                       }
-                       _ = x0.Args[1]
-                       if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x0.Pos, OpS390XMOVHZload, typ.UInt16)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i0)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p, mem)
-                       return true
-               }
-               break
-       }
-       // match: (OR x1:(MOVHZload [i1] {s} p mem) sh:(SLDconst [16] x0:(MOVHZload [i0] {s} p mem)))
-       // cond: i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x1 := v_0
-                       if x1.Op != OpS390XMOVHZload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p := x1.Args[0]
-                       sh := v_1
-                       if sh.Op != OpS390XSLDconst || auxIntToUint8(sh.AuxInt) != 16 {
-                               continue
-                       }
-                       x0 := sh.Args[0]
-                       if x0.Op != OpS390XMOVHZload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       if auxToSym(x0.Aux) != s {
-                               continue
-                       }
-                       _ = x0.Args[1]
-                       if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x0.Pos, OpS390XMOVWZload, typ.UInt32)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i0)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p, mem)
-                       return true
-               }
-               break
-       }
-       // match: (OR x1:(MOVWZload [i1] {s} p mem) sh:(SLDconst [32] x0:(MOVWZload [i0] {s} p mem)))
-       // cond: i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVDload [i0] {s} p mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x1 := v_0
-                       if x1.Op != OpS390XMOVWZload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p := x1.Args[0]
-                       sh := v_1
-                       if sh.Op != OpS390XSLDconst || auxIntToUint8(sh.AuxInt) != 32 {
-                               continue
-                       }
-                       x0 := sh.Args[0]
-                       if x0.Op != OpS390XMOVWZload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       if auxToSym(x0.Aux) != s {
-                               continue
-                       }
-                       _ = x0.Args[1]
-                       if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+4 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x0.Pos, OpS390XMOVDload, typ.UInt64)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i0)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p, mem)
-                       return true
-               }
-               break
-       }
-       // match: (OR s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem)) or:(OR s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem)) y))
-       // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s0 := v_0
-                       if s0.Op != OpS390XSLDconst {
-                               continue
-                       }
-                       j0 := auxIntToUint8(s0.AuxInt)
-                       x0 := s0.Args[0]
-                       if x0.Op != OpS390XMOVBZload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       or := v_1
-                       if or.Op != OpS390XOR {
-                               continue
-                       }
-                       _ = or.Args[1]
-                       or_0 := or.Args[0]
-                       or_1 := or.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
-                               s1 := or_0
-                               if s1.Op != OpS390XSLDconst {
-                                       continue
-                               }
-                               j1 := auxIntToUint8(s1.AuxInt)
-                               x1 := s1.Args[0]
-                               if x1.Op != OpS390XMOVBZload {
-                                       continue
-                               }
-                               i1 := auxIntToInt32(x1.AuxInt)
-                               if auxToSym(x1.Aux) != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               if p != x1.Args[0] || mem != x1.Args[1] {
-                                       continue
-                               }
-                               y := or_1
-                               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, y)
-                               v0 := b.NewValue0(x1.Pos, OpS390XOR, v.Type)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x1.Pos, OpS390XSLDconst, v.Type)
-                               v1.AuxInt = uint8ToAuxInt(j1)
-                               v2 := b.NewValue0(x1.Pos, OpS390XMOVHZload, typ.UInt16)
-                               v2.AuxInt = int32ToAuxInt(i0)
-                               v2.Aux = symToAux(s)
-                               v2.AddArg2(p, mem)
-                               v1.AddArg(v2)
-                               v0.AddArg2(v1, y)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (OR s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem)) or:(OR s1:(SLDconst [j1] x1:(MOVHZload [i1] {s} p mem)) y))
-       // cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s0 := v_0
-                       if s0.Op != OpS390XSLDconst {
-                               continue
-                       }
-                       j0 := auxIntToUint8(s0.AuxInt)
-                       x0 := s0.Args[0]
-                       if x0.Op != OpS390XMOVHZload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       or := v_1
-                       if or.Op != OpS390XOR {
-                               continue
-                       }
-                       _ = or.Args[1]
-                       or_0 := or.Args[0]
-                       or_1 := or.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
-                               s1 := or_0
-                               if s1.Op != OpS390XSLDconst {
-                                       continue
-                               }
-                               j1 := auxIntToUint8(s1.AuxInt)
-                               x1 := s1.Args[0]
-                               if x1.Op != OpS390XMOVHZload {
-                                       continue
-                               }
-                               i1 := auxIntToInt32(x1.AuxInt)
-                               if auxToSym(x1.Aux) != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               if p != x1.Args[0] || mem != x1.Args[1] {
-                                       continue
-                               }
-                               y := or_1
-                               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, y)
-                               v0 := b.NewValue0(x1.Pos, OpS390XOR, v.Type)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x1.Pos, OpS390XSLDconst, v.Type)
-                               v1.AuxInt = uint8ToAuxInt(j1)
-                               v2 := b.NewValue0(x1.Pos, OpS390XMOVWZload, typ.UInt32)
-                               v2.AuxInt = int32ToAuxInt(i0)
-                               v2.Aux = symToAux(s)
-                               v2.AddArg2(p, mem)
-                               v1.AddArg(v2)
-                               v0.AddArg2(v1, y)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (OR x0:(MOVBZload [i0] {s} p mem) sh:(SLDconst [8] x1:(MOVBZload [i1] {s} p mem)))
-       // cond: p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpS390XMOVBZload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       sh := v_1
-                       if sh.Op != OpS390XSLDconst || auxIntToUint8(sh.AuxInt) != 8 {
-                               continue
-                       }
-                       x1 := sh.Args[0]
-                       if x1.Op != OpS390XMOVBZload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpS390XMOVHZreg, typ.UInt64)
-                       v.copyOf(v0)
-                       v1 := b.NewValue0(x1.Pos, OpS390XMOVHBRload, typ.UInt16)
-                       v1.AuxInt = int32ToAuxInt(i0)
-                       v1.Aux = symToAux(s)
-                       v1.AddArg2(p, mem)
-                       v0.AddArg(v1)
-                       return true
-               }
-               break
-       }
-       // match: (OR r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)) sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
-       // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRload [i0] {s} p mem))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       r0 := v_0
-                       if r0.Op != OpS390XMOVHZreg {
-                               continue
-                       }
-                       x0 := r0.Args[0]
-                       if x0.Op != OpS390XMOVHBRload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       sh := v_1
-                       if sh.Op != OpS390XSLDconst || auxIntToUint8(sh.AuxInt) != 16 {
-                               continue
-                       }
-                       r1 := sh.Args[0]
-                       if r1.Op != OpS390XMOVHZreg {
-                               continue
-                       }
-                       x1 := r1.Args[0]
-                       if x1.Op != OpS390XMOVHBRload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpS390XMOVWZreg, typ.UInt64)
-                       v.copyOf(v0)
-                       v1 := b.NewValue0(x1.Pos, OpS390XMOVWBRload, typ.UInt32)
-                       v1.AuxInt = int32ToAuxInt(i0)
-                       v1.Aux = symToAux(s)
-                       v1.AddArg2(p, mem)
-                       v0.AddArg(v1)
-                       return true
-               }
-               break
-       }
-       // match: (OR r0:(MOVWZreg x0:(MOVWBRload [i0] {s} p mem)) sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRload [i1] {s} p mem))))
-       // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVDBRload [i0] {s} p mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       r0 := v_0
-                       if r0.Op != OpS390XMOVWZreg {
-                               continue
-                       }
-                       x0 := r0.Args[0]
-                       if x0.Op != OpS390XMOVWBRload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       sh := v_1
-                       if sh.Op != OpS390XSLDconst || auxIntToUint8(sh.AuxInt) != 32 {
-                               continue
-                       }
-                       r1 := sh.Args[0]
-                       if r1.Op != OpS390XMOVWZreg {
-                               continue
-                       }
-                       x1 := r1.Args[0]
-                       if x1.Op != OpS390XMOVWBRload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpS390XMOVDBRload, typ.UInt64)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i0)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p, mem)
-                       return true
-               }
-               break
-       }
-       // match: (OR s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem)) or:(OR s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem)) y))
-       // cond: p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s1 := v_0
-                       if s1.Op != OpS390XSLDconst {
-                               continue
-                       }
-                       j1 := auxIntToUint8(s1.AuxInt)
-                       x1 := s1.Args[0]
-                       if x1.Op != OpS390XMOVBZload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p := x1.Args[0]
-                       or := v_1
-                       if or.Op != OpS390XOR {
-                               continue
-                       }
-                       _ = or.Args[1]
-                       or_0 := or.Args[0]
-                       or_1 := or.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
-                               s0 := or_0
-                               if s0.Op != OpS390XSLDconst {
-                                       continue
-                               }
-                               j0 := auxIntToUint8(s0.AuxInt)
-                               x0 := s0.Args[0]
-                               if x0.Op != OpS390XMOVBZload {
-                                       continue
-                               }
-                               i0 := auxIntToInt32(x0.AuxInt)
-                               if auxToSym(x0.Aux) != s {
-                                       continue
-                               }
-                               _ = x0.Args[1]
-                               if p != x0.Args[0] || mem != x0.Args[1] {
-                                       continue
-                               }
-                               y := or_1
-                               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, y)
-                               v0 := b.NewValue0(x0.Pos, OpS390XOR, v.Type)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x0.Pos, OpS390XSLDconst, v.Type)
-                               v1.AuxInt = uint8ToAuxInt(j0)
-                               v2 := b.NewValue0(x0.Pos, OpS390XMOVHZreg, typ.UInt64)
-                               v3 := b.NewValue0(x0.Pos, OpS390XMOVHBRload, typ.UInt16)
-                               v3.AuxInt = int32ToAuxInt(i0)
-                               v3.Aux = symToAux(s)
-                               v3.AddArg2(p, mem)
-                               v2.AddArg(v3)
-                               v1.AddArg(v2)
-                               v0.AddArg2(v1, y)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (OR s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))) or:(OR s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))) y))
-       // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s1 := v_0
-                       if s1.Op != OpS390XSLDconst {
-                               continue
-                       }
-                       j1 := auxIntToUint8(s1.AuxInt)
-                       r1 := s1.Args[0]
-                       if r1.Op != OpS390XMOVHZreg {
-                               continue
-                       }
-                       x1 := r1.Args[0]
-                       if x1.Op != OpS390XMOVHBRload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p := x1.Args[0]
-                       or := v_1
-                       if or.Op != OpS390XOR {
-                               continue
-                       }
-                       _ = or.Args[1]
-                       or_0 := or.Args[0]
-                       or_1 := or.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
-                               s0 := or_0
-                               if s0.Op != OpS390XSLDconst {
-                                       continue
-                               }
-                               j0 := auxIntToUint8(s0.AuxInt)
-                               r0 := s0.Args[0]
-                               if r0.Op != OpS390XMOVHZreg {
-                                       continue
-                               }
-                               x0 := r0.Args[0]
-                               if x0.Op != OpS390XMOVHBRload {
-                                       continue
-                               }
-                               i0 := auxIntToInt32(x0.AuxInt)
-                               if auxToSym(x0.Aux) != s {
-                                       continue
-                               }
-                               _ = x0.Args[1]
-                               if p != x0.Args[0] || mem != x0.Args[1] {
-                                       continue
-                               }
-                               y := or_1
-                               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, y)
-                               v0 := b.NewValue0(x0.Pos, OpS390XOR, v.Type)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x0.Pos, OpS390XSLDconst, v.Type)
-                               v1.AuxInt = uint8ToAuxInt(j0)
-                               v2 := b.NewValue0(x0.Pos, OpS390XMOVWZreg, typ.UInt64)
-                               v3 := b.NewValue0(x0.Pos, OpS390XMOVWBRload, typ.UInt32)
-                               v3.AuxInt = int32ToAuxInt(i0)
-                               v3.Aux = symToAux(s)
-                               v3.AddArg2(p, mem)
-                               v2.AddArg(v3)
-                               v1.AddArg(v2)
-                               v0.AddArg2(v1, y)
-                               return true
-                       }
-               }
-               break
-       }
        return false
 }
 func rewriteValueS390X_OpS390XORW(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
        // match: (ORW x (MOVDconst [c]))
        // result: (ORWconst [int32(c)] x)
        for {
@@ -12421,300 +11509,6 @@ func rewriteValueS390X_OpS390XORW(v *Value) bool {
                }
                break
        }
-       // match: (ORW x1:(MOVBZload [i1] {s} p mem) sh:(SLWconst [8] x0:(MOVBZload [i0] {s} p mem)))
-       // cond: i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x1 := v_0
-                       if x1.Op != OpS390XMOVBZload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p := x1.Args[0]
-                       sh := v_1
-                       if sh.Op != OpS390XSLWconst || auxIntToUint8(sh.AuxInt) != 8 {
-                               continue
-                       }
-                       x0 := sh.Args[0]
-                       if x0.Op != OpS390XMOVBZload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       if auxToSym(x0.Aux) != s {
-                               continue
-                       }
-                       _ = x0.Args[1]
-                       if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+1 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x0.Pos, OpS390XMOVHZload, typ.UInt16)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i0)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p, mem)
-                       return true
-               }
-               break
-       }
-       // match: (ORW x1:(MOVHZload [i1] {s} p mem) sh:(SLWconst [16] x0:(MOVHZload [i0] {s} p mem)))
-       // cond: i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x1 := v_0
-                       if x1.Op != OpS390XMOVHZload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p := x1.Args[0]
-                       sh := v_1
-                       if sh.Op != OpS390XSLWconst || auxIntToUint8(sh.AuxInt) != 16 {
-                               continue
-                       }
-                       x0 := sh.Args[0]
-                       if x0.Op != OpS390XMOVHZload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       if auxToSym(x0.Aux) != s {
-                               continue
-                       }
-                       _ = x0.Args[1]
-                       if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+2 && p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x0.Pos, OpS390XMOVWZload, typ.UInt32)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i0)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p, mem)
-                       return true
-               }
-               break
-       }
-       // match: (ORW s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem)) or:(ORW s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem)) y))
-       // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s0 := v_0
-                       if s0.Op != OpS390XSLWconst {
-                               continue
-                       }
-                       j0 := auxIntToUint8(s0.AuxInt)
-                       x0 := s0.Args[0]
-                       if x0.Op != OpS390XMOVBZload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       or := v_1
-                       if or.Op != OpS390XORW {
-                               continue
-                       }
-                       _ = or.Args[1]
-                       or_0 := or.Args[0]
-                       or_1 := or.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
-                               s1 := or_0
-                               if s1.Op != OpS390XSLWconst {
-                                       continue
-                               }
-                               j1 := auxIntToUint8(s1.AuxInt)
-                               x1 := s1.Args[0]
-                               if x1.Op != OpS390XMOVBZload {
-                                       continue
-                               }
-                               i1 := auxIntToInt32(x1.AuxInt)
-                               if auxToSym(x1.Aux) != s {
-                                       continue
-                               }
-                               _ = x1.Args[1]
-                               if p != x1.Args[0] || mem != x1.Args[1] {
-                                       continue
-                               }
-                               y := or_1
-                               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, y)
-                               v0 := b.NewValue0(x1.Pos, OpS390XORW, v.Type)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x1.Pos, OpS390XSLWconst, v.Type)
-                               v1.AuxInt = uint8ToAuxInt(j1)
-                               v2 := b.NewValue0(x1.Pos, OpS390XMOVHZload, typ.UInt16)
-                               v2.AuxInt = int32ToAuxInt(i0)
-                               v2.Aux = symToAux(s)
-                               v2.AddArg2(p, mem)
-                               v1.AddArg(v2)
-                               v0.AddArg2(v1, y)
-                               return true
-                       }
-               }
-               break
-       }
-       // match: (ORW x0:(MOVBZload [i0] {s} p mem) sh:(SLWconst [8] x1:(MOVBZload [i1] {s} p mem)))
-       // cond: p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x0 := v_0
-                       if x0.Op != OpS390XMOVBZload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       sh := v_1
-                       if sh.Op != OpS390XSLWconst || auxIntToUint8(sh.AuxInt) != 8 {
-                               continue
-                       }
-                       x1 := sh.Args[0]
-                       if x1.Op != OpS390XMOVBZload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(p.Op != OpSB && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpS390XMOVHZreg, typ.UInt64)
-                       v.copyOf(v0)
-                       v1 := b.NewValue0(x1.Pos, OpS390XMOVHBRload, typ.UInt16)
-                       v1.AuxInt = int32ToAuxInt(i0)
-                       v1.Aux = symToAux(s)
-                       v1.AddArg2(p, mem)
-                       v0.AddArg(v1)
-                       return true
-               }
-               break
-       }
-       // match: (ORW r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)) sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
-       // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
-       // result: @mergePoint(b,x0,x1) (MOVWBRload [i0] {s} p mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       r0 := v_0
-                       if r0.Op != OpS390XMOVHZreg {
-                               continue
-                       }
-                       x0 := r0.Args[0]
-                       if x0.Op != OpS390XMOVHBRload {
-                               continue
-                       }
-                       i0 := auxIntToInt32(x0.AuxInt)
-                       s := auxToSym(x0.Aux)
-                       mem := x0.Args[1]
-                       p := x0.Args[0]
-                       sh := v_1
-                       if sh.Op != OpS390XSLWconst || auxIntToUint8(sh.AuxInt) != 16 {
-                               continue
-                       }
-                       r1 := sh.Args[0]
-                       if r1.Op != OpS390XMOVHZreg {
-                               continue
-                       }
-                       x1 := r1.Args[0]
-                       if x1.Op != OpS390XMOVHBRload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       if auxToSym(x1.Aux) != s {
-                               continue
-                       }
-                       _ = x1.Args[1]
-                       if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
-                               continue
-                       }
-                       b = mergePoint(b, x0, x1)
-                       v0 := b.NewValue0(x1.Pos, OpS390XMOVWBRload, typ.UInt32)
-                       v.copyOf(v0)
-                       v0.AuxInt = int32ToAuxInt(i0)
-                       v0.Aux = symToAux(s)
-                       v0.AddArg2(p, mem)
-                       return true
-               }
-               break
-       }
-       // match: (ORW s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem)) or:(ORW s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem)) y))
-       // cond: p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-       // result: @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       s1 := v_0
-                       if s1.Op != OpS390XSLWconst {
-                               continue
-                       }
-                       j1 := auxIntToUint8(s1.AuxInt)
-                       x1 := s1.Args[0]
-                       if x1.Op != OpS390XMOVBZload {
-                               continue
-                       }
-                       i1 := auxIntToInt32(x1.AuxInt)
-                       s := auxToSym(x1.Aux)
-                       mem := x1.Args[1]
-                       p := x1.Args[0]
-                       or := v_1
-                       if or.Op != OpS390XORW {
-                               continue
-                       }
-                       _ = or.Args[1]
-                       or_0 := or.Args[0]
-                       or_1 := or.Args[1]
-                       for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
-                               s0 := or_0
-                               if s0.Op != OpS390XSLWconst {
-                                       continue
-                               }
-                               j0 := auxIntToUint8(s0.AuxInt)
-                               x0 := s0.Args[0]
-                               if x0.Op != OpS390XMOVBZload {
-                                       continue
-                               }
-                               i0 := auxIntToInt32(x0.AuxInt)
-                               if auxToSym(x0.Aux) != s {
-                                       continue
-                               }
-                               _ = x0.Args[1]
-                               if p != x0.Args[0] || mem != x0.Args[1] {
-                                       continue
-                               }
-                               y := or_1
-                               if !(p.Op != OpSB && i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-                                       continue
-                               }
-                               b = mergePoint(b, x0, x1, y)
-                               v0 := b.NewValue0(x0.Pos, OpS390XORW, v.Type)
-                               v.copyOf(v0)
-                               v1 := b.NewValue0(x0.Pos, OpS390XSLWconst, v.Type)
-                               v1.AuxInt = uint8ToAuxInt(j0)
-                               v2 := b.NewValue0(x0.Pos, OpS390XMOVHZreg, typ.UInt64)
-                               v3 := b.NewValue0(x0.Pos, OpS390XMOVHBRload, typ.UInt16)
-                               v3.AuxInt = int32ToAuxInt(i0)
-                               v3.Aux = symToAux(s)
-                               v3.AddArg2(p, mem)
-                               v2.AddArg(v3)
-                               v1.AddArg(v2)
-                               v0.AddArg2(v1, y)
-                               return true
-                       }
-               }
-               break
-       }
        return false
 }
 func rewriteValueS390X_OpS390XORWconst(v *Value) bool {
index 0567b3e21436961905334eb912c273418facb13e..6b0367a3dc61be591a1f956bdf449088279f021b 100644 (file)
@@ -84,6 +84,13 @@ func (v *Value) AuxInt8() int8 {
        return int8(v.AuxInt)
 }
 
+func (v *Value) AuxUInt8() uint8 {
+       if opcodeTable[v.Op].auxType != auxUInt8 {
+               v.Fatalf("op %s doesn't have an uint8 aux field", v.Op)
+       }
+       return uint8(v.AuxInt)
+}
+
 func (v *Value) AuxInt16() int16 {
        if opcodeTable[v.Op].auxType != auxInt16 {
                v.Fatalf("op %s doesn't have an int16 aux field", v.Op)
@@ -190,6 +197,8 @@ func (v *Value) auxString() string {
                return fmt.Sprintf(" [%d]", v.AuxInt32())
        case auxInt64, auxInt128:
                return fmt.Sprintf(" [%d]", v.AuxInt)
+       case auxUInt8:
+               return fmt.Sprintf(" [%d]", v.AuxUInt8())
        case auxARM64BitField:
                lsb := v.AuxArm64BitField().getARM64BFlsb()
                width := v.AuxArm64BitField().getARM64BFwidth()
@@ -202,6 +211,7 @@ func (v *Value) auxString() string {
                if v.Aux != nil {
                        return fmt.Sprintf(" {%v}", v.Aux)
                }
+               return ""
        case auxSymOff, auxCallOff, auxTypSize, auxNameOffsetInt8:
                s := ""
                if v.Aux != nil {
@@ -223,8 +233,12 @@ func (v *Value) auxString() string {
                return fmt.Sprintf(" {%v}", v.Aux)
        case auxFlagConstant:
                return fmt.Sprintf("[%s]", flagConstant(v.AuxInt))
+       case auxNone:
+               return ""
+       default:
+               // If you see this, add a case above instead.
+               return fmt.Sprintf("[auxtype=%d AuxInt=%d Aux=%v]", opcodeTable[v.Op].auxType, v.AuxInt, v.Aux)
        }
-       return ""
 }
 
 // If/when midstack inlining is enabled (-l=4), the compiler gets both larger and slower.
diff --git a/src/cmd/compile/internal/test/memcombine_test.go b/src/cmd/compile/internal/test/memcombine_test.go
new file mode 100644 (file)
index 0000000..c7e7a20
--- /dev/null
@@ -0,0 +1,73 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+       "encoding/binary"
+       "testing"
+)
+
+var gv = [16]byte{0, 1, 2, 3, 4, 5, 6, 7, 8}
+
+//go:noinline
+func readGlobalUnaligned() uint64 {
+       return binary.LittleEndian.Uint64(gv[1:])
+}
+
+func TestUnalignedGlobal(t *testing.T) {
+       // Note: this is a test not so much of the result of the read, but of
+       // the correct compilation of that read. On s390x unaligned global
+       // accesses fail to compile.
+       if got, want := readGlobalUnaligned(), uint64(0x0807060504030201); got != want {
+               t.Errorf("read global %x, want %x", got, want)
+       }
+}
+
+func TestSpillOfExtendedEndianLoads(t *testing.T) {
+       b := []byte{0xaa, 0xbb, 0xcc, 0xdd}
+
+       var testCases = []struct {
+               fn   func([]byte) uint64
+               want uint64
+       }{
+               {readUint16le, 0xbbaa},
+               {readUint16be, 0xaabb},
+               {readUint32le, 0xddccbbaa},
+               {readUint32be, 0xaabbccdd},
+       }
+       for _, test := range testCases {
+               if got := test.fn(b); got != test.want {
+                       t.Errorf("got %x, want %x", got, test.want)
+               }
+       }
+}
+
+func readUint16le(b []byte) uint64 {
+       y := uint64(binary.LittleEndian.Uint16(b))
+       nop() // force spill
+       return y
+}
+
+func readUint16be(b []byte) uint64 {
+       y := uint64(binary.BigEndian.Uint16(b))
+       nop() // force spill
+       return y
+}
+
+func readUint32le(b []byte) uint64 {
+       y := uint64(binary.LittleEndian.Uint32(b))
+       nop() // force spill
+       return y
+}
+
+func readUint32be(b []byte) uint64 {
+       y := uint64(binary.BigEndian.Uint32(b))
+       nop() // force spill
+       return y
+}
+
+//go:noinline
+func nop() {
+}