cmd/compile: optimize ARM64 with shifted register indexed load/store
author    Ben Shi <powerman1st@163.com>        Sun, 22 Apr 2018 00:51:00 +0000 (00:51 +0000)
committer Cherry Zhang <cherryyz@google.com>   Fri, 27 Apr 2018 20:02:05 +0000 (20:02 +0000)
ARM64 supports efficient instructions that combine shift, addition, and load/store
in a single operation, such as "MOVD (R0)(R1<<3), R2" and "MOVWU R6, (R4)(R1<<2)".
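
For example (an illustrative sketch, not taken from the CL or its tests), loading
an element of a []uint64 slice is expected to use the 64-bit shifted form:

        func load(a []uint64, i int) uint64 {
                // with this CL the element load is expected to become
                // MOVD (Rbase)(Ridx<<3), Rout (preceded by the usual bounds check)
                return a[i]
        }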

This CL optimizes the compiler to emit such instructions. Test data is listed
below.

1. binary size before/after
binary                 size change
pkg/linux_arm64        +80.1KB
pkg/tool/linux_arm64   +121.9KB
go                     -4.3KB
gofmt                  -64KB

2. go1 benchmark
There is a big improvement in the Fannkuch11 test case, and a slight
improvement in some others, excluding noise.

name                     old time/op    new time/op    delta
BinaryTree17-4              43.9s ± 2%     44.0s ± 2%     ~     (p=0.820 n=30+30)
Fannkuch11-4                30.6s ± 2%     24.5s ± 3%  -19.93%  (p=0.000 n=25+30)
FmtFprintfEmpty-4           500ns ± 0%     499ns ± 0%   -0.11%  (p=0.000 n=23+25)
FmtFprintfString-4         1.03µs ± 0%    1.04µs ± 3%     ~     (p=0.065 n=29+30)
FmtFprintfInt-4            1.15µs ± 3%    1.15µs ± 4%   -0.56%  (p=0.000 n=30+30)
FmtFprintfIntInt-4         1.80µs ± 5%    1.82µs ± 0%     ~     (p=0.094 n=30+24)
FmtFprintfPrefixedInt-4    2.17µs ± 5%    2.20µs ± 0%     ~     (p=0.100 n=30+23)
FmtFprintfFloat-4          3.08µs ± 3%    3.09µs ± 4%     ~     (p=0.123 n=30+30)
FmtManyArgs-4              7.41µs ± 4%    7.17µs ± 1%   -3.26%  (p=0.000 n=30+23)
GobDecode-4                93.7ms ± 0%    94.7ms ± 4%     ~     (p=0.685 n=24+30)
GobEncode-4                78.7ms ± 7%    77.1ms ± 0%     ~     (p=0.729 n=30+23)
Gzip-4                      4.01s ± 0%     3.97s ± 5%   -1.11%  (p=0.037 n=24+30)
Gunzip-4                    389ms ± 4%     384ms ± 0%     ~     (p=0.155 n=30+23)
HTTPClientServer-4          536µs ± 1%     537µs ± 1%     ~     (p=0.236 n=30+30)
JSONEncode-4                179ms ± 1%     182ms ± 6%     ~     (p=0.763 n=24+30)
JSONDecode-4                843ms ± 0%     839ms ± 6%   -0.42%  (p=0.003 n=25+30)
Mandelbrot200-4            46.5ms ± 0%    46.5ms ± 0%   +0.02%  (p=0.000 n=26+26)
GoParse-4                  44.3ms ± 6%    43.3ms ± 0%     ~     (p=0.067 n=30+27)
RegexpMatchEasy0_32-4      1.07µs ± 7%    1.07µs ± 4%     ~     (p=0.835 n=30+30)
RegexpMatchEasy0_1K-4      5.51µs ± 0%    5.49µs ± 0%   -0.35%  (p=0.000 n=23+26)
RegexpMatchEasy1_32-4      1.01µs ± 0%    1.02µs ± 4%   +0.96%  (p=0.014 n=24+30)
RegexpMatchEasy1_1K-4      7.43µs ± 0%    7.18µs ± 0%   -3.41%  (p=0.000 n=23+24)
RegexpMatchMedium_32-4     1.78µs ± 0%    1.81µs ± 4%   +1.47%  (p=0.012 n=23+30)
RegexpMatchMedium_1K-4      547µs ± 1%     542µs ± 3%   -0.90%  (p=0.003 n=24+30)
RegexpMatchHard_32-4       30.4µs ± 0%    29.7µs ± 0%   -2.15%  (p=0.000 n=19+23)
RegexpMatchHard_1K-4        913µs ± 0%     915µs ± 6%   +0.25%  (p=0.012 n=24+30)
Revcomp-4                   6.32s ± 1%     6.42s ± 4%     ~     (p=0.342 n=25+30)
Template-4                  868ms ± 6%     878ms ± 6%   +1.15%  (p=0.000 n=30+30)
TimeParse-4                4.57µs ± 4%    4.59µs ± 3%   +0.65%  (p=0.010 n=29+30)
TimeFormat-4               4.51µs ± 0%    4.50µs ± 0%   -0.27%  (p=0.000 n=27+24)
[Geo mean]                  695µs          689µs        -0.92%

name                     old speed      new speed      delta
GobDecode-4              8.19MB/s ± 0%  8.12MB/s ± 4%     ~     (p=0.680 n=24+30)
GobEncode-4              9.76MB/s ± 7%  9.96MB/s ± 0%     ~     (p=0.616 n=30+23)
Gzip-4                   4.84MB/s ± 0%  4.89MB/s ± 4%   +1.16%  (p=0.030 n=24+30)
Gunzip-4                 49.9MB/s ± 4%  50.6MB/s ± 0%     ~     (p=0.162 n=30+23)
JSONEncode-4             10.9MB/s ± 1%  10.7MB/s ± 6%     ~     (p=0.575 n=24+30)
JSONDecode-4             2.30MB/s ± 0%  2.32MB/s ± 5%   +0.72%  (p=0.003 n=22+30)
GoParse-4                1.31MB/s ± 6%  1.34MB/s ± 0%   +2.26%  (p=0.002 n=30+27)
RegexpMatchEasy0_32-4    30.0MB/s ± 6%  30.0MB/s ± 4%     ~     (p=1.000 n=30+30)
RegexpMatchEasy0_1K-4     186MB/s ± 0%   187MB/s ± 0%   +0.35%  (p=0.000 n=23+26)
RegexpMatchEasy1_32-4    31.8MB/s ± 0%  31.5MB/s ± 4%   -0.92%  (p=0.012 n=25+30)
RegexpMatchEasy1_1K-4     138MB/s ± 0%   143MB/s ± 0%   +3.53%  (p=0.000 n=23+24)
RegexpMatchMedium_32-4    560kB/s ± 0%   553kB/s ± 4%   -1.19%  (p=0.005 n=23+30)
RegexpMatchMedium_1K-4   1.87MB/s ± 0%  1.89MB/s ± 3%   +1.04%  (p=0.002 n=24+30)
RegexpMatchHard_32-4     1.05MB/s ± 0%  1.08MB/s ± 0%   +2.40%  (p=0.000 n=19+23)
RegexpMatchHard_1K-4     1.12MB/s ± 0%  1.12MB/s ± 5%   +0.12%  (p=0.006 n=25+30)
Revcomp-4                40.2MB/s ± 1%  39.6MB/s ± 4%     ~     (p=0.242 n=25+30)
Template-4               2.24MB/s ± 6%  2.21MB/s ± 6%   -1.15%  (p=0.000 n=30+30)
[Geo mean]               7.87MB/s       7.91MB/s        +0.44%

Change-Id: If374cb7abf83537aa0a176f73c0f736f7800db03
Reviewed-on: https://go-review.googlesource.com/108735
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/cmd/compile/internal/arm64/ssa.go
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/ARM64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteARM64.go
test/codegen/memcombine.go

index c7aea631c0dd07cf98351152a3985adad14f1cd3..017c5b9398862fd3df485d5d746001e34054af84 100644 (file)
@@ -92,6 +92,23 @@ func genshift(s *gc.SSAGenState, as obj.As, r0, r1, r int16, typ int64, n int64)
        return p
 }
 
+// generate the memory operand for the indexed load/store instructions
+func genIndexedOperand(v *ssa.Value) obj.Addr {
+       // Reg: base register, Index: (shifted) index register
+       mop := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
+       switch v.Op {
+       case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8:
+               mop.Index = arm64.REG_LSL | 3<<5 | v.Args[1].Reg()&31
+       case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4:
+               mop.Index = arm64.REG_LSL | 2<<5 | v.Args[1].Reg()&31
+       case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2, ssa.OpARM64MOVHstorezeroidx2:
+               mop.Index = arm64.REG_LSL | 1<<5 | v.Args[1].Reg()&31
+       default: // not shifted
+               mop.Index = v.Args[1].Reg()
+       }
+       return mop
+}
+
 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
        switch v.Op {
        case ssa.OpCopy, ssa.OpARM64MOVDreg:
@@ -351,12 +368,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                ssa.OpARM64MOVHUloadidx,
                ssa.OpARM64MOVWloadidx,
                ssa.OpARM64MOVWUloadidx,
-               ssa.OpARM64MOVDloadidx:
+               ssa.OpARM64MOVDloadidx,
+               ssa.OpARM64MOVHloadidx2,
+               ssa.OpARM64MOVHUloadidx2,
+               ssa.OpARM64MOVWloadidx4,
+               ssa.OpARM64MOVWUloadidx4,
+               ssa.OpARM64MOVDloadidx8:
                p := s.Prog(v.Op.Asm())
-               p.From.Type = obj.TYPE_MEM
-               p.From.Name = obj.NAME_NONE
-               p.From.Reg = v.Args[0].Reg()
-               p.From.Index = v.Args[1].Reg()
+               p.From = genIndexedOperand(v)
                p.To.Type = obj.TYPE_REG
                p.To.Reg = v.Reg()
        case ssa.OpARM64LDAR,
@@ -384,14 +403,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
        case ssa.OpARM64MOVBstoreidx,
                ssa.OpARM64MOVHstoreidx,
                ssa.OpARM64MOVWstoreidx,
-               ssa.OpARM64MOVDstoreidx:
+               ssa.OpARM64MOVDstoreidx,
+               ssa.OpARM64MOVHstoreidx2,
+               ssa.OpARM64MOVWstoreidx4,
+               ssa.OpARM64MOVDstoreidx8:
                p := s.Prog(v.Op.Asm())
+               p.To = genIndexedOperand(v)
                p.From.Type = obj.TYPE_REG
                p.From.Reg = v.Args[2].Reg()
-               p.To.Type = obj.TYPE_MEM
-               p.To.Name = obj.NAME_NONE
-               p.To.Reg = v.Args[0].Reg()
-               p.To.Index = v.Args[1].Reg()
        case ssa.OpARM64STP:
                p := s.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_REGREG
@@ -413,14 +432,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
        case ssa.OpARM64MOVBstorezeroidx,
                ssa.OpARM64MOVHstorezeroidx,
                ssa.OpARM64MOVWstorezeroidx,
-               ssa.OpARM64MOVDstorezeroidx:
+               ssa.OpARM64MOVDstorezeroidx,
+               ssa.OpARM64MOVHstorezeroidx2,
+               ssa.OpARM64MOVWstorezeroidx4,
+               ssa.OpARM64MOVDstorezeroidx8:
                p := s.Prog(v.Op.Asm())
+               p.To = genIndexedOperand(v)
                p.From.Type = obj.TYPE_REG
                p.From.Reg = arm64.REGZERO
-               p.To.Type = obj.TYPE_MEM
-               p.To.Name = obj.NAME_NONE
-               p.To.Reg = v.Args[0].Reg()
-               p.To.Index = v.Args[1].Reg()
        case ssa.OpARM64MOVQstorezero:
                p := s.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_REGREG
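
As a hedged reading of the operand packing in genIndexedOperand above (the helper
name below is made up, not part of the CL, and would use the same
cmd/internal/obj/arm64 import as the file above): the Index field of obj.Addr
carries REG_LSL to mark a left-shifted index, the shift amount starting at bit 5,
and the index register number in the low five bits.

        // shiftedIndex mirrors the packing done in genIndexedOperand:
        // REG_LSL | shift<<5 | reg&31.
        func shiftedIndex(reg int16, shift int16) int16 {
                return arm64.REG_LSL | shift<<5 | reg&31
        }

For MOVWstoreidx4 with base R4 and index R1 the operand then prints as
(R4)(R1<<2), matching the example in the commit message.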
index 59fb1bd220789c1e20070dd9e0fa3e737eaeb2c8..d713fd8fda853b505294cbe1f5ee6dd72a249aad 100644 (file)
 (MOVBloadidx ptr (MOVDconst [c]) mem) -> (MOVBload [c] ptr mem)
 (MOVBloadidx (MOVDconst [c]) ptr mem) -> (MOVBload [c] ptr mem)
 
+// shifted register indexed load
+(MOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil -> (MOVDloadidx8 ptr idx mem)
+(MOVWUload [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil -> (MOVWUloadidx4 ptr idx mem)
+(MOVWload [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil -> (MOVWloadidx4 ptr idx mem)
+(MOVHUload [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil -> (MOVHUloadidx2 ptr idx mem)
+(MOVHload [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil -> (MOVHloadidx2 ptr idx mem)
+(MOVDloadidx ptr (SLLconst [3] idx) mem) -> (MOVDloadidx8 ptr idx mem)
+(MOVWloadidx ptr (SLLconst [2] idx) mem) -> (MOVWloadidx4 ptr idx mem)
+(MOVWUloadidx ptr (SLLconst [2] idx) mem) -> (MOVWUloadidx4 ptr idx mem)
+(MOVHloadidx ptr (SLLconst [1] idx) mem) -> (MOVHloadidx2 ptr idx mem)
+(MOVHUloadidx ptr (SLLconst [1] idx) mem) -> (MOVHUloadidx2 ptr idx mem)
+(MOVHloadidx ptr (ADD idx idx) mem) -> (MOVHloadidx2 ptr idx mem)
+(MOVHUloadidx ptr (ADD idx idx) mem) -> (MOVHUloadidx2 ptr idx mem)
+(MOVDloadidx (SLLconst [3] idx) ptr mem) -> (MOVDloadidx8 ptr idx mem)
+(MOVWloadidx (SLLconst [2] idx) ptr mem) -> (MOVWloadidx4 ptr idx mem)
+(MOVWUloadidx (SLLconst [2] idx) ptr mem) -> (MOVWUloadidx4 ptr idx mem)
+(MOVHloadidx (ADD idx idx) ptr mem) -> (MOVHloadidx2 ptr idx mem)
+(MOVHUloadidx (ADD idx idx) ptr mem) -> (MOVHUloadidx2 ptr idx mem)
+(MOVDloadidx8 ptr (MOVDconst [c]) mem) -> (MOVDload [c<<3] ptr mem)
+(MOVWUloadidx4 ptr (MOVDconst [c]) mem) -> (MOVWUload [c<<2] ptr mem)
+(MOVWloadidx4 ptr (MOVDconst [c]) mem) -> (MOVWload [c<<2] ptr mem)
+(MOVHUloadidx2 ptr (MOVDconst [c]) mem) -> (MOVHUload [c<<1] ptr mem)
+(MOVHloadidx2 ptr (MOVDconst [c]) mem) -> (MOVHload [c<<1] ptr mem)
+
 (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(off1+off2)
        && (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
        (MOVBstore [off1+off2] {sym} ptr val mem)
 (MOVBstoreidx ptr (MOVDconst [c]) val mem) -> (MOVBstore [c] ptr val mem)
 (MOVBstoreidx (MOVDconst [c]) idx val mem) -> (MOVBstore [c] idx val mem)
 
+// shifted register indexed store
+(MOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem) && off == 0 && sym == nil -> (MOVDstoreidx8 ptr idx val mem)
+(MOVWstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) && off == 0 && sym == nil -> (MOVWstoreidx4 ptr idx val mem)
+(MOVHstore [off] {sym} (ADDshiftLL [1] ptr idx) val mem) && off == 0 && sym == nil -> (MOVHstoreidx2 ptr idx val mem)
+(MOVDstoreidx ptr (SLLconst [3] idx) val mem) -> (MOVDstoreidx8 ptr idx val mem)
+(MOVWstoreidx ptr (SLLconst [2] idx) val mem) -> (MOVWstoreidx4 ptr idx val mem)
+(MOVHstoreidx ptr (SLLconst [1] idx) val mem) -> (MOVHstoreidx2 ptr idx val mem)
+(MOVHstoreidx ptr (ADD idx idx) val mem) -> (MOVHstoreidx2 ptr idx val mem)
+(MOVDstoreidx (SLLconst [3] idx) ptr val mem) -> (MOVDstoreidx8 ptr idx val mem)
+(MOVWstoreidx (SLLconst [2] idx) ptr val mem) -> (MOVWstoreidx4 ptr idx val mem)
+(MOVHstoreidx (SLLconst [1] idx) ptr val mem) -> (MOVHstoreidx2 ptr idx val mem)
+(MOVHstoreidx (ADD idx idx) ptr val mem) -> (MOVHstoreidx2 ptr idx val mem)
+(MOVDstoreidx8 ptr (MOVDconst [c]) val mem) -> (MOVDstore [c<<3] ptr val mem)
+(MOVWstoreidx4 ptr (MOVDconst [c]) val mem) -> (MOVWstore [c<<2] ptr val mem)
+(MOVHstoreidx2 ptr (MOVDconst [c]) val mem) -> (MOVHstore [c<<1] ptr val mem)
+
 (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
        && canMergeSym(sym1,sym2) && is32Bit(off1+off2)
        && (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
 (MOVBstorezeroidx ptr (MOVDconst [c]) mem) -> (MOVBstorezero [c] ptr mem)
 (MOVBstorezeroidx (MOVDconst [c]) idx mem) -> (MOVBstorezero [c] idx mem)
 
+// shifted register indexed store zero
+(MOVDstorezero [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil -> (MOVDstorezeroidx8 ptr idx mem)
+(MOVWstorezero [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil -> (MOVWstorezeroidx4 ptr idx mem)
+(MOVHstorezero [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil -> (MOVHstorezeroidx2 ptr idx mem)
+(MOVDstorezeroidx ptr (SLLconst [3] idx) mem) -> (MOVDstorezeroidx8 ptr idx mem)
+(MOVWstorezeroidx ptr (SLLconst [2] idx) mem) -> (MOVWstorezeroidx4 ptr idx mem)
+(MOVHstorezeroidx ptr (SLLconst [1] idx) mem) -> (MOVHstorezeroidx2 ptr idx mem)
+(MOVHstorezeroidx ptr (ADD idx idx) mem) -> (MOVHstorezeroidx2 ptr idx mem)
+(MOVDstorezeroidx (SLLconst [3] idx) ptr mem) -> (MOVDstorezeroidx8 ptr idx mem)
+(MOVWstorezeroidx (SLLconst [2] idx) ptr mem) -> (MOVWstorezeroidx4 ptr idx mem)
+(MOVHstorezeroidx (SLLconst [1] idx) ptr mem) -> (MOVHstorezeroidx2 ptr idx mem)
+(MOVHstorezeroidx (ADD idx idx) ptr mem) -> (MOVHstorezeroidx2 ptr idx mem)
+(MOVDstoreidx8 ptr idx (MOVDconst [0]) mem) -> (MOVDstorezeroidx8 ptr idx mem)
+(MOVWstoreidx4 ptr idx (MOVDconst [0]) mem) -> (MOVWstorezeroidx4 ptr idx mem)
+(MOVHstoreidx2 ptr idx (MOVDconst [0]) mem) -> (MOVHstorezeroidx2 ptr idx mem)
+(MOVDstorezeroidx8 ptr (MOVDconst [c]) mem) -> (MOVDstorezero [c<<3] ptr mem)
+(MOVWstorezeroidx4 ptr (MOVDconst [c]) mem) -> (MOVWstorezero [c<<2] ptr mem)
+(MOVHstorezeroidx2 ptr (MOVDconst [c]) mem) -> (MOVHstorezero [c<<1] ptr mem)
+
 // replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
 // these seem to have bad interaction with other rules, resulting in slower code
 //(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBreg x)
 (MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _))
        && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
 
+(MOVHloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) -> (MOVDconst [0])
+(MOVHUloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) -> (MOVDconst [0])
+(MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) -> (MOVDconst [0])
+(MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) -> (MOVDconst [0])
+(MOVDloadidx8 ptr idx (MOVDstorezeroidx8 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) -> (MOVDconst [0])
+
 // don't extend after proper load
 (MOVBreg x:(MOVBload _ _)) -> (MOVDreg x)
 (MOVBUreg x:(MOVBUload _ _)) -> (MOVDreg x)
 (MOVWUreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
 (MOVWUreg x:(MOVHUloadidx _ _ _)) -> (MOVDreg x)
 (MOVWUreg x:(MOVWUloadidx _ _ _)) -> (MOVDreg x)
+(MOVHreg x:(MOVHloadidx2 _ _ _)) -> (MOVDreg x)
+(MOVHUreg x:(MOVHUloadidx2 _ _ _)) -> (MOVDreg x)
+(MOVWreg x:(MOVHloadidx2 _ _ _)) -> (MOVDreg x)
+(MOVWreg x:(MOVHUloadidx2 _ _ _)) -> (MOVDreg x)
+(MOVWreg x:(MOVWloadidx4 _ _ _)) -> (MOVDreg x)
+(MOVWUreg x:(MOVHUloadidx2 _ _ _)) -> (MOVDreg x)
+(MOVWUreg x:(MOVWUloadidx4 _ _ _)) -> (MOVDreg x)
 
 // fold double extensions
 (MOVBreg x:(MOVBreg _)) -> (MOVDreg x)
 (MOVHstoreidx ptr idx (MOVWUreg x) mem) -> (MOVHstoreidx ptr idx x mem)
 (MOVWstoreidx ptr idx (MOVWreg x) mem) -> (MOVWstoreidx ptr idx x mem)
 (MOVWstoreidx ptr idx (MOVWUreg x) mem) -> (MOVWstoreidx ptr idx x mem)
+(MOVHstoreidx2 ptr idx (MOVHreg x) mem) -> (MOVHstoreidx2 ptr idx x mem)
+(MOVHstoreidx2 ptr idx (MOVHUreg x) mem) -> (MOVHstoreidx2 ptr idx x mem)
+(MOVHstoreidx2 ptr idx (MOVWreg x) mem) -> (MOVHstoreidx2 ptr idx x mem)
+(MOVHstoreidx2 ptr idx (MOVWUreg x) mem) -> (MOVHstoreidx2 ptr idx x mem)
+(MOVWstoreidx4 ptr idx (MOVWreg x) mem) -> (MOVWstoreidx4 ptr idx x mem)
+(MOVWstoreidx4 ptr idx (MOVWUreg x) mem) -> (MOVWstoreidx4 ptr idx x mem)
 
 // if a register move has only 1 use, just use the same register without emitting instruction
 // MOVDnop doesn't emit instruction, only for ensuring the type.
        && clobber(x0) && clobber(x1)
        && clobber(y0) && clobber(y1)
        -> @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr0 idx0 mem)
+(ORshiftLL <t> [8]
+       y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))
+       y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
+       && x0.Uses == 1 && x1.Uses == 1
+       && y0.Uses == 1 && y1.Uses == 1
+       && mergePoint(b,x0,x1) != nil
+       && clobber(x0) && clobber(x1)
+       && clobber(y0) && clobber(y1)
+       -> @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr idx mem)
 
 // b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 -> load 32-bit
 (ORshiftLL <t> [24] o0:(ORshiftLL [16]
        && clobber(y1) && clobber(y2)
        && clobber(o0)
        -> @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 idx0 mem)
+(ORshiftLL <t> [24] o0:(ORshiftLL [16]
+                   x0:(MOVHUloadidx ptr idx mem)
+       y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
+       y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
+       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
+       && y1.Uses == 1 && y2.Uses == 1
+       && o0.Uses == 1
+       && mergePoint(b,x0,x1,x2) != nil
+       && clobber(x0) && clobber(x1) && clobber(x2)
+       && clobber(y1) && clobber(y2)
+       && clobber(o0)
+       -> @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr idx mem)
+(ORshiftLL <t> [24] o0:(ORshiftLL [16]
+                   x0:(MOVHUloadidx2 ptr0 idx0 mem)
+       y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADDshiftLL [1] ptr1 idx1) mem)))
+       y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
+       && s == nil
+       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
+       && y1.Uses == 1 && y2.Uses == 1
+       && o0.Uses == 1
+       && mergePoint(b,x0,x1,x2) != nil
+       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
+       && isSamePtr(p1, p)
+       && clobber(x0) && clobber(x1) && clobber(x2)
+       && clobber(y1) && clobber(y2)
+       && clobber(o0)
+       -> @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 (SLLconst <idx0.Type> [1] idx0) mem)
 
 // b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 -> load 64-bit
 (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
        && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
        && clobber(o0) && clobber(o1) && clobber(o2)
        -> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 idx0 mem)
+(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
+                   x0:(MOVWUloadidx4 ptr0 idx0 mem)
+       y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADDshiftLL [2] ptr1 idx1) mem)))
+       y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
+       y3:(MOVDnop x3:(MOVBUload [6] {s} p mem)))
+       y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
+       && s == nil
+       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
+       && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
+       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
+       && mergePoint(b,x0,x1,x2,x3,x4) != nil
+       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
+       && isSamePtr(p1, p)
+       && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
+       && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
+       && clobber(o0) && clobber(o1) && clobber(o2)
+       -> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 (SLLconst <idx0.Type> [2] idx0) mem)
+(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
+                   x0:(MOVWUloadidx ptr idx mem)
+       y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
+       y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
+       y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
+       y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
+       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
+       && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
+       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
+       && mergePoint(b,x0,x1,x2,x3,x4) != nil
+       && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
+       && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
+       && clobber(o0) && clobber(o1) && clobber(o2)
+       -> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr idx mem)
 
 // b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 32-bit
 (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
        && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
        && clobber(o0) && clobber(o1) && clobber(s0)
        -> @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr0 idx0 mem)
+(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
+       y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
+       y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
+       y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
+       y3:(MOVDnop x3:(MOVBUloadidx ptr idx mem)))
+       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
+       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
+       && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
+       && mergePoint(b,x0,x1,x2,x3) != nil
+       && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
+       && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
+       && clobber(o0) && clobber(o1) && clobber(s0)
+       -> @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr idx mem)
 
 // b[7]<<56 | b[6]<<48 | b[5]<<40 | b[4]<<32 | b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 64-bit
 (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
        && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
        && clobber(o4) && clobber(o5) && clobber(s0)
        -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr0 idx0 mem)
+(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
+       y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
+       y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
+       y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
+       y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
+       y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
+       y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
+       y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
+       y7:(MOVDnop x7:(MOVBUloadidx ptr idx mem)))
+       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
+       && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
+       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
+       && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
+       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
+       && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
+       && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+       && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
+       && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
+       && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
+       && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
+       && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
+       && clobber(o4) && clobber(o5) && clobber(s0)
+       -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr idx mem)
 
 // big endian loads
 // b[1] | b[0]<<8 -> load 16-bit, reverse
        && clobber(x0) && clobber(x1)
        && clobber(y0) && clobber(y1)
        -> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr0 idx0 mem))
+(ORshiftLL <t> [8]
+       y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [1] idx) mem))
+       y1:(MOVDnop x1:(MOVBUloadidx ptr idx mem)))
+       && x0.Uses == 1 && x1.Uses == 1
+       && y0.Uses == 1 && y1.Uses == 1
+       && mergePoint(b,x0,x1) != nil
+       && clobber(x0) && clobber(x1)
+       && clobber(y0) && clobber(y1)
+       -> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr idx mem))
 
 // b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 -> load 32-bit, reverse
 (ORshiftLL <t> [24] o0:(ORshiftLL [16]
        && clobber(y0) && clobber(y1) && clobber(y2)
        && clobber(o0)
        -> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
+(ORshiftLL <t> [24] o0:(ORshiftLL [16]
+       y0:(REV16W  x0:(MOVHUloadidx ptr (ADDconst [2] idx) mem))
+       y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
+       y2:(MOVDnop x2:(MOVBUloadidx ptr idx mem)))
+       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
+       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
+       && o0.Uses == 1
+       && mergePoint(b,x0,x1,x2) != nil
+       && clobber(x0) && clobber(x1) && clobber(x2)
+       && clobber(y0) && clobber(y1) && clobber(y2)
+       && clobber(o0)
+       -> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
 
 // b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 -> load 64-bit, reverse
 (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
        && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
        && clobber(o0) && clobber(o1) && clobber(o2)
        -> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
+(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
+       y0:(REVW    x0:(MOVWUloadidx ptr (ADDconst [4] idx) mem))
+       y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
+       y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
+       y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
+       y4:(MOVDnop x4:(MOVBUloadidx ptr idx mem)))
+       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
+       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
+       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
+       && mergePoint(b,x0,x1,x2,x3,x4) != nil
+       && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
+       && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
+       && clobber(o0) && clobber(o1) && clobber(o2)
+       -> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr idx mem))
 
 // b[0]<<24 | b[1]<<16 | b[2]<<8 | b[3] -> load 32-bit, reverse
 (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
        && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
        && clobber(o0) && clobber(o1) && clobber(s0)
        -> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
+(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
+       y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)))
+       y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
+       y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
+       y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
+       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
+       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
+       && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
+       && mergePoint(b,x0,x1,x2,x3) != nil
+       && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
+       && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
+       && clobber(o0) && clobber(o1) && clobber(s0)
+       -> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
 
 // b[0]<<56 | b[1]<<48 | b[2]<<40 | b[3]<<32 | b[4]<<24 | b[5]<<16 | b[6]<<8 | b[7] -> load 64-bit, reverse
 (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
        && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
        && clobber(o4) && clobber(o5) && clobber(s0)
        -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
+(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
+       y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)))
+       y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
+       y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
+       y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
+       y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
+       y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
+       y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
+       y7:(MOVDnop x7:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
+       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
+       && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
+       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
+       && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
+       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
+       && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
+       && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+       && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
+       && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
+       && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
+       && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
+       && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
+       && clobber(o4) && clobber(o5) && clobber(s0)
+       -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr idx mem))
 
 // Combine zero stores into larger (unaligned) stores.
 (MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
        && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
        && clobber(x)
        -> (MOVHstorezeroidx ptr1 idx1 mem)
+(MOVBstorezeroidx ptr (ADDconst [1] idx) x:(MOVBstorezeroidx ptr idx mem))
+       && x.Uses == 1
+       && clobber(x)
+       -> (MOVHstorezeroidx ptr idx mem)
 (MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
        && x.Uses == 1
        && areAdjacentOffsets(i,j,2)
        && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
        && clobber(x)
        -> (MOVWstorezeroidx ptr1 idx1 mem)
+(MOVHstorezeroidx ptr (ADDconst [2] idx) x:(MOVHstorezeroidx ptr idx mem))
+       && x.Uses == 1
+       && clobber(x)
+       -> (MOVWstorezeroidx ptr idx mem)
+(MOVHstorezero [2] {s} (ADDshiftLL [1] ptr0 idx0) x:(MOVHstorezeroidx2 ptr1 idx1 mem))
+       && x.Uses == 1
+       && s == nil
+       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
+       && clobber(x)
+       -> (MOVWstorezeroidx ptr1 (SLLconst <idx1.Type> [1] idx1) mem)
 (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
        && x.Uses == 1
        && areAdjacentOffsets(i,j,4)
        && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
        && clobber(x)
        -> (MOVDstorezeroidx ptr1 idx1 mem)
+(MOVWstorezeroidx ptr (ADDconst [4] idx) x:(MOVWstorezeroidx ptr idx mem))
+       && x.Uses == 1
+       && clobber(x)
+       -> (MOVDstorezeroidx ptr idx mem)
+(MOVWstorezero [4] {s} (ADDshiftLL [2] ptr0 idx0) x:(MOVWstorezeroidx4 ptr1 idx1 mem))
+       && x.Uses == 1
+       && s == nil
+       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
+       && clobber(x)
+       -> (MOVDstorezeroidx ptr1 (SLLconst <idx1.Type> [2] idx1) mem)
 (MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
        && x.Uses == 1
        && areAdjacentOffsets(i,j,8)
        && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
        && clobber(x)
        -> (MOVQstorezero [0] {s} p0 mem)
+(MOVDstorezero [8] {s} p0:(ADDshiftLL [3] ptr0 idx0) x:(MOVDstorezeroidx8 ptr1 idx1 mem))
+       && x.Uses == 1
+       && s == nil
+       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
+       && clobber(x)
+       -> (MOVQstorezero [0] {s} p0 mem)
 
 // Combine stores into larger (unaligned) stores.
 (MOVBstore [i] {s} ptr0 (SRLconst [8] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
        && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
        && clobber(x)
        -> (MOVHstoreidx ptr1 idx1 w mem)
+(MOVBstoreidx ptr (ADDconst [1] idx) (SRLconst [8] w) x:(MOVBstoreidx ptr idx w mem))
+       && x.Uses == 1
+       && clobber(x)
+       -> (MOVHstoreidx ptr idx w mem)
 (MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
        && x.Uses == 1
        && isSamePtr(ptr0, ptr1)
        && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
        && clobber(x)
        -> (MOVWstoreidx ptr1 idx1 w mem)
+(MOVHstoreidx ptr (ADDconst [2] idx) (SRLconst [16] w) x:(MOVHstoreidx ptr idx w mem))
+       && x.Uses == 1
+       && clobber(x)
+       -> (MOVWstoreidx ptr idx w mem)
+(MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
+       && x.Uses == 1
+       && s == nil
+       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
+       && clobber(x)
+       -> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
 (MOVHstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
        && x.Uses == 1
        && isSamePtr(ptr0, ptr1)
        && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
        && clobber(x)
        -> (MOVWstoreidx ptr1 idx1 w mem)
+(MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
+       && x.Uses == 1
+       && s == nil
+       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
+       && clobber(x)
+       -> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
 (MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem))
        && x.Uses == 1
        && isSamePtr(ptr0, ptr1)
        && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
        && clobber(x)
        -> (MOVWstoreidx ptr1 idx1 w mem)
+(MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx2 ptr1 idx1 w mem))
+       && x.Uses == 1
+       && s == nil
+       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
+       && clobber(x)
+       -> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
 (MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem))
        && x.Uses == 1
        && isSamePtr(ptr0, ptr1)
        && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
        && clobber(x)
        -> (MOVWstoreidx ptr1 idx1 w0 mem)
+(MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx2 ptr1 idx1 w0:(SRLconst [j-16] w) mem))
+       && x.Uses == 1
+       && s == nil
+       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
+       && clobber(x)
+       -> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w0 mem)
 (MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
        && x.Uses == 1
        && isSamePtr(ptr0, ptr1)
        && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
        && clobber(x)
        -> (MOVDstoreidx ptr1 idx1 w mem)
+(MOVWstoreidx ptr (ADDconst [4] idx) (SRLconst [32] w) x:(MOVWstoreidx ptr idx w mem))
+       && x.Uses == 1
+       && clobber(x)
+       -> (MOVDstoreidx ptr idx w mem)
+(MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx4 ptr1 idx1 w mem))
+       && x.Uses == 1
+       && s == nil
+       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
+       && clobber(x)
+       -> (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w mem)
 (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
        && x.Uses == 1
        && isSamePtr(ptr0, ptr1)
        && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
        && clobber(x)
        -> (MOVDstoreidx ptr1 idx1 w0 mem)
+(MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx4 ptr1 idx1 w0:(SRLconst [j-32] w) mem))
+       && x.Uses == 1
+       && s == nil
+       && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
+       && clobber(x)
+       -> (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w0 mem)
 (MOVBstore [i] {s} ptr w
        x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
        x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
        && clobber(x1)
        && clobber(x2)
        -> (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
+(MOVBstoreidx ptr (ADDconst [3] idx) w
+       x0:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [arm64BFAuxInt(8, 24)] w)
+       x1:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(16, 16)] w)
+       x2:(MOVBstoreidx ptr idx (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
+       && x0.Uses == 1
+       && x1.Uses == 1
+       && x2.Uses == 1
+       && clobber(x0)
+       && clobber(x1)
+       && clobber(x2)
+       -> (MOVWstoreidx ptr idx (REVW <w.Type> w) mem)
+(MOVBstoreidx ptr idx w
+       x0:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(8, 24)] w)
+       x1:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [arm64BFAuxInt(16, 16)] w)
+       x2:(MOVBstoreidx ptr (ADDconst [3] idx) (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
+       && x0.Uses == 1
+       && x1.Uses == 1
+       && x2.Uses == 1
+       && clobber(x0)
+       && clobber(x1)
+       && clobber(x2)
+       -> (MOVWstoreidx ptr idx w mem)
 (MOVBstore [i] {s} ptr w
        x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w))
        x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w))
        && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
        && clobber(x)
        -> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
+(MOVBstoreidx ptr (ADDconst [1] idx) w x:(MOVBstoreidx ptr idx (UBFX [arm64BFAuxInt(8, 8)] w) mem))
+       && x.Uses == 1
+       && clobber(x)
+       -> (MOVHstoreidx ptr idx (REV16W <w.Type> w) mem)
+(MOVBstoreidx ptr idx w x:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(8, 8)] w) mem))
+       && x.Uses == 1
+       && clobber(x)
+       -> (MOVHstoreidx ptr idx w mem)
 (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
        && x.Uses == 1
        && clobber(x)
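
To make the new rules concrete, an assumed end-to-end example (the function is
made up, in the spirit of the codegen tests touched by this CL): zeroing an
8-byte slice element should be matched by
MOVDstorezero [0] (ADDshiftLL [3] ptr idx) -> MOVDstorezeroidx8 and emit a
single shifted-register store.

        func zero(a []uint64, i int) {
                // expected with this CL: MOVD ZR, (Rbase)(Ridx<<3)
                a[i] = 0
        }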
index 5ee984027bdc3a649602ee967360565e2aafd6b6..3f821e1ce9b05d9456a319d2794fd6026899923d 100644 (file)
@@ -324,13 +324,20 @@ func init() {
                {name: "FMOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVD", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux.  arg1=mem.
 
                // register indexed load
-               {name: "MOVDloadidx", argLength: 3, reg: gp2load, asm: "MOVD", faultOnNilArg0: true},   // load 64-bit dword from arg0 + arg1, arg2 = mem.
-               {name: "MOVWloadidx", argLength: 3, reg: gp2load, asm: "MOVW", faultOnNilArg0: true},   // load 32-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
-               {name: "MOVWUloadidx", argLength: 3, reg: gp2load, asm: "MOVWU", faultOnNilArg0: true}, // load 32-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
-               {name: "MOVHloadidx", argLength: 3, reg: gp2load, asm: "MOVH", faultOnNilArg0: true},   // load 16-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
-               {name: "MOVHUloadidx", argLength: 3, reg: gp2load, asm: "MOVHU", faultOnNilArg0: true}, // load 16-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
-               {name: "MOVBloadidx", argLength: 3, reg: gp2load, asm: "MOVB", faultOnNilArg0: true},   // load 8-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
-               {name: "MOVBUloadidx", argLength: 3, reg: gp2load, asm: "MOVBU", faultOnNilArg0: true}, // load 8-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
+               {name: "MOVDloadidx", argLength: 3, reg: gp2load, asm: "MOVD"},   // load 64-bit dword from arg0 + arg1, arg2 = mem.
+               {name: "MOVWloadidx", argLength: 3, reg: gp2load, asm: "MOVW"},   // load 32-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
+               {name: "MOVWUloadidx", argLength: 3, reg: gp2load, asm: "MOVWU"}, // load 32-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
+               {name: "MOVHloadidx", argLength: 3, reg: gp2load, asm: "MOVH"},   // load 16-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
+               {name: "MOVHUloadidx", argLength: 3, reg: gp2load, asm: "MOVHU"}, // load 16-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
+               {name: "MOVBloadidx", argLength: 3, reg: gp2load, asm: "MOVB"},   // load 8-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
+               {name: "MOVBUloadidx", argLength: 3, reg: gp2load, asm: "MOVBU"}, // load 8-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
+
+               // shifted register indexed load
+               {name: "MOVHloadidx2", argLength: 3, reg: gp2load, asm: "MOVH"},   // load 16-bit half-word from arg0 + arg1*2, sign-extended to 64-bit, arg2=mem.
+               {name: "MOVHUloadidx2", argLength: 3, reg: gp2load, asm: "MOVHU"}, // load 16-bit half-word from arg0 + arg1*2, zero-extended to 64-bit, arg2=mem.
+               {name: "MOVWloadidx4", argLength: 3, reg: gp2load, asm: "MOVW"},   // load 32-bit word from arg0 + arg1*4, sign-extended to 64-bit, arg2=mem.
+               {name: "MOVWUloadidx4", argLength: 3, reg: gp2load, asm: "MOVWU"}, // load 32-bit word from arg0 + arg1*4, zero-extended to 64-bit, arg2=mem.
+               {name: "MOVDloadidx8", argLength: 3, reg: gp2load, asm: "MOVD"},   // load 64-bit double-word from arg0 + arg1*8, arg2 = mem.
 
                {name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},   // store 1 byte of arg1 to arg0 + auxInt + aux.  arg2=mem.
                {name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},   // store 2 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
@@ -341,10 +348,15 @@ func init() {
                {name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
 
                // register indexed store
-               {name: "MOVBstoreidx", argLength: 4, reg: gpstore2, asm: "MOVB", typ: "Mem", faultOnNilArg0: true}, // store 1 byte of arg2 to arg0 + arg1, arg3 = mem.
-               {name: "MOVHstoreidx", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes of arg2 to arg0 + arg1, arg3 = mem.
-               {name: "MOVWstoreidx", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes of arg2 to arg0 + arg1, arg3 = mem.
-               {name: "MOVDstoreidx", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes of arg2 to arg0 + arg1, arg3 = mem.
+               {name: "MOVBstoreidx", argLength: 4, reg: gpstore2, asm: "MOVB", typ: "Mem"}, // store 1 byte of arg2 to arg0 + arg1, arg3 = mem.
+               {name: "MOVHstoreidx", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem"}, // store 2 bytes of arg2 to arg0 + arg1, arg3 = mem.
+               {name: "MOVWstoreidx", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem"}, // store 4 bytes of arg2 to arg0 + arg1, arg3 = mem.
+               {name: "MOVDstoreidx", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem"}, // store 8 bytes of arg2 to arg0 + arg1, arg3 = mem.
+
+               // shifted register indexed store
+               {name: "MOVHstoreidx2", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem"}, // store 2 bytes of arg2 to arg0 + arg1*2, arg3 = mem.
+               {name: "MOVWstoreidx4", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem"}, // store 4 bytes of arg2 to arg0 + arg1*4, arg3 = mem.
+               {name: "MOVDstoreidx8", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem"}, // store 8 bytes of arg2 to arg0 + arg1*8, arg3 = mem.
 
                {name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux.  arg1=mem.
                {name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
@@ -353,10 +365,15 @@ func init() {
                {name: "MOVQstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "STP", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},  // store 16 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
 
                // register indexed store zero
-               {name: "MOVBstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVB", typ: "Mem", faultOnNilArg0: true}, // store 1 byte of zero to arg0 + arg1, arg2 = mem.
-               {name: "MOVHstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVH", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes of zero to arg0 + arg1, arg2 = mem.
-               {name: "MOVWstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVW", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes of zero to arg0 + arg1, arg2 = mem.
-               {name: "MOVDstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVD", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes of zero to arg0 + arg1, arg2 = mem.
+               {name: "MOVBstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVB", typ: "Mem"}, // store 1 byte of zero to arg0 + arg1, arg2 = mem.
+               {name: "MOVHstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVH", typ: "Mem"}, // store 2 bytes of zero to arg0 + arg1, arg2 = mem.
+               {name: "MOVWstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVW", typ: "Mem"}, // store 4 bytes of zero to arg0 + arg1, arg2 = mem.
+               {name: "MOVDstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVD", typ: "Mem"}, // store 8 bytes of zero to arg0 + arg1, arg2 = mem.
+
+               // shifted register indexed store zero
+               {name: "MOVHstorezeroidx2", argLength: 3, reg: gpstore, asm: "MOVH", typ: "Mem"}, // store 2 bytes of zero to arg0 + arg1*2, arg2 = mem.
+               {name: "MOVWstorezeroidx4", argLength: 3, reg: gpstore, asm: "MOVW", typ: "Mem"}, // store 4 bytes of zero to arg0 + arg1*4, arg2 = mem.
+               {name: "MOVDstorezeroidx8", argLength: 3, reg: gpstore, asm: "MOVD", typ: "Mem"}, // store 8 bytes of zero to arg0 + arg1*8, arg2 = mem.
 
                {name: "FMOVDgpfp", argLength: 1, reg: gpfp, asm: "FMOVD"}, // move int64 to float64 (no conversion)
                {name: "FMOVDfpgp", argLength: 1, reg: fpgp, asm: "FMOVD"}, // move float64 to int64 (no conversion)
index 50ad872b43a69eedaf2827129ce0339c8d729a1d..4c7b45fbfe4ecf3c62f8f6fda6f97f53ff1aecb7 100644 (file)
@@ -1168,6 +1168,11 @@ const (
        OpARM64MOVHUloadidx
        OpARM64MOVBloadidx
        OpARM64MOVBUloadidx
+       OpARM64MOVHloadidx2
+       OpARM64MOVHUloadidx2
+       OpARM64MOVWloadidx4
+       OpARM64MOVWUloadidx4
+       OpARM64MOVDloadidx8
        OpARM64MOVBstore
        OpARM64MOVHstore
        OpARM64MOVWstore
@@ -1179,6 +1184,9 @@ const (
        OpARM64MOVHstoreidx
        OpARM64MOVWstoreidx
        OpARM64MOVDstoreidx
+       OpARM64MOVHstoreidx2
+       OpARM64MOVWstoreidx4
+       OpARM64MOVDstoreidx8
        OpARM64MOVBstorezero
        OpARM64MOVHstorezero
        OpARM64MOVWstorezero
@@ -1188,6 +1196,9 @@ const (
        OpARM64MOVHstorezeroidx
        OpARM64MOVWstorezeroidx
        OpARM64MOVDstorezeroidx
+       OpARM64MOVHstorezeroidx2
+       OpARM64MOVWstorezeroidx4
+       OpARM64MOVDstorezeroidx8
        OpARM64FMOVDgpfp
        OpARM64FMOVDfpgp
        OpARM64MOVBreg
@@ -15233,10 +15244,9 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:           "MOVDloadidx",
-               argLen:         3,
-               faultOnNilArg0: true,
-               asm:            arm64.AMOVD,
+               name:   "MOVDloadidx",
+               argLen: 3,
+               asm:    arm64.AMOVD,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
@@ -15248,10 +15258,9 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:           "MOVWloadidx",
-               argLen:         3,
-               faultOnNilArg0: true,
-               asm:            arm64.AMOVW,
+               name:   "MOVWloadidx",
+               argLen: 3,
+               asm:    arm64.AMOVW,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
@@ -15263,10 +15272,9 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:           "MOVWUloadidx",
-               argLen:         3,
-               faultOnNilArg0: true,
-               asm:            arm64.AMOVWU,
+               name:   "MOVWUloadidx",
+               argLen: 3,
+               asm:    arm64.AMOVWU,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
@@ -15278,10 +15286,9 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:           "MOVHloadidx",
-               argLen:         3,
-               faultOnNilArg0: true,
-               asm:            arm64.AMOVH,
+               name:   "MOVHloadidx",
+               argLen: 3,
+               asm:    arm64.AMOVH,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
@@ -15293,10 +15300,9 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:           "MOVHUloadidx",
-               argLen:         3,
-               faultOnNilArg0: true,
-               asm:            arm64.AMOVHU,
+               name:   "MOVHUloadidx",
+               argLen: 3,
+               asm:    arm64.AMOVHU,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
@@ -15308,10 +15314,9 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:           "MOVBloadidx",
-               argLen:         3,
-               faultOnNilArg0: true,
-               asm:            arm64.AMOVB,
+               name:   "MOVBloadidx",
+               argLen: 3,
+               asm:    arm64.AMOVB,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
@@ -15323,10 +15328,79 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:           "MOVBUloadidx",
-               argLen:         3,
-               faultOnNilArg0: true,
-               asm:            arm64.AMOVBU,
+               name:   "MOVBUloadidx",
+               argLen: 3,
+               asm:    arm64.AMOVBU,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:   "MOVHloadidx2",
+               argLen: 3,
+               asm:    arm64.AMOVH,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:   "MOVHUloadidx2",
+               argLen: 3,
+               asm:    arm64.AMOVHU,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:   "MOVWloadidx4",
+               argLen: 3,
+               asm:    arm64.AMOVW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:   "MOVWUloadidx4",
+               argLen: 3,
+               asm:    arm64.AMOVWU,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:   "MOVDloadidx8",
+               argLen: 3,
+               asm:    arm64.AMOVD,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
@@ -15437,10 +15511,9 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:           "MOVBstoreidx",
-               argLen:         4,
-               faultOnNilArg0: true,
-               asm:            arm64.AMOVB,
+               name:   "MOVBstoreidx",
+               argLen: 4,
+               asm:    arm64.AMOVB,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
@@ -15450,10 +15523,9 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:           "MOVHstoreidx",
-               argLen:         4,
-               faultOnNilArg0: true,
-               asm:            arm64.AMOVH,
+               name:   "MOVHstoreidx",
+               argLen: 4,
+               asm:    arm64.AMOVH,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
@@ -15463,10 +15535,9 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:           "MOVWstoreidx",
-               argLen:         4,
-               faultOnNilArg0: true,
-               asm:            arm64.AMOVW,
+               name:   "MOVWstoreidx",
+               argLen: 4,
+               asm:    arm64.AMOVW,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
@@ -15476,10 +15547,45 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:           "MOVDstoreidx",
-               argLen:         4,
-               faultOnNilArg0: true,
-               asm:            arm64.AMOVD,
+               name:   "MOVDstoreidx",
+               argLen: 4,
+               asm:    arm64.AMOVD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {2, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+               },
+       },
+       {
+               name:   "MOVHstoreidx2",
+               argLen: 4,
+               asm:    arm64.AMOVH,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {2, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+               },
+       },
+       {
+               name:   "MOVWstoreidx4",
+               argLen: 4,
+               asm:    arm64.AMOVW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {2, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+               },
+       },
+       {
+               name:   "MOVDstoreidx8",
+               argLen: 4,
+               asm:    arm64.AMOVD,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
@@ -15554,10 +15660,9 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:           "MOVBstorezeroidx",
-               argLen:         3,
-               faultOnNilArg0: true,
-               asm:            arm64.AMOVB,
+               name:   "MOVBstorezeroidx",
+               argLen: 3,
+               asm:    arm64.AMOVB,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
@@ -15566,10 +15671,9 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:           "MOVHstorezeroidx",
-               argLen:         3,
-               faultOnNilArg0: true,
-               asm:            arm64.AMOVH,
+               name:   "MOVHstorezeroidx",
+               argLen: 3,
+               asm:    arm64.AMOVH,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
@@ -15578,10 +15682,9 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:           "MOVWstorezeroidx",
-               argLen:         3,
-               faultOnNilArg0: true,
-               asm:            arm64.AMOVW,
+               name:   "MOVWstorezeroidx",
+               argLen: 3,
+               asm:    arm64.AMOVW,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
@@ -15590,10 +15693,42 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:           "MOVDstorezeroidx",
-               argLen:         3,
-               faultOnNilArg0: true,
-               asm:            arm64.AMOVD,
+               name:   "MOVDstorezeroidx",
+               argLen: 3,
+               asm:    arm64.AMOVD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+               },
+       },
+       {
+               name:   "MOVHstorezeroidx2",
+               argLen: 3,
+               asm:    arm64.AMOVH,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+               },
+       },
+       {
+               name:   "MOVWstorezeroidx4",
+               argLen: 3,
+               asm:    arm64.AMOVW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+               },
+       },
+       {
+               name:   "MOVDstorezeroidx8",
+               argLen: 3,
+               asm:    arm64.AMOVD,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
index 334021c259c31e69c2a7302f92f64814e840e9b0..62cffc1b06adbabc546a044b39ac58c7136d3bce 100644
@@ -150,7 +150,7 @@ func rewriteValueARM64(v *Value) bool {
        case OpARM64MOVBstore:
                return rewriteValueARM64_OpARM64MOVBstore_0(v) || rewriteValueARM64_OpARM64MOVBstore_10(v) || rewriteValueARM64_OpARM64MOVBstore_20(v) || rewriteValueARM64_OpARM64MOVBstore_30(v) || rewriteValueARM64_OpARM64MOVBstore_40(v)
        case OpARM64MOVBstoreidx:
-               return rewriteValueARM64_OpARM64MOVBstoreidx_0(v)
+               return rewriteValueARM64_OpARM64MOVBstoreidx_0(v) || rewriteValueARM64_OpARM64MOVBstoreidx_10(v)
        case OpARM64MOVBstorezero:
                return rewriteValueARM64_OpARM64MOVBstorezero_0(v)
        case OpARM64MOVBstorezeroidx:
@@ -159,58 +159,80 @@ func rewriteValueARM64(v *Value) bool {
                return rewriteValueARM64_OpARM64MOVDload_0(v)
        case OpARM64MOVDloadidx:
                return rewriteValueARM64_OpARM64MOVDloadidx_0(v)
+       case OpARM64MOVDloadidx8:
+               return rewriteValueARM64_OpARM64MOVDloadidx8_0(v)
        case OpARM64MOVDreg:
                return rewriteValueARM64_OpARM64MOVDreg_0(v)
        case OpARM64MOVDstore:
                return rewriteValueARM64_OpARM64MOVDstore_0(v)
        case OpARM64MOVDstoreidx:
                return rewriteValueARM64_OpARM64MOVDstoreidx_0(v)
+       case OpARM64MOVDstoreidx8:
+               return rewriteValueARM64_OpARM64MOVDstoreidx8_0(v)
        case OpARM64MOVDstorezero:
                return rewriteValueARM64_OpARM64MOVDstorezero_0(v)
        case OpARM64MOVDstorezeroidx:
                return rewriteValueARM64_OpARM64MOVDstorezeroidx_0(v)
+       case OpARM64MOVDstorezeroidx8:
+               return rewriteValueARM64_OpARM64MOVDstorezeroidx8_0(v)
        case OpARM64MOVHUload:
                return rewriteValueARM64_OpARM64MOVHUload_0(v)
        case OpARM64MOVHUloadidx:
                return rewriteValueARM64_OpARM64MOVHUloadidx_0(v)
+       case OpARM64MOVHUloadidx2:
+               return rewriteValueARM64_OpARM64MOVHUloadidx2_0(v)
        case OpARM64MOVHUreg:
-               return rewriteValueARM64_OpARM64MOVHUreg_0(v)
+               return rewriteValueARM64_OpARM64MOVHUreg_0(v) || rewriteValueARM64_OpARM64MOVHUreg_10(v)
        case OpARM64MOVHload:
                return rewriteValueARM64_OpARM64MOVHload_0(v)
        case OpARM64MOVHloadidx:
                return rewriteValueARM64_OpARM64MOVHloadidx_0(v)
+       case OpARM64MOVHloadidx2:
+               return rewriteValueARM64_OpARM64MOVHloadidx2_0(v)
        case OpARM64MOVHreg:
                return rewriteValueARM64_OpARM64MOVHreg_0(v) || rewriteValueARM64_OpARM64MOVHreg_10(v)
        case OpARM64MOVHstore:
-               return rewriteValueARM64_OpARM64MOVHstore_0(v) || rewriteValueARM64_OpARM64MOVHstore_10(v)
+               return rewriteValueARM64_OpARM64MOVHstore_0(v) || rewriteValueARM64_OpARM64MOVHstore_10(v) || rewriteValueARM64_OpARM64MOVHstore_20(v)
        case OpARM64MOVHstoreidx:
-               return rewriteValueARM64_OpARM64MOVHstoreidx_0(v)
+               return rewriteValueARM64_OpARM64MOVHstoreidx_0(v) || rewriteValueARM64_OpARM64MOVHstoreidx_10(v)
+       case OpARM64MOVHstoreidx2:
+               return rewriteValueARM64_OpARM64MOVHstoreidx2_0(v)
        case OpARM64MOVHstorezero:
                return rewriteValueARM64_OpARM64MOVHstorezero_0(v)
        case OpARM64MOVHstorezeroidx:
                return rewriteValueARM64_OpARM64MOVHstorezeroidx_0(v)
+       case OpARM64MOVHstorezeroidx2:
+               return rewriteValueARM64_OpARM64MOVHstorezeroidx2_0(v)
        case OpARM64MOVQstorezero:
                return rewriteValueARM64_OpARM64MOVQstorezero_0(v)
        case OpARM64MOVWUload:
                return rewriteValueARM64_OpARM64MOVWUload_0(v)
        case OpARM64MOVWUloadidx:
                return rewriteValueARM64_OpARM64MOVWUloadidx_0(v)
+       case OpARM64MOVWUloadidx4:
+               return rewriteValueARM64_OpARM64MOVWUloadidx4_0(v)
        case OpARM64MOVWUreg:
                return rewriteValueARM64_OpARM64MOVWUreg_0(v) || rewriteValueARM64_OpARM64MOVWUreg_10(v)
        case OpARM64MOVWload:
                return rewriteValueARM64_OpARM64MOVWload_0(v)
        case OpARM64MOVWloadidx:
                return rewriteValueARM64_OpARM64MOVWloadidx_0(v)
+       case OpARM64MOVWloadidx4:
+               return rewriteValueARM64_OpARM64MOVWloadidx4_0(v)
        case OpARM64MOVWreg:
                return rewriteValueARM64_OpARM64MOVWreg_0(v) || rewriteValueARM64_OpARM64MOVWreg_10(v)
        case OpARM64MOVWstore:
-               return rewriteValueARM64_OpARM64MOVWstore_0(v)
+               return rewriteValueARM64_OpARM64MOVWstore_0(v) || rewriteValueARM64_OpARM64MOVWstore_10(v)
        case OpARM64MOVWstoreidx:
                return rewriteValueARM64_OpARM64MOVWstoreidx_0(v)
+       case OpARM64MOVWstoreidx4:
+               return rewriteValueARM64_OpARM64MOVWstoreidx4_0(v)
        case OpARM64MOVWstorezero:
                return rewriteValueARM64_OpARM64MOVWstorezero_0(v)
        case OpARM64MOVWstorezeroidx:
                return rewriteValueARM64_OpARM64MOVWstorezeroidx_0(v)
+       case OpARM64MOVWstorezeroidx4:
+               return rewriteValueARM64_OpARM64MOVWstorezeroidx4_0(v)
        case OpARM64MUL:
                return rewriteValueARM64_OpARM64MUL_0(v) || rewriteValueARM64_OpARM64MUL_10(v) || rewriteValueARM64_OpARM64MUL_20(v)
        case OpARM64MULW:
@@ -234,7 +256,7 @@ func rewriteValueARM64(v *Value) bool {
        case OpARM64ORconst:
                return rewriteValueARM64_OpARM64ORconst_0(v)
        case OpARM64ORshiftLL:
-               return rewriteValueARM64_OpARM64ORshiftLL_0(v) || rewriteValueARM64_OpARM64ORshiftLL_10(v)
+               return rewriteValueARM64_OpARM64ORshiftLL_0(v) || rewriteValueARM64_OpARM64ORshiftLL_10(v) || rewriteValueARM64_OpARM64ORshiftLL_20(v)
        case OpARM64ORshiftRA:
                return rewriteValueARM64_OpARM64ORshiftRA_0(v)
        case OpARM64ORshiftRL:
@@ -9030,436 +9052,425 @@ func rewriteValueARM64_OpARM64MOVBstoreidx_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVBstorezero_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVBstorezero [off1+off2] {sym} ptr mem)
+       // match: (MOVBstoreidx ptr (ADDconst [1] idx) (SRLconst [8] w) x:(MOVBstoreidx ptr idx w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVHstoreidx ptr idx w mem)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ADDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if v_1.AuxInt != 1 {
                        break
                }
-               v.reset(OpARM64MOVBstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               idx := v_1.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64SRLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if v_2.AuxInt != 8 {
                        break
                }
-               v.reset(OpARM64MOVBstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVBstorezero [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVBstorezeroidx ptr idx mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpARM64MOVBstoreidx {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               _ = x.Args[3]
+               if ptr != x.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVBstorezeroidx)
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx)
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
-       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,1) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVHstorezero [min(i,j)] {s} ptr0 mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVBstoreidx_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVBstoreidx ptr (ADDconst [3] idx) w x0:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [arm64BFAuxInt(8, 24)] w) x1:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(16, 16)] w) x2:(MOVBstoreidx ptr idx (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVWstoreidx ptr idx (REVW <w.Type> w) mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               ptr0 := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVBstorezero {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ADDconst {
                        break
                }
-               j := x.AuxInt
-               if x.Aux != s {
+               if v_1.AuxInt != 3 {
                        break
                }
-               _ = x.Args[1]
-               ptr1 := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 1) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               idx := v_1.Args[0]
+               w := v.Args[2]
+               x0 := v.Args[3]
+               if x0.Op != OpARM64MOVBstoreidx {
                        break
                }
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = min(i, j)
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVBstorezero [1] {s} (ADD ptr0 idx0) x:(MOVBstorezeroidx ptr1 idx1 mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstorezeroidx ptr1 idx1 mem)
-       for {
-               if v.AuxInt != 1 {
+               _ = x0.Args[3]
+               if ptr != x0.Args[0] {
                        break
                }
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVBstorezeroidx {
+               if x0_1.AuxInt != 2 {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               if idx != x0_1.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVHstorezeroidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVBstorezeroidx_0(v *Value) bool {
-       // match: (MOVBstorezeroidx ptr (MOVDconst [c]) mem)
-       // cond:
-       // result: (MOVBstorezero [c] ptr mem)
-       for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpARM64UBFX {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBstorezero)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVBstorezeroidx (MOVDconst [c]) idx mem)
-       // cond:
-       // result: (MOVBstorezero [c] idx mem)
-       for {
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if x0_2.AuxInt != arm64BFAuxInt(8, 24) {
                        break
                }
-               c := v_0.AuxInt
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBstorezero)
-               v.AuxInt = c
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVDload_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVDload [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               if w != x0_2.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               x1 := x0.Args[3]
+               if x1.Op != OpARM64MOVBstoreidx {
                        break
                }
-               v.reset(OpARM64MOVDload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDload [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVDloadidx ptr idx mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               _ = x1.Args[3]
+               if ptr != x1.Args[0] {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               v.reset(OpARM64MOVDloadidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if x1_1.AuxInt != 1 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if idx != x1_1.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVDload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVDconst [0])
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDstorezero {
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpARM64UBFX {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               if x1_2.AuxInt != arm64BFAuxInt(16, 16) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x2 := x1.Args[3]
+               if x2.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x2.Args[3]
+               if ptr != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpARM64UBFX {
+                       break
+               }
+               if x2_2.AuxInt != arm64BFAuxInt(24, 8) {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               mem := x2.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVDloadidx_0(v *Value) bool {
-       // match: (MOVDloadidx ptr (MOVDconst [c]) mem)
-       // cond:
-       // result: (MOVDload [c] ptr mem)
+       // match: (MOVBstoreidx ptr idx w x0:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(8, 24)] w) x1:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [arm64BFAuxInt(16, 16)] w) x2:(MOVBstoreidx ptr (ADDconst [3] idx) (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVWstoreidx ptr idx w mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               idx := v.Args[1]
+               w := v.Args[2]
+               x0 := v.Args[3]
+               if x0.Op != OpARM64MOVBstoreidx {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVDload)
-               v.AuxInt = c
+               _ = x0.Args[3]
+               if ptr != x0.Args[0] {
+                       break
+               }
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x0_1.AuxInt != 1 {
+                       break
+               }
+               if idx != x0_1.Args[0] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpARM64UBFX {
+                       break
+               }
+               if x0_2.AuxInt != arm64BFAuxInt(8, 24) {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               x1 := x0.Args[3]
+               if x1.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x1.Args[3]
+               if ptr != x1.Args[0] {
+                       break
+               }
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x1_1.AuxInt != 2 {
+                       break
+               }
+               if idx != x1_1.Args[0] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpARM64UBFX {
+                       break
+               }
+               if x1_2.AuxInt != arm64BFAuxInt(16, 16) {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x2 := x1.Args[3]
+               if x2.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x2.Args[3]
+               if ptr != x2.Args[0] {
+                       break
+               }
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x2_1.AuxInt != 3 {
+                       break
+               }
+               if idx != x2_1.Args[0] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpARM64UBFX {
+                       break
+               }
+               if x2_2.AuxInt != arm64BFAuxInt(24, 8) {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               mem := x2.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
                v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDloadidx (MOVDconst [c]) ptr mem)
-       // cond:
-       // result: (MOVDload [c] ptr mem)
+       // match: (MOVBstoreidx ptr (ADDconst [1] idx) w x:(MOVBstoreidx ptr idx (UBFX [arm64BFAuxInt(8, 8)] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVHstoreidx ptr idx (REV16W <w.Type> w) mem)
        for {
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ADDconst {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVDload)
-               v.AuxInt = c
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               idx := v_1.Args[0]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               if ptr != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpARM64UBFX {
+                       break
+               }
+               if x_2.AuxInt != arm64BFAuxInt(8, 8) {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx)
                v.AddArg(ptr)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _))
-       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
-       // result: (MOVDconst [0])
+       // match: (MOVBstoreidx ptr idx w x:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(8, 8)] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVHstoreidx ptr idx w mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
                idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVDstorezeroidx {
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpARM64MOVBstoreidx {
                        break
                }
-               _ = v_2.Args[2]
-               ptr2 := v_2.Args[0]
-               idx2 := v_2.Args[1]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+               _ = x.Args[3]
+               if ptr != x.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVDreg_0(v *Value) bool {
-       // match: (MOVDreg x)
-       // cond: x.Uses == 1
-       // result: (MOVDnop x)
-       for {
-               x := v.Args[0]
-               if !(x.Uses == 1) {
+               x_1 := x.Args[1]
+               if x_1.Op != OpARM64ADDconst {
                        break
                }
-               v.reset(OpARM64MOVDnop)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVDreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if x_1.AuxInt != 1 {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = c
+               if idx != x_1.Args[0] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != OpARM64UBFX {
+                       break
+               }
+               if x_2.AuxInt != arm64BFAuxInt(8, 8) {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVDstore_0(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVBstorezero_0(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVDstore ptr (FMOVDfpgp val) mem)
-       // cond:
-       // result: (FMOVDstore ptr val mem)
-       for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64FMOVDfpgp {
-                       break
-               }
-               val := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64FMOVDstore)
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // match: (MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVDstore [off1+off2] {sym} ptr val mem)
+       // result: (MOVBstorezero [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADDconst {
                        break
                }
                off2 := v_0.AuxInt
                ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
+               mem := v.Args[1]
                if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVDstore)
+               v.reset(OpARM64MOVBstorezero)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDstore [off] {sym} (ADD ptr idx) val mem)
+       // match: (MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVBstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstorezero [off] {sym} (ADD ptr idx) mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVDstoreidx ptr idx val mem)
+       // result: (MOVBstorezeroidx ptr idx mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADD {
                        break
@@ -9467,127 +9478,147 @@ func rewriteValueARM64_OpARM64MOVDstore_0(v *Value) bool {
                _ = v_0.Args[1]
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
+               mem := v.Args[1]
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVDstoreidx)
+               v.reset(OpARM64MOVBstorezeroidx)
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       // match: (MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
+       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,1) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVHstorezero [min(i,j)] {s} ptr0 mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               ptr0 := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVBstorezero {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               j := x.AuxInt
+               if x.Aux != s {
                        break
                }
-               v.reset(OpARM64MOVDstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(val)
+               _ = x.Args[1]
+               ptr1 := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 1) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = min(i, j)
+               v.Aux = s
+               v.AddArg(ptr0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem)
-       // cond:
-       // result: (MOVDstorezero [off] {sym} ptr mem)
+       // match: (MOVBstorezero [1] {s} (ADD ptr0 idx0) x:(MOVBstorezeroidx ptr1 idx1 mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstorezeroidx ptr1 idx1 mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v.AuxInt != 1 {
                        break
                }
-               if v_1.AuxInt != 0 {
+               s := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpARM64MOVDstorezero)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVBstorezeroidx {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstorezeroidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVDstoreidx_0(v *Value) bool {
-       // match: (MOVDstoreidx ptr (MOVDconst [c]) val mem)
+func rewriteValueARM64_OpARM64MOVBstorezeroidx_0(v *Value) bool {
+       // match: (MOVBstorezeroidx ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (MOVDstore [c] ptr val mem)
+       // result: (MOVBstorezero [c] ptr mem)
        for {
-               _ = v.Args[3]
+               _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVDstore)
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBstorezero)
                v.AuxInt = c
                v.AddArg(ptr)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDstoreidx (MOVDconst [c]) idx val mem)
+       // match: (MOVBstorezeroidx (MOVDconst [c]) idx mem)
        // cond:
-       // result: (MOVDstore [c] idx val mem)
+       // result: (MOVBstorezero [c] idx mem)
        for {
-               _ = v.Args[3]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
                idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVDstore)
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBstorezero)
                v.AuxInt = c
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDstoreidx ptr idx (MOVDconst [0]) mem)
-       // cond:
-       // result: (MOVDstorezeroidx ptr idx mem)
+       // match: (MOVBstorezeroidx ptr (ADDconst [1] idx) x:(MOVBstorezeroidx ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVHstorezeroidx ptr idx mem)
        for {
-               _ = v.Args[3]
+               _ = v.Args[2]
                ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVDconst {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ADDconst {
                        break
                }
-               if v_2.AuxInt != 0 {
+               if v_1.AuxInt != 1 {
                        break
                }
-               mem := v.Args[3]
-               v.reset(OpARM64MOVDstorezeroidx)
+               idx := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstorezeroidx {
+                       break
+               }
+               _ = x.Args[2]
+               if ptr != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstorezeroidx)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
@@ -9595,14 +9626,14 @@ func rewriteValueARM64_OpARM64MOVDstoreidx_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVDstorezero_0(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVDload_0(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVDstorezero [off1+off2] {sym} ptr mem)
+       // result: (MOVDload [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -9617,47 +9648,49 @@ func rewriteValueARM64_OpARM64MOVDstorezero_0(v *Value) bool {
                if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVDstorezero)
+               v.reset(OpARM64MOVDload)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // match: (MOVDload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVDloadidx ptr idx mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
+               _ = v_0.Args[1]
                ptr := v_0.Args[0]
+               idx := v_0.Args[1]
                mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVDstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               v.reset(OpARM64MOVDloadidx)
                v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDstorezero [off] {sym} (ADD ptr idx) mem)
+       // match: (MOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVDstorezeroidx ptr idx mem)
+       // result: (MOVDloadidx8 ptr idx mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               if v_0.Op != OpARM64ADDshiftLL {
+                       break
+               }
+               if v_0.AuxInt != 3 {
                        break
                }
                _ = v_0.Args[1]
@@ -9667,81 +9700,66 @@ func rewriteValueARM64_OpARM64MOVDstorezero_0(v *Value) bool {
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVDstorezeroidx)
+               v.reset(OpARM64MOVDloadidx8)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
-       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,8) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVQstorezero [min(i,j)] {s} ptr0 mem)
+       // match: (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
+               off1 := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[1]
-               ptr0 := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVDstorezero {
-                       break
-               }
-               j := x.AuxInt
-               if x.Aux != s {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               _ = x.Args[1]
-               ptr1 := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 8) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVQstorezero)
-               v.AuxInt = min(i, j)
-               v.Aux = s
-               v.AddArg(ptr0)
+               v.reset(OpARM64MOVDload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDstorezero [8] {s} p0:(ADD ptr0 idx0) x:(MOVDstorezeroidx ptr1 idx1 mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVQstorezero [0] {s} p0 mem)
+       // match: (MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
        for {
-               if v.AuxInt != 8 {
-                       break
-               }
-               s := v.Aux
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               p0 := v.Args[0]
-               if p0.Op != OpARM64ADD {
-                       break
-               }
-               _ = p0.Args[1]
-               ptr0 := p0.Args[0]
-               idx0 := p0.Args[1]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVDstorezeroidx {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDstorezero {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpARM64MOVQstorezero)
+               v.reset(OpARM64MOVDconst)
                v.AuxInt = 0
-               v.Aux = s
-               v.AddArg(p0)
-               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVDstorezeroidx_0(v *Value) bool {
-       // match: (MOVDstorezeroidx ptr (MOVDconst [c]) mem)
+func rewriteValueARM64_OpARM64MOVDloadidx_0(v *Value) bool {
+       // match: (MOVDloadidx ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (MOVDstorezero [c] ptr mem)
+       // result: (MOVDload [c] ptr mem)
        for {
                _ = v.Args[2]
                ptr := v.Args[0]
@@ -9751,15 +9769,15 @@ func rewriteValueARM64_OpARM64MOVDstorezeroidx_0(v *Value) bool {
                }
                c := v_1.AuxInt
                mem := v.Args[2]
-               v.reset(OpARM64MOVDstorezero)
+               v.reset(OpARM64MOVDload)
                v.AuxInt = c
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDstorezeroidx (MOVDconst [c]) idx mem)
+       // match: (MOVDloadidx (MOVDconst [c]) ptr mem)
        // cond:
-       // result: (MOVDstorezero [c] idx mem)
+       // result: (MOVDload [c] ptr mem)
        for {
                _ = v.Args[2]
                v_0 := v.Args[0]
@@ -9767,111 +9785,71 @@ func rewriteValueARM64_OpARM64MOVDstorezeroidx_0(v *Value) bool {
                        break
                }
                c := v_0.AuxInt
-               idx := v.Args[1]
+               ptr := v.Args[1]
                mem := v.Args[2]
-               v.reset(OpARM64MOVDstorezero)
+               v.reset(OpARM64MOVDload)
                v.AuxInt = c
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHUload_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVHUload [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHUload [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
-                       break
-               }
-               v.reset(OpARM64MOVHUload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHUload [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVHUloadidx ptr idx mem)
+       // match: (MOVDloadidx ptr (SLLconst [3] idx) mem)
+       // cond:
+       // result: (MOVDloadidx8 ptr idx mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               if v_1.AuxInt != 3 {
                        break
                }
-               v.reset(OpARM64MOVHUloadidx)
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDloadidx8)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // match: (MOVDloadidx (SLLconst [3] idx) ptr mem)
+       // cond:
+       // result: (MOVDloadidx8 ptr idx mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if v_0.AuxInt != 3 {
                        break
                }
-               v.reset(OpARM64MOVHUload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDloadidx8)
                v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // match: (MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
        // result: (MOVDconst [0])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVHstorezero {
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDstorezeroidx {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
                        break
                }
                v.reset(OpARM64MOVDconst)
@@ -9880,10 +9858,10 @@ func rewriteValueARM64_OpARM64MOVHUload_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHUloadidx_0(v *Value) bool {
-       // match: (MOVHUloadidx ptr (MOVDconst [c]) mem)
+func rewriteValueARM64_OpARM64MOVDloadidx8_0(v *Value) bool {
+       // match: (MOVDloadidx8 ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (MOVHUload [c] ptr mem)
+       // result: (MOVDload [c<<3] ptr mem)
        for {
                _ = v.Args[2]
                ptr := v.Args[0]
@@ -9893,45 +9871,27 @@ func rewriteValueARM64_OpARM64MOVHUloadidx_0(v *Value) bool {
                }
                c := v_1.AuxInt
                mem := v.Args[2]
-               v.reset(OpARM64MOVHUload)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHUloadidx (MOVDconst [c]) ptr mem)
-       // cond:
-       // result: (MOVHUload [c] ptr mem)
-       for {
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHUload)
-               v.AuxInt = c
+               v.reset(OpARM64MOVDload)
+               v.AuxInt = c << 3
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHUloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
-       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // match: (MOVDloadidx8 ptr idx (MOVDstorezeroidx8 ptr2 idx2 _))
+       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
        // result: (MOVDconst [0])
        for {
                _ = v.Args[2]
                ptr := v.Args[0]
                idx := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVHstorezeroidx {
+               if v_2.Op != OpARM64MOVDstorezeroidx8 {
                        break
                }
                _ = v_2.Args[2]
                ptr2 := v_2.Args[0]
                idx2 := v_2.Args[1]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
                        break
                }
                v.reset(OpARM64MOVDconst)
@@ -9940,209 +9900,145 @@ func rewriteValueARM64_OpARM64MOVHUloadidx_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHUreg_0(v *Value) bool {
-       // match: (MOVHUreg x:(MOVBUload _ _))
-       // cond:
-       // result: (MOVDreg x)
+func rewriteValueARM64_OpARM64MOVDreg_0(v *Value) bool {
+       // match: (MOVDreg x)
+       // cond: x.Uses == 1
+       // result: (MOVDnop x)
        for {
                x := v.Args[0]
-               if x.Op != OpARM64MOVBUload {
+               if !(x.Uses == 1) {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
+               v.reset(OpARM64MOVDnop)
                v.AddArg(x)
                return true
        }
-       // match: (MOVHUreg x:(MOVHUload _ _))
+       // match: (MOVDreg (MOVDconst [c]))
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVDconst [c])
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUload {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = c
                return true
        }
-       // match: (MOVHUreg x:(MOVBUloadidx _ _ _))
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDstore_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVDstore ptr (FMOVDfpgp val) mem)
        // cond:
-       // result: (MOVDreg x)
+       // result: (FMOVDstore ptr val mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUloadidx {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64FMOVDfpgp {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               val := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64FMOVDstore)
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVHUreg x:(MOVHUloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVDstore [off1+off2] {sym} ptr val mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUloadidx {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHUreg x:(MOVBUreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUreg {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHUreg x:(MOVHUreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUreg {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHUreg (ANDconst [c] x))
-       // cond:
-       // result: (ANDconst [c&(1<<16-1)] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ANDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c & (1<<16 - 1)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHUreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(uint16(c))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(uint16(c))
-               return true
-       }
-       // match: (MOVHUreg (SLLconst [sc] x))
-       // cond: isARM64BFMask(sc, 1<<16-1, sc)
-       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
-                       break
-               }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<16-1, sc)) {
-                       break
-               }
-               v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHUreg (SRLconst [sc] x))
-       // cond: isARM64BFMask(sc, 1<<16-1, 0)
-       // result: (UBFX [arm64BFAuxInt(sc, 16)] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
-                       break
-               }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<16-1, 0)) {
-                       break
-               }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BFAuxInt(sc, 16)
-               v.AddArg(x)
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHload_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHload [off1+off2] {sym} ptr mem)
+       // match: (MOVDstore [off] {sym} (ADD ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVDstoreidx ptr idx val mem)
        for {
-               off1 := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               off2 := v_0.AuxInt
+               _ = v_0.Args[1]
                ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVHload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
+               v.reset(OpARM64MOVDstoreidx)
                v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHload [off] {sym} (ADD ptr idx) mem)
+       // match: (MOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVHloadidx ptr idx mem)
+       // result: (MOVDstoreidx8 ptr idx val mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               if v_0.Op != OpARM64ADDshiftLL {
+                       break
+               }
+               if v_0.AuxInt != 3 {
                        break
                }
                _ = v_0.Args[1]
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVHloadidx)
+               v.reset(OpARM64MOVDstoreidx8)
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // match: (MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // result: (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDaddr {
                        break
@@ -10150,317 +10046,234 @@ func rewriteValueARM64_OpARM64MOVHload_0(v *Value) bool {
                off2 := v_0.AuxInt
                sym2 := v_0.Aux
                ptr := v_0.Args[0]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVHload)
+               v.reset(OpARM64MOVDstore)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVDconst [0])
+       // match: (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVDstorezero [off] {sym} ptr mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVHstorezero {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               if v_1.AuxInt != 0 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHloadidx_0(v *Value) bool {
-       // match: (MOVHloadidx ptr (MOVDconst [c]) mem)
+func rewriteValueARM64_OpARM64MOVDstoreidx_0(v *Value) bool {
+       // match: (MOVDstoreidx ptr (MOVDconst [c]) val mem)
        // cond:
-       // result: (MOVHload [c] ptr mem)
+       // result: (MOVDstore [c] ptr val mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHload)
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVDstore)
                v.AuxInt = c
                v.AddArg(ptr)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHloadidx (MOVDconst [c]) ptr mem)
+       // match: (MOVDstoreidx (MOVDconst [c]) idx val mem)
        // cond:
-       // result: (MOVHload [c] ptr mem)
+       // result: (MOVDstore [c] idx val mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHload)
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVDstore)
                v.AuxInt = c
-               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
-       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
-       // result: (MOVDconst [0])
+       // match: (MOVDstoreidx ptr (SLLconst [3] idx) val mem)
+       // cond:
+       // result: (MOVDstoreidx8 ptr idx val mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVHstorezeroidx {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               _ = v_2.Args[2]
-               ptr2 := v_2.Args[0]
-               idx2 := v_2.Args[1]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+               if v_1.AuxInt != 3 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVDstoreidx8)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHreg_0(v *Value) bool {
-       // match: (MOVHreg x:(MOVBload _ _))
+       // match: (MOVDstoreidx (SLLconst [3] idx) ptr val mem)
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVDstoreidx8 ptr idx val mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBload {
+               _ = v.Args[3]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVBUload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUload {
+               if v_0.AuxInt != 3 {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVDstoreidx8)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVHreg x:(MOVHload _ _))
+       // match: (MOVDstoreidx ptr idx (MOVDconst [0]) mem)
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVDstorezeroidx ptr idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHload {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVBloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBloadidx {
-                       break
-               }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVBUloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVHloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHloadidx {
-                       break
-               }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVBreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBreg {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVBUreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUreg {
+               if v_2.AuxInt != 0 {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               mem := v.Args[3]
+               v.reset(OpARM64MOVDstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVHreg x:(MOVHreg _))
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDstoreidx8_0(v *Value) bool {
+       // match: (MOVDstoreidx8 ptr (MOVDconst [c]) val mem)
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVDstore [c<<3] ptr val mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHreg {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               c := v_1.AuxInt
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = c << 3
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVHreg (MOVDconst [c]))
+       // match: (MOVDstoreidx8 ptr idx (MOVDconst [0]) mem)
        // cond:
-       // result: (MOVDconst [int64(int16(c))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(int16(c))
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHreg_10(v *Value) bool {
-       // match: (MOVHreg (SLLconst [lc] x))
-       // cond: lc < 16
-       // result: (SBFIZ [arm64BFAuxInt(lc, 16-lc)] x)
+       // result: (MOVDstorezeroidx8 ptr idx mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               lc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(lc < 16) {
+               if v_2.AuxInt != 0 {
                        break
                }
-               v.reset(OpARM64SBFIZ)
-               v.AuxInt = arm64BFAuxInt(lc, 16-lc)
-               v.AddArg(x)
+               mem := v.Args[3]
+               v.reset(OpARM64MOVDstorezeroidx8)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHstore_0(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVDstorezero_0(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // match: (MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHstore [off1+off2] {sym} ptr val mem)
+       // result: (MOVDstorezero [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADDconst {
                        break
                }
                off2 := v_0.AuxInt
                ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
+               mem := v.Args[1]
                if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
+               v.reset(OpARM64MOVDstorezero)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstore [off] {sym} (ADD ptr idx) val mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVHstoreidx ptr idx val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(off == 0 && sym == nil) {
-                       break
-               }
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
+       // match: (MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       // result: (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDaddr {
                        break
@@ -10468,894 +10281,736 @@ func rewriteValueARM64_OpARM64MOVHstore_0(v *Value) bool {
                off2 := v_0.AuxInt
                sym2 := v_0.Aux
                ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
+               mem := v.Args[1]
                if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
+               v.reset(OpARM64MOVDstorezero)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [off] {sym} ptr (MOVDconst [0]) mem)
-       // cond:
-       // result: (MOVHstorezero [off] {sym} ptr mem)
+       // match: (MOVDstorezero [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVDstorezeroidx ptr idx mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               if v_1.AuxInt != 0 {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = off
-               v.Aux = sym
+               v.reset(OpARM64MOVDstorezeroidx)
                v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [off] {sym} ptr (MOVHreg x) mem)
-       // cond:
-       // result: (MOVHstore [off] {sym} ptr x mem)
+       // match: (MOVDstorezero [off] {sym} (ADDshiftLL [3] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVDstorezeroidx8 ptr idx mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVHreg {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstore [off] {sym} ptr (MOVHUreg x) mem)
-       // cond:
-       // result: (MOVHstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVHUreg {
+               if v_0.AuxInt != 3 {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = off
-               v.Aux = sym
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVDstorezeroidx8)
                v.AddArg(ptr)
-               v.AddArg(x)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [off] {sym} ptr (MOVWreg x) mem)
-       // cond:
-       // result: (MOVHstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWreg {
-                       break
-               }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstore [off] {sym} ptr (MOVWUreg x) mem)
-       // cond:
-       // result: (MOVHstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWUreg {
-                       break
-               }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstore [i] {s} ptr0 (SRLconst [16] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVWstore [i-2] {s} ptr0 w mem)
+       // match: (MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
+       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,8) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVQstorezero [min(i,j)] {s} ptr0 mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                ptr0 := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               if v_1.AuxInt != 16 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstore {
-                       break
-               }
-               if x.AuxInt != i-2 {
+               x := v.Args[1]
+               if x.Op != OpARM64MOVDstorezero {
                        break
                }
+               j := x.AuxInt
                if x.Aux != s {
                        break
                }
-               _ = x.Args[2]
+               _ = x.Args[1]
                ptr1 := x.Args[0]
-               if w != x.Args[1] {
-                       break
-               }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               mem := x.Args[1]
+               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 8) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = i - 2
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = min(i, j)
                v.Aux = s
                v.AddArg(ptr0)
-               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx ptr1 idx1 w mem))
+       // match: (MOVDstorezero [8] {s} p0:(ADD ptr0 idx0) x:(MOVDstorezeroidx ptr1 idx1 mem))
        // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVWstoreidx ptr1 idx1 w mem)
+       // result: (MOVQstorezero [0] {s} p0 mem)
        for {
-               if v.AuxInt != 2 {
+               if v.AuxInt != 8 {
                        break
                }
                s := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               if v_1.AuxInt != 16 {
+               _ = v.Args[1]
+               p0 := v.Args[0]
+               if p0.Op != OpARM64ADD {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstoreidx {
+               _ = p0.Args[1]
+               ptr0 := p0.Args[0]
+               idx0 := p0.Args[1]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVDstorezeroidx {
                        break
                }
-               _ = x.Args[3]
+               _ = x.Args[2]
                ptr1 := x.Args[0]
                idx1 := x.Args[1]
-               if w != x.Args[2] {
-                       break
-               }
-               mem := x.Args[3]
+               mem := x.Args[2]
                if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(w)
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = 0
+               v.Aux = s
+               v.AddArg(p0)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHstore_10(v *Value) bool {
-       // match: (MOVHstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVWstore [i-2] {s} ptr0 w mem)
+       // match: (MOVDstorezero [8] {s} p0:(ADDshiftLL [3] ptr0 idx0) x:(MOVDstorezeroidx8 ptr1 idx1 mem))
+       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
+       // result: (MOVQstorezero [0] {s} p0 mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr0 := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64UBFX {
-                       break
-               }
-               if v_1.AuxInt != arm64BFAuxInt(16, 16) {
+               if v.AuxInt != 8 {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstore {
+               s := v.Aux
+               _ = v.Args[1]
+               p0 := v.Args[0]
+               if p0.Op != OpARM64ADDshiftLL {
                        break
                }
-               if x.AuxInt != i-2 {
+               if p0.AuxInt != 3 {
                        break
                }
-               if x.Aux != s {
+               _ = p0.Args[1]
+               ptr0 := p0.Args[0]
+               idx0 := p0.Args[1]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVDstorezeroidx8 {
                        break
                }
                _ = x.Args[2]
                ptr1 := x.Args[0]
-               if w != x.Args[1] {
-                       break
-               }
+               idx1 := x.Args[1]
                mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = i - 2
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = 0
                v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w)
+               v.AddArg(p0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstoreidx ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVWstoreidx ptr1 idx1 w mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDstorezeroidx_0(v *Value) bool {
+       // match: (MOVDstorezeroidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVDstorezero [c] ptr mem)
        for {
-               if v.AuxInt != 2 {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               s := v.Aux
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstorezeroidx (MOVDconst [c]) idx mem)
+       // cond:
+       // result: (MOVDstorezero [c] idx mem)
+       for {
                _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
+               c := v_0.AuxInt
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = c
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstorezeroidx ptr (SLLconst [3] idx) mem)
+       // cond:
+       // result: (MOVDstorezeroidx8 ptr idx mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64UBFX {
-                       break
-               }
-               if v_1.AuxInt != arm64BFAuxInt(16, 16) {
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstoreidx {
+               if v_1.AuxInt != 3 {
                        break
                }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if w != x.Args[2] {
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDstorezeroidx8)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstorezeroidx (SLLconst [3] idx) ptr mem)
+       // cond:
+       // result: (MOVDstorezeroidx8 ptr idx mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               if v_0.AuxInt != 3 {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(w)
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDstorezeroidx8)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVWstore [i-2] {s} ptr0 w mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDstorezeroidx8_0(v *Value) bool {
+       // match: (MOVDstorezeroidx8 ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVDstorezero [c<<3] ptr mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
                _ = v.Args[2]
-               ptr0 := v.Args[0]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               if v_1.AuxInt != 16 {
-                       break
-               }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpARM64MOVDreg {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               w := v_1_0.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstore {
-                       break
-               }
-               if x.AuxInt != i-2 {
-                       break
-               }
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] {
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = c << 3
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHUload_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVHUload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVHUload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w)
+               v.reset(OpARM64MOVHUload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVWstoreidx ptr1 idx1 w mem)
+       // match: (MOVHUload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVHUloadidx ptr idx mem)
        for {
-               if v.AuxInt != 2 {
-                       break
-               }
-               s := v.Aux
-               _ = v.Args[2]
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADD {
                        break
                }
                _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               if v_1.AuxInt != 16 {
-                       break
-               }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpARM64MOVDreg {
-                       break
-               }
-               w := v_1_0.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstoreidx {
-                       break
-               }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if w != x.Args[2] {
-                       break
-               }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(w)
+               v.reset(OpARM64MOVHUloadidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVWstore [i-2] {s} ptr0 w0 mem)
+       // match: (MOVHUload [off] {sym} (ADDshiftLL [1] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVHUloadidx2 ptr idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr0 := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstore {
-                       break
-               }
-               if x.AuxInt != i-2 {
-                       break
-               }
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != OpARM64SRLconst {
-                       break
-               }
-               if w0.AuxInt != j-16 {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               if w != w0.Args[0] {
+               if v_0.AuxInt != 1 {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w0)
+               v.reset(OpARM64MOVHUloadidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx ptr1 idx1 w0:(SRLconst [j-16] w) mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVWstoreidx ptr1 idx1 w0 mem)
+       // match: (MOVHUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
-               if v.AuxInt != 2 {
-                       break
-               }
-               s := v.Aux
-               _ = v.Args[2]
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstoreidx {
-                       break
-               }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               w0 := x.Args[2]
-               if w0.Op != OpARM64SRLconst {
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               if w0.AuxInt != j-16 {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               if w != w0.Args[0] {
+               v.reset(OpARM64MOVHUload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVHstorezero {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHstoreidx_0(v *Value) bool {
-       // match: (MOVHstoreidx ptr (MOVDconst [c]) val mem)
+func rewriteValueARM64_OpARM64MOVHUloadidx_0(v *Value) bool {
+       // match: (MOVHUloadidx ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (MOVHstore [c] ptr val mem)
+       // result: (MOVHUload [c] ptr mem)
        for {
-               _ = v.Args[3]
+               _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstore)
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHUload)
                v.AuxInt = c
                v.AddArg(ptr)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx (MOVDconst [c]) idx val mem)
+       // match: (MOVHUloadidx (MOVDconst [c]) ptr mem)
        // cond:
-       // result: (MOVHstore [c] idx val mem)
+       // result: (MOVHUload [c] ptr mem)
        for {
-               _ = v.Args[3]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstore)
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHUload)
                v.AuxInt = c
-               v.AddArg(idx)
-               v.AddArg(val)
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx ptr idx (MOVDconst [0]) mem)
+       // match: (MOVHUloadidx ptr (SLLconst [1] idx) mem)
        // cond:
-       // result: (MOVHstorezeroidx ptr idx mem)
+       // result: (MOVHUloadidx2 ptr idx mem)
        for {
-               _ = v.Args[3]
+               _ = v.Args[2]
                ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVDconst {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               if v_2.AuxInt != 0 {
+               if v_1.AuxInt != 1 {
                        break
                }
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstorezeroidx)
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHUloadidx2)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx ptr idx (MOVHreg x) mem)
+       // match: (MOVHUloadidx ptr (ADD idx idx) mem)
        // cond:
-       // result: (MOVHstoreidx ptr idx x mem)
+       // result: (MOVHUloadidx2 ptr idx mem)
        for {
-               _ = v.Args[3]
+               _ = v.Args[2]
                ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVHreg {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ADD {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstoreidx)
+               _ = v_1.Args[1]
+               idx := v_1.Args[0]
+               if idx != v_1.Args[1] {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHUloadidx2)
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx ptr idx (MOVHUreg x) mem)
+       // match: (MOVHUloadidx (ADD idx idx) ptr mem)
        // cond:
-       // result: (MOVHstoreidx ptr idx x mem)
+       // result: (MOVHUloadidx2 ptr idx mem)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVHUreg {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstoreidx)
+               _ = v_0.Args[1]
+               idx := v_0.Args[0]
+               if idx != v_0.Args[1] {
+                       break
+               }
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHUloadidx2)
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx ptr idx (MOVWreg x) mem)
-       // cond:
-       // result: (MOVHstoreidx ptr idx x mem)
+       // match: (MOVHUloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
        for {
-               _ = v.Args[3]
+               _ = v.Args[2]
                ptr := v.Args[0]
                idx := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWreg {
+               if v_2.Op != OpARM64MOVHstorezeroidx {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(x)
-               v.AddArg(mem)
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MOVHstoreidx ptr idx (MOVWUreg x) mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHUloadidx2_0(v *Value) bool {
+       // match: (MOVHUloadidx2 ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (MOVHstoreidx ptr idx x mem)
+       // result: (MOVHUload [c<<1] ptr mem)
        for {
-               _ = v.Args[3]
+               _ = v.Args[2]
                ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWUreg {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstoreidx)
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHUload)
+               v.AuxInt = c << 1
                v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHstorezero_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHstorezero [off1+off2] {sym} ptr mem)
+       // match: (MOVHUloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _))
+       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
+       // result: (MOVDconst [0])
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHstorezeroidx2 {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
                        break
                }
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHUreg_0(v *Value) bool {
+       // match: (MOVHUreg x:(MOVBUload _ _))
+       // cond:
+       // result: (MOVDreg x)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUload {
                        break
                }
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVHstorezero [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVHstorezeroidx ptr idx mem)
+       // match: (MOVHUreg x:(MOVHUload _ _))
+       // cond:
+       // result: (MOVDreg x)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUload {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHUreg x:(MOVBUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUloadidx {
                        break
                }
-               v.reset(OpARM64MOVHstorezeroidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
-       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,2) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVWstorezero [min(i,j)] {s} ptr0 mem)
+       // match: (MOVHUreg x:(MOVHUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               ptr0 := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVHstorezero {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUloadidx {
                        break
                }
-               j := x.AuxInt
-               if x.Aux != s {
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHUreg x:(MOVHUloadidx2 _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUloadidx2 {
                        break
                }
-               _ = x.Args[1]
-               ptr1 := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 2) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHUreg x:(MOVBUreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUreg {
                        break
                }
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = min(i, j)
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVHstorezero [2] {s} (ADD ptr0 idx0) x:(MOVHstorezeroidx ptr1 idx1 mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVWstorezeroidx ptr1 idx1 mem)
+       // match: (MOVHUreg x:(MOVHUreg _))
+       // cond:
+       // result: (MOVDreg x)
        for {
-               if v.AuxInt != 2 {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUreg {
                        break
                }
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVHstorezeroidx {
-                       break
-               }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVWstorezeroidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHstorezeroidx_0(v *Value) bool {
-       // match: (MOVHstorezeroidx ptr (MOVDconst [c]) mem)
+       // match: (MOVHUreg (ANDconst [c] x))
        // cond:
-       // result: (MOVHstorezero [c] ptr mem)
+       // result: (ANDconst [c&(1<<16-1)] x)
        for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ANDconst {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c & (1<<16 - 1)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVHstorezeroidx (MOVDconst [c]) idx mem)
+       // match: (MOVHUreg (MOVDconst [c]))
        // cond:
-       // result: (MOVHstorezero [c] idx mem)
+       // result: (MOVDconst [int64(uint16(c))])
        for {
-               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = c
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(uint16(c))
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVQstorezero_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVQstorezero [off1+off2] {sym} ptr mem)
+       // match: (MOVHUreg (SLLconst [sc] x))
+       // cond: isARM64BFMask(sc, 1<<16-1, sc)
+       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))] x)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<16-1, sc)) {
                        break
                }
-               v.reset(OpARM64MOVQstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpARM64UBFIZ)
+               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))
+               v.AddArg(x)
                return true
        }
-       // match: (MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHUreg_10(v *Value) bool {
+       // match: (MOVHUreg (SRLconst [sc] x))
+       // cond: isARM64BFMask(sc, 1<<16-1, 0)
+       // result: (UBFX [arm64BFAuxInt(sc, 16)] x)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if v_0.Op != OpARM64SRLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<16-1, 0)) {
                        break
                }
-               v.reset(OpARM64MOVQstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpARM64UBFX)
+               v.AuxInt = arm64BFAuxInt(sc, 16)
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVHload_0(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWUload [off1+off2] {sym} ptr mem)
+       // result: (MOVHload [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -11370,16 +11025,16 @@ func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
                if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVWUload)
+               v.reset(OpARM64MOVHload)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUload [off] {sym} (ADD ptr idx) mem)
+       // match: (MOVHload [off] {sym} (ADD ptr idx) mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVWUloadidx ptr idx mem)
+       // result: (MOVHloadidx ptr idx mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -11395,15 +11050,42 @@ func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVWUloadidx)
+               v.reset(OpARM64MOVHloadidx)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // match: (MOVHload [off] {sym} (ADDshiftLL [1] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVHloadidx2 ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVHloadidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // result: (MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -11419,14 +11101,14 @@ func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVWUload)
+               v.reset(OpARM64MOVHload)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
+       // match: (MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
        // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
        // result: (MOVDconst [0])
        for {
@@ -11435,7 +11117,7 @@ func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
                _ = v.Args[1]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWstorezero {
+               if v_1.Op != OpARM64MOVHstorezero {
                        break
                }
                off2 := v_1.AuxInt
@@ -11451,10 +11133,10 @@ func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool {
-       // match: (MOVWUloadidx ptr (MOVDconst [c]) mem)
+func rewriteValueARM64_OpARM64MOVHloadidx_0(v *Value) bool {
+       // match: (MOVHloadidx ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (MOVWUload [c] ptr mem)
+       // result: (MOVHload [c] ptr mem)
        for {
                _ = v.Args[2]
                ptr := v.Args[0]
@@ -11464,15 +11146,15 @@ func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool {
                }
                c := v_1.AuxInt
                mem := v.Args[2]
-               v.reset(OpARM64MOVWUload)
+               v.reset(OpARM64MOVHload)
                v.AuxInt = c
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUloadidx (MOVDconst [c]) ptr mem)
+       // match: (MOVHloadidx (MOVDconst [c]) ptr mem)
        // cond:
-       // result: (MOVWUload [c] ptr mem)
+       // result: (MOVHload [c] ptr mem)
        for {
                _ = v.Args[2]
                v_0 := v.Args[0]
@@ -11482,13 +11164,78 @@ func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool {
                c := v_0.AuxInt
                ptr := v.Args[1]
                mem := v.Args[2]
-               v.reset(OpARM64MOVWUload)
+               v.reset(OpARM64MOVHload)
                v.AuxInt = c
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
+       // match: (MOVHloadidx ptr (SLLconst [1] idx) mem)
+       // cond:
+       // result: (MOVHloadidx2 ptr idx mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHloadidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHloadidx ptr (ADD idx idx) mem)
+       // cond:
+       // result: (MOVHloadidx2 ptr idx mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_1.Args[1]
+               idx := v_1.Args[0]
+               if idx != v_1.Args[1] {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHloadidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHloadidx (ADD idx idx) ptr mem)
+       // cond:
+       // result: (MOVHloadidx2 ptr idx mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               idx := v_0.Args[0]
+               if idx != v_0.Args[1] {
+                       break
+               }
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHloadidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
        // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
        // result: (MOVDconst [0])
        for {
@@ -11496,7 +11243,7 @@ func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool {
                ptr := v.Args[0]
                idx := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWstorezeroidx {
+               if v_2.Op != OpARM64MOVHstorezeroidx {
                        break
                }
                _ = v_2.Args[2]
@@ -11511,26 +11258,55 @@ func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool {
-       // match: (MOVWUreg x:(MOVBUload _ _))
+func rewriteValueARM64_OpARM64MOVHloadidx2_0(v *Value) bool {
+       // match: (MOVHloadidx2 ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVHload [c<<1] ptr mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUload {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHload)
+               v.AuxInt = c << 1
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWUreg x:(MOVHUload _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MOVHloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _))
+       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHstorezeroidx2 {
+                       break
+               }
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHreg_0(v *Value) bool {
+       // match: (MOVHreg x:(MOVBload _ _))
+       // cond:
+       // result: (MOVDreg x)
        for {
                x := v.Args[0]
-               if x.Op != OpARM64MOVHUload {
+               if x.Op != OpARM64MOVBload {
                        break
                }
                _ = x.Args[1]
@@ -11538,12 +11314,12 @@ func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVWUreg x:(MOVWUload _ _))
+       // match: (MOVHreg x:(MOVBUload _ _))
        // cond:
        // result: (MOVDreg x)
        for {
                x := v.Args[0]
-               if x.Op != OpARM64MOVWUload {
+               if x.Op != OpARM64MOVBUload {
                        break
                }
                _ = x.Args[1]
@@ -11551,12 +11327,25 @@ func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVWUreg x:(MOVBUloadidx _ _ _))
+       // match: (MOVHreg x:(MOVHload _ _))
        // cond:
        // result: (MOVDreg x)
        for {
                x := v.Args[0]
-               if x.Op != OpARM64MOVBUloadidx {
+               if x.Op != OpARM64MOVHload {
+                       break
+               }
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHreg x:(MOVBloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBloadidx {
                        break
                }
                _ = x.Args[2]
@@ -11564,12 +11353,12 @@ func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVWUreg x:(MOVHUloadidx _ _ _))
+       // match: (MOVHreg x:(MOVBUloadidx _ _ _))
        // cond:
        // result: (MOVDreg x)
        for {
                x := v.Args[0]
-               if x.Op != OpARM64MOVHUloadidx {
+               if x.Op != OpARM64MOVBUloadidx {
                        break
                }
                _ = x.Args[2]
@@ -11577,12 +11366,12 @@ func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVWUreg x:(MOVWUloadidx _ _ _))
+       // match: (MOVHreg x:(MOVHloadidx _ _ _))
        // cond:
        // result: (MOVDreg x)
        for {
                x := v.Args[0]
-               if x.Op != OpARM64MOVWUloadidx {
+               if x.Op != OpARM64MOVHloadidx {
                        break
                }
                _ = x.Args[2]
@@ -11590,63 +11379,61 @@ func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVWUreg x:(MOVBUreg _))
+       // match: (MOVHreg x:(MOVHloadidx2 _ _ _))
        // cond:
        // result: (MOVDreg x)
        for {
                x := v.Args[0]
-               if x.Op != OpARM64MOVBUreg {
+               if x.Op != OpARM64MOVHloadidx2 {
                        break
                }
+               _ = x.Args[2]
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVWUreg x:(MOVHUreg _))
+       // match: (MOVHreg x:(MOVBreg _))
        // cond:
        // result: (MOVDreg x)
        for {
                x := v.Args[0]
-               if x.Op != OpARM64MOVHUreg {
+               if x.Op != OpARM64MOVBreg {
                        break
                }
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVWUreg x:(MOVWUreg _))
+       // match: (MOVHreg x:(MOVBUreg _))
        // cond:
        // result: (MOVDreg x)
        for {
                x := v.Args[0]
-               if x.Op != OpARM64MOVWUreg {
+               if x.Op != OpARM64MOVBUreg {
                        break
                }
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVWUreg (ANDconst [c] x))
+       // match: (MOVHreg x:(MOVHreg _))
        // cond:
-       // result: (ANDconst [c&(1<<32-1)] x)
+       // result: (MOVDreg x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ANDconst {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHreg {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c & (1<<32 - 1)
+               v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWUreg_10(v *Value) bool {
-       // match: (MOVWUreg (MOVDconst [c]))
+func rewriteValueARM64_OpARM64MOVHreg_10(v *Value) bool {
+       // match: (MOVHreg (MOVDconst [c]))
        // cond:
-       // result: (MOVDconst [int64(uint32(c))])
+       // result: (MOVDconst [int64(int16(c))])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
@@ -11654,83 +11441,67 @@ func rewriteValueARM64_OpARM64MOVWUreg_10(v *Value) bool {
                }
                c := v_0.AuxInt
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(uint32(c))
+               v.AuxInt = int64(int16(c))
                return true
        }
-       // match: (MOVWUreg (SLLconst [sc] x))
-       // cond: isARM64BFMask(sc, 1<<32-1, sc)
-       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x)
+       // match: (MOVHreg (SLLconst [lc] x))
+       // cond: lc < 16
+       // result: (SBFIZ [arm64BFAuxInt(lc, 16-lc)] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpARM64SLLconst {
                        break
                }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<32-1, sc)) {
-                       break
-               }
-               v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWUreg (SRLconst [sc] x))
-       // cond: isARM64BFMask(sc, 1<<32-1, 0)
-       // result: (UBFX [arm64BFAuxInt(sc, 32)] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
-                       break
-               }
-               sc := v_0.AuxInt
+               lc := v_0.AuxInt
                x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<32-1, 0)) {
+               if !(lc < 16) {
                        break
                }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BFAuxInt(sc, 32)
+               v.reset(OpARM64SBFIZ)
+               v.AuxInt = arm64BFAuxInt(lc, 16-lc)
                v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVHstore_0(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWload [off1+off2] {sym} ptr mem)
+       // result: (MOVHstore [off1+off2] {sym} ptr val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADDconst {
                        break
                }
                off2 := v_0.AuxInt
                ptr := v_0.Args[0]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVWload)
+               v.reset(OpARM64MOVHstore)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWload [off] {sym} (ADD ptr idx) mem)
+       // match: (MOVHstore [off] {sym} (ADD ptr idx) val mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVWloadidx ptr idx mem)
+       // result: (MOVHstoreidx ptr idx val mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADD {
                        break
@@ -11738,23 +11509,54 @@ func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool {
                _ = v_0.Args[1]
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVWloadidx)
+               v.reset(OpARM64MOVHstoreidx)
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // match: (MOVHstore [off] {sym} (ADDshiftLL [1] ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVHstoreidx2 ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // result: (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDaddr {
                        break
@@ -11762,501 +11564,295 @@ func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool {
                off2 := v_0.AuxInt
                sym2 := v_0.Aux
                ptr := v_0.Args[0]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVWload)
+               v.reset(OpARM64MOVHstore)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVDconst [0])
+       // match: (MOVHstore [off] {sym} ptr (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVHstorezero [off] {sym} ptr mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWstorezero {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               if v_1.AuxInt != 0 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWloadidx_0(v *Value) bool {
-       // match: (MOVWloadidx ptr (MOVDconst [c]) mem)
+       // match: (MOVHstore [off] {sym} ptr (MOVHreg x) mem)
        // cond:
-       // result: (MOVWload [c] ptr mem)
+       // result: (MOVHstore [off] {sym} ptr x mem)
        for {
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64MOVHreg {
                        break
                }
-               c := v_1.AuxInt
+               x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpARM64MOVWload)
-               v.AuxInt = c
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(ptr)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWloadidx (MOVDconst [c]) ptr mem)
+       // match: (MOVHstore [off] {sym} ptr (MOVHUreg x) mem)
        // cond:
-       // result: (MOVWload [c] ptr mem)
+       // result: (MOVHstore [off] {sym} ptr x mem)
        for {
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVHUreg {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v.Args[1]
+               x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpARM64MOVWload)
-               v.AuxInt = c
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(ptr)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
-       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
-       // result: (MOVDconst [0])
+       // match: (MOVHstore [off] {sym} ptr (MOVWreg x) mem)
+       // cond:
+       // result: (MOVHstore [off] {sym} ptr x mem)
        for {
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWstorezeroidx {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWreg {
                        break
                }
-               _ = v_2.Args[2]
-               ptr2 := v_2.Args[0]
-               idx2 := v_2.Args[1]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
-                       break
-               }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWreg_0(v *Value) bool {
-       // match: (MOVWreg x:(MOVBload _ _))
+       // match: (MOVHstore [off] {sym} ptr (MOVWUreg x) mem)
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVHstore [off] {sym} ptr x mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBload {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWUreg {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWreg x:(MOVBUload _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MOVHstore [i] {s} ptr0 (SRLconst [16] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVWstore [i-2] {s} ptr0 w mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUload {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHload {
+               if v_1.AuxInt != 16 {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHUload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUload {
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstore {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVWload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVWload {
+               if x.AuxInt != i-2 {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVBloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBloadidx {
+               if x.Aux != s {
                        break
                }
                _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVBUloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUloadidx {
+               ptr1 := x.Args[0]
+               if w != x.Args[1] {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHloadidx {
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWreg x:(MOVHUloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
+       return false
+}
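// Editor's note (annotation for this review, not part of the generated file):
// the MOVHstore rules added above drop a redundant zero-extension feeding a
// halfword store (only the low 16 bits are written anyway) and merge a pair
// of adjacent halfword stores -- w at offset i-2 and its high half
// (SRLconst [16] w) at offset i -- into a single MOVWstore at i-2.
// A hypothetical Go fragment of the kind such store merging is aimed at
// (an illustration under that assumption, not a verified codegen test):
//
//	import "encoding/binary"
//
//	func put32(b []byte, w uint32) {
//		binary.LittleEndian.PutUint16(b[0:], uint16(w))     // low half at offset 0
//		binary.LittleEndian.PutUint16(b[2:], uint16(w>>16)) // high half at offset 2
//	}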
+func rewriteValueARM64_OpARM64MOVHstore_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVWstoreidx ptr1 idx1 w mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUloadidx {
+               if v.AuxInt != 2 {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVWloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVWloadidx {
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWreg_10(v *Value) bool {
-       // match: (MOVWreg x:(MOVBreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBreg {
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVBUreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUreg {
+               if v_1.AuxInt != 16 {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHreg {
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstoreidx {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHreg {
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVWreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVWreg {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(int32(c))])
+       // match: (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
+       // result: (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
        for {
+               if v.AuxInt != 2 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(int32(c))
-               return true
-       }
-       // match: (MOVWreg (SLLconst [lc] x))
-       // cond: lc < 32
-       // result: (SBFIZ [arm64BFAuxInt(lc, 32-lc)] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
-                       break
-               }
-               lc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(lc < 32) {
-                       break
-               }
-               v.reset(OpARM64SBFIZ)
-               v.AuxInt = arm64BFAuxInt(lc, 32-lc)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWstore [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
-                       break
-               }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} (ADD ptr idx) val mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWstoreidx ptr idx val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               if v_0.AuxInt != 1 {
                        break
                }
                _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(off == 0 && sym == nil) {
-                       break
-               }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem)
-       // cond:
-       // result: (MOVWstorezero [off] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.AuxInt != 16 {
                        break
                }
-               if v_1.AuxInt != 0 {
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstoreidx2 {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem)
-       // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWreg {
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} ptr (MOVWUreg x) mem)
-       // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWUreg {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr1)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx1)
+               v.AddArg(v0)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
+       // match: (MOVHstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
        // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVDstore [i-4] {s} ptr0 w mem)
+       // result: (MOVWstore [i-2] {s} ptr0 w mem)
        for {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[2]
                ptr0 := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               if v_1.Op != OpARM64UBFX {
                        break
                }
-               if v_1.AuxInt != 32 {
+               if v_1.AuxInt != arm64BFAuxInt(16, 16) {
                        break
                }
                w := v_1.Args[0]
                x := v.Args[2]
-               if x.Op != OpARM64MOVWstore {
+               if x.Op != OpARM64MOVHstore {
                        break
                }
-               if x.AuxInt != i-4 {
+               if x.AuxInt != i-2 {
                        break
                }
                if x.Aux != s {
@@ -12271,19 +11867,19 @@ func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool {
                if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVDstore)
-               v.AuxInt = i - 4
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = i - 2
                v.Aux = s
                v.AddArg(ptr0)
                v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx ptr1 idx1 w mem))
+       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstoreidx ptr1 idx1 w mem))
        // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVDstoreidx ptr1 idx1 w mem)
+       // result: (MOVWstoreidx ptr1 idx1 w mem)
        for {
-               if v.AuxInt != 4 {
+               if v.AuxInt != 2 {
                        break
                }
                s := v.Aux
@@ -12296,15 +11892,15 @@ func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool {
                ptr0 := v_0.Args[0]
                idx0 := v_0.Args[1]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               if v_1.Op != OpARM64UBFX {
                        break
                }
-               if v_1.AuxInt != 32 {
+               if v_1.AuxInt != arm64BFAuxInt(16, 16) {
                        break
                }
                w := v_1.Args[0]
                x := v.Args[2]
-               if x.Op != OpARM64MOVWstoreidx {
+               if x.Op != OpARM64MOVHstoreidx {
                        break
                }
                _ = x.Args[3]
@@ -12317,66 +11913,116 @@ func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool {
                if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVDstoreidx)
+               v.reset(OpARM64MOVWstoreidx)
                v.AddArg(ptr1)
                v.AddArg(idx1)
                v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVDstore [i-4] {s} ptr0 w0 mem)
+       // match: (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
+       // result: (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr0 := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               if v.AuxInt != 2 {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVWstore {
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               if x.AuxInt != i-4 {
+               if v_0.AuxInt != 1 {
                        break
                }
-               if x.Aux != s {
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64UBFX {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != OpARM64SRLconst {
+               if v_1.AuxInt != arm64BFAuxInt(16, 16) {
                        break
                }
-               if w0.AuxInt != j-32 {
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstoreidx2 {
                        break
                }
-               if w != w0.Args[0] {
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVDstore)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w0)
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr1)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx1)
+               v.AddArg(v0)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx ptr1 idx1 w0:(SRLconst [j-32] w) mem))
+       // match: (MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVWstore [i-2] {s} ptr0 w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if v_1.AuxInt != 16 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               w := v_1_0.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx ptr1 idx1 w mem))
        // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVDstoreidx ptr1 idx1 w0 mem)
+       // result: (MOVWstoreidx ptr1 idx1 w mem)
        for {
-               if v.AuxInt != 4 {
+               if v.AuxInt != 2 {
                        break
                }
                s := v.Aux
@@ -12392,1907 +12038,6324 @@ func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool {
                if v_1.Op != OpARM64SRLconst {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVWstoreidx {
+               if v_1.AuxInt != 16 {
                        break
                }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               w0 := x.Args[2]
-               if w0.Op != OpARM64SRLconst {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64MOVDreg {
                        break
                }
-               if w0.AuxInt != j-32 {
+               w := v_1_0.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstoreidx {
                        break
                }
-               if w != w0.Args[0] {
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
                        break
                }
                mem := x.Args[3]
                if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVDstoreidx)
+               v.reset(OpARM64MOVWstoreidx)
                v.AddArg(ptr1)
                v.AddArg(idx1)
-               v.AddArg(w0)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWstoreidx_0(v *Value) bool {
-       // match: (MOVWstoreidx ptr (MOVDconst [c]) val mem)
-       // cond:
-       // result: (MOVWstore [c] ptr val mem)
+       // match: (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx2 ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
+       // result: (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v.AuxInt != 2 {
                        break
                }
-               c := v_1.AuxInt
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx (MOVDconst [c]) idx val mem)
-       // cond:
-       // result: (MOVWstore [c] idx val mem)
-       for {
-               _ = v.Args[3]
+               s := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               c := v_0.AuxInt
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = c
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx ptr idx (MOVDconst [0]) mem)
-       // cond:
-       // result: (MOVWstorezeroidx ptr idx mem)
-       for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVDconst {
+               if v_0.AuxInt != 1 {
                        break
                }
-               if v_2.AuxInt != 0 {
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstorezeroidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx ptr idx (MOVWreg x) mem)
-       // cond:
-       // result: (MOVWstoreidx ptr idx x mem)
-       for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWreg {
+               if v_1.AuxInt != 16 {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx ptr idx (MOVWUreg x) mem)
-       // cond:
-       // result: (MOVWstoreidx ptr idx x mem)
-       for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWUreg {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               w := v_1_0.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstoreidx2 {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
                v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(x)
+               v.AddArg(ptr1)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx1)
+               v.AddArg(v0)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWstorezero_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWstorezero [off1+off2] {sym} ptr mem)
+       // match: (MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVWstore [i-2] {s} ptr0 w0 mem)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstore {
                        break
                }
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if x.AuxInt != i-2 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if x.Aux != s {
                        break
                }
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstorezero [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWstorezeroidx ptr idx mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               w0 := x.Args[1]
+               if w0.Op != OpARM64SRLconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               if w0.AuxInt != j-16 {
                        break
                }
-               v.reset(OpARM64MOVWstorezeroidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
-       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,4) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVDstorezero [min(i,j)] {s} ptr0 mem)
+       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx ptr1 idx1 w0:(SRLconst [j-16] w) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVWstoreidx ptr1 idx1 w0 mem)
        for {
-               i := v.AuxInt
+               if v.AuxInt != 2 {
+                       break
+               }
                s := v.Aux
-               _ = v.Args[1]
-               ptr0 := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVWstorezero {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               j := x.AuxInt
-               if x.Aux != s {
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               _ = x.Args[1]
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstoreidx {
+                       break
+               }
+               _ = x.Args[3]
                ptr1 := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 4) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               idx1 := x.Args[1]
+               w0 := x.Args[2]
+               if w0.Op != OpARM64SRLconst {
                        break
                }
-               v.reset(OpARM64MOVDstorezero)
-               v.AuxInt = min(i, j)
-               v.Aux = s
-               v.AddArg(ptr0)
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstorezero [4] {s} (ADD ptr0 idx0) x:(MOVWstorezeroidx ptr1 idx1 mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVDstorezeroidx ptr1 idx1 mem)
+       return false
+}
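// Editor's note (annotation, not part of the generated file): MOVHstore_10
// repeats the halfword-to-word merge for register-indexed addressing. The high
// half of w may appear as (SRLconst [16] w), as the equivalent bitfield
// extract (UBFX [arm64BFAuxInt(16, 16)] w), or behind a (MOVDreg w) copy, and
// the earlier store may be MOVHstoreidx (byte index) or MOVHstoreidx2 (index
// scaled by 2). It also handles the chained-shift case where the earlier store
// wrote w0 = (SRLconst [j-16] w): the two stores then hold consecutive
// halfwords of w>>(j-16) and merge into one word store of w0. Presumably
// because word stores have no 2-scaled indexed form, a matched MOVHstoreidx2
// index is first rescaled to a byte index, e.g.:
//
//	(MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] w)
//	        x:(MOVHstoreidx2 ptr1 idx1 w mem))
//	  -> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)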
+func rewriteValueARM64_OpARM64MOVHstore_20(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx2 ptr1 idx1 w0:(SRLconst [j-16] w) mem))
+       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
+       // result: (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w0 mem)
        for {
-               if v.AuxInt != 4 {
+               if v.AuxInt != 2 {
                        break
                }
                s := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               if v_0.Op != OpARM64ADDshiftLL {
+                       break
+               }
+               if v_0.AuxInt != 1 {
                        break
                }
                _ = v_0.Args[1]
                ptr0 := v_0.Args[0]
                idx0 := v_0.Args[1]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVWstorezeroidx {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               _ = x.Args[2]
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstoreidx2 {
+                       break
+               }
+               _ = x.Args[3]
                ptr1 := x.Args[0]
                idx1 := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               w0 := x.Args[2]
+               if w0.Op != OpARM64SRLconst {
                        break
                }
-               v.reset(OpARM64MOVDstorezeroidx)
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
                v.AddArg(ptr1)
-               v.AddArg(idx1)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx1)
+               v.AddArg(v0)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
        return false
 }
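// Editor's note (annotation, not part of the generated file): MOVHstore_20
// covers the last combination, chained shift plus scaled index: the merged
// word store again goes through MOVWstoreidx with the index rescaled by
// (SLLconst [1] idx1), but stores w0 = (SRLconst [j-16] w) rather than w.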
-func rewriteValueARM64_OpARM64MOVWstorezeroidx_0(v *Value) bool {
-       // match: (MOVWstorezeroidx ptr (MOVDconst [c]) mem)
+func rewriteValueARM64_OpARM64MOVHstoreidx_0(v *Value) bool {
+       // match: (MOVHstoreidx ptr (MOVDconst [c]) val mem)
        // cond:
-       // result: (MOVWstorezero [c] ptr mem)
+       // result: (MOVHstore [c] ptr val mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstorezero)
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstore)
                v.AuxInt = c
                v.AddArg(ptr)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstorezeroidx (MOVDconst [c]) idx mem)
+       // match: (MOVHstoreidx (MOVDconst [c]) idx val mem)
        // cond:
-       // result: (MOVWstorezero [c] idx mem)
+       // result: (MOVHstore [c] idx val mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
                idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstorezero)
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstore)
                v.AuxInt = c
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MUL_0(v *Value) bool {
-       // match: (MUL (NEG x) y)
+       // match: (MOVHstoreidx ptr (SLLconst [1] idx) val mem)
        // cond:
-       // result: (MNEG x y)
+       // result: (MOVHstoreidx2 ptr idx val mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64NEG {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
-               v.reset(OpARM64MNEG)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (MUL y (NEG x))
-       // cond:
-       // result: (MNEG x y)
-       for {
-               _ = v.Args[1]
-               y := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64NEG {
+               if v_1.AuxInt != 1 {
                        break
                }
-               x := v_1.Args[0]
-               v.reset(OpARM64MNEG)
-               v.AddArg(x)
-               v.AddArg(y)
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL x (MOVDconst [-1]))
+       // match: (MOVHstoreidx ptr (ADD idx idx) val mem)
        // cond:
-       // result: (NEG x)
+       // result: (MOVHstoreidx2 ptr idx val mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[3]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64ADD {
                        break
                }
-               if v_1.AuxInt != -1 {
+               _ = v_1.Args[1]
+               idx := v_1.Args[0]
+               if idx != v_1.Args[1] {
                        break
                }
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL (MOVDconst [-1]) x)
+       // match: (MOVHstoreidx (SLLconst [1] idx) ptr val mem)
        // cond:
-       // result: (NEG x)
+       // result: (MOVHstoreidx2 ptr idx val mem)
        for {
-               _ = v.Args[1]
+               _ = v.Args[3]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               if v_0.AuxInt != -1 {
+               if v_0.AuxInt != 1 {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL _ (MOVDconst [0]))
+       // match: (MOVHstoreidx (ADD idx idx) ptr val mem)
        // cond:
-       // result: (MOVDconst [0])
+       // result: (MOVHstoreidx2 ptr idx val mem)
        for {
-               _ = v.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v.Args[3]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               if v_1.AuxInt != 0 {
+               _ = v_0.Args[1]
+               idx := v_0.Args[0]
+               if idx != v_0.Args[1] {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL (MOVDconst [0]) _)
+       // match: (MOVHstoreidx ptr idx (MOVDconst [0]) mem)
        // cond:
-       // result: (MOVDconst [0])
+       // result: (MOVHstorezeroidx ptr idx mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != 0 {
+               if v_2.AuxInt != 0 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL x (MOVDconst [1]))
+       // match: (MOVHstoreidx ptr idx (MOVHreg x) mem)
        // cond:
-       // result: x
+       // result: (MOVHstoreidx ptr idx x mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHreg {
                        break
                }
-               if v_1.AuxInt != 1 {
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx ptr idx (MOVHUreg x) mem)
+       // cond:
+       // result: (MOVHstoreidx ptr idx x mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHUreg {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL (MOVDconst [1]) x)
+       // match: (MOVHstoreidx ptr idx (MOVWreg x) mem)
        // cond:
-       // result: x
+       // result: (MOVHstoreidx ptr idx x mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWreg {
                        break
                }
-               if v_0.AuxInt != 1 {
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
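// Editor's note (annotation, not part of the generated file): the
// MOVHstoreidx rules above canonicalize the register-indexed halfword store:
// a constant index folds back into a MOVHstore offset, an index of the form
// (SLLconst [1] idx) or (ADD idx idx) -- i.e. idx*2 -- switches to the scaled
// MOVHstoreidx2 form, a stored constant zero becomes MOVHstorezeroidx, and a
// MOVHreg/MOVHUreg/MOVWreg extension of the stored value is dropped because
// only the low 16 bits reach memory. The scaling rewrite, in the rule
// notation used in the comments above:
//
//	(MOVHstoreidx ptr (SLLconst [1] idx) val mem) -> (MOVHstoreidx2 ptr idx val mem)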
+func rewriteValueARM64_OpARM64MOVHstoreidx_10(v *Value) bool {
+       // match: (MOVHstoreidx ptr idx (MOVWUreg x) mem)
+       // cond:
+       // result: (MOVHstoreidx ptr idx x mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWUreg {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
+       // match: (MOVHstoreidx ptr (ADDconst [2] idx) (SRLconst [16] w) x:(MOVHstoreidx ptr idx w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstoreidx ptr idx w mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[3]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64ADDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c)) {
+               if v_1.AuxInt != 2 {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               idx := v_1.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64SRLconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c)) {
+               if v_2.AuxInt != 16 {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpARM64MOVHstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               if ptr != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
                return true
        }
        return false
 }
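// Editor's note (annotation, not part of the generated file): MOVHstoreidx_10
// finishes the extension-dropping cases (MOVWUreg) and adds the indexed
// version of the halfword-to-word merge: a store of (SRLconst [16] w) at
// ptr+idx+2 immediately after a store of w at ptr+idx collapses into a single
// (MOVWstoreidx ptr idx w mem), mirroring the offset-based rule earlier.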
-func rewriteValueARM64_OpARM64MUL_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MUL x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c-1) && c >= 3
-       // result: (ADDshiftLL x x [log2(c-1)])
+func rewriteValueARM64_OpARM64MOVHstoreidx2_0(v *Value) bool {
+       // match: (MOVHstoreidx2 ptr (MOVDconst [c]) val mem)
+       // cond:
+       // result: (MOVHstore [c<<1] ptr val mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[3]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               if !(isPowerOfTwo(c-1) && c >= 3) {
-                       break
-               }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c - 1)
-               v.AddArg(x)
-               v.AddArg(x)
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = c << 1
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c-1) && c >= 3
-       // result: (ADDshiftLL x x [log2(c-1)])
+       // match: (MOVHstoreidx2 ptr idx (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVHstorezeroidx2 ptr idx mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c-1) && c >= 3) {
+               if v_2.AuxInt != 0 {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c - 1)
-               v.AddArg(x)
-               v.AddArg(x)
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstorezeroidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c+1) && c >= 7
-       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       // match: (MOVHstoreidx2 ptr idx (MOVHreg x) mem)
+       // cond:
+       // result: (MOVHstoreidx2 ptr idx x mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c+1) && c >= 7) {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHreg {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c + 1)
-               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c+1) && c >= 7
-       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       // match: (MOVHstoreidx2 ptr idx (MOVHUreg x) mem)
+       // cond:
+       // result: (MOVHstoreidx2 ptr idx x mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c+1) && c >= 7) {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHUreg {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c + 1)
-               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: c%3 == 0 && isPowerOfTwo(c/3)
-       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       // match: (MOVHstoreidx2 ptr idx (MOVWreg x) mem)
+       // cond:
+       // result: (MOVHstoreidx2 ptr idx x mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWreg {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 1
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: c%3 == 0 && isPowerOfTwo(c/3)
-       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       // match: (MOVHstoreidx2 ptr idx (MOVWUreg x) mem)
+       // cond:
+       // result: (MOVHstoreidx2 ptr idx x mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWUreg {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 1
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: c%5 == 0 && isPowerOfTwo(c/5)
-       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       return false
+}
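// The MOVHstoreidx2 rules above drop a MOVHreg, MOVHUreg, MOVWreg or
// MOVWUreg extension of the stored value: a half-word store writes only
// the low 16 bits, so the extension cannot affect what ends up in memory.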
+func rewriteValueARM64_OpARM64MOVHstorezero_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVHstorezero [off1+off2] {sym} ptr mem)
        for {
+               off1 := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: c%5 == 0 && isPowerOfTwo(c/5)
-       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       // match: (MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: c%7 == 0 && isPowerOfTwo(c/7)
-       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+       // match: (MOVHstorezero [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVHstorezeroidx ptr idx mem)
        for {
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               c := v_1.AuxInt
-               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpARM64MOVHstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: c%7 == 0 && isPowerOfTwo(c/7)
-       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+       // match: (MOVHstorezero [off] {sym} (ADDshiftLL [1] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVHstorezeroidx2 ptr idx mem)
        for {
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+               if v_0.AuxInt != 1 {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVHstorezeroidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MUL_20(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MUL x (MOVDconst [c]))
-       // cond: c%9 == 0 && isPowerOfTwo(c/9)
-       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       // match: (MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
+       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,2) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVWstorezero [min(i,j)] {s} ptr0 mem)
        for {
+               i := v.AuxInt
+               s := v.Aux
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               ptr0 := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVHstorezero {
                        break
                }
-               c := v_1.AuxInt
-               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+               j := x.AuxInt
+               if x.Aux != s {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               _ = x.Args[1]
+               ptr1 := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 2) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = min(i, j)
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: c%9 == 0 && isPowerOfTwo(c/9)
-       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       // match: (MOVHstorezero [2] {s} (ADD ptr0 idx0) x:(MOVHstorezeroidx ptr1 idx1 mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVWstorezeroidx ptr1 idx1 mem)
        for {
+               if v.AuxInt != 2 {
+                       break
+               }
+               s := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               c := v_0.AuxInt
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
                x := v.Args[1]
-               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+               if x.Op != OpARM64MOVHstorezeroidx {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MUL (MOVDconst [c]) (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [c*d])
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstorezeroidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstorezero [2] {s} (ADDshiftLL [1] ptr0 idx0) x:(MOVHstorezeroidx2 ptr1 idx1 mem))
+       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
+       // result: (MOVWstorezeroidx ptr1 (SLLconst <idx1.Type> [1] idx1) mem)
        for {
+               if v.AuxInt != 2 {
+                       break
+               }
+               s := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_0.AuxInt != 1 {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = c * d
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVHstorezeroidx2 {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstorezeroidx)
+               v.AddArg(ptr1)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx1)
+               v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL (MOVDconst [d]) (MOVDconst [c]))
+       return false
+}
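// The MOVHstorezero rules above fold ADDconst offsets and MOVDaddr symbols
// into the store, rewrite (ADD ptr idx) and (ADDshiftLL [1] ptr idx)
// addresses into the MOVHstorezeroidx/MOVHstorezeroidx2 indexed forms, and
// merge two adjacent half-word zero stores into a single word-sized
// MOVWstorezero/MOVWstorezeroidx. Roughly, zeroing the elements of a
// []uint16 slice, e.g.
//
//	for i := range h { // illustrative only
//		h[i] = 0
//	}
//
// can then use one shifted-register indexed store per element instead of a
// separate shift/add for the address (actual codegen depends on the
// surrounding code).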
+func rewriteValueARM64_OpARM64MOVHstorezeroidx_0(v *Value) bool {
+       // match: (MOVHstorezeroidx ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (MOVDconst [c*d])
+       // result: (MOVHstorezero [c] ptr mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               d := v_0.AuxInt
+               _ = v.Args[2]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = c * d
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MULW_0(v *Value) bool {
-       // match: (MULW (NEG x) y)
+       // match: (MOVHstorezeroidx (MOVDconst [c]) idx mem)
        // cond:
-       // result: (MNEGW x y)
+       // result: (MOVHstorezero [c] idx mem)
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64NEG {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
-               v.reset(OpARM64MNEGW)
-               v.AddArg(x)
-               v.AddArg(y)
+               c := v_0.AuxInt
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = c
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW y (NEG x))
+       // match: (MOVHstorezeroidx ptr (SLLconst [1] idx) mem)
        // cond:
-       // result: (MNEGW x y)
+       // result: (MOVHstorezeroidx2 ptr idx mem)
        for {
-               _ = v.Args[1]
-               y := v.Args[0]
+               _ = v.Args[2]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64NEG {
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               x := v_1.Args[0]
-               v.reset(OpARM64MNEGW)
-               v.AddArg(x)
-               v.AddArg(y)
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezeroidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: int32(c)==-1
-       // result: (NEG x)
+       // match: (MOVHstorezeroidx ptr (ADD idx idx) mem)
+       // cond:
+       // result: (MOVHstorezeroidx2 ptr idx mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[2]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64ADD {
                        break
                }
-               c := v_1.AuxInt
-               if !(int32(c) == -1) {
+               _ = v_1.Args[1]
+               idx := v_1.Args[0]
+               if idx != v_1.Args[1] {
                        break
                }
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezeroidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: int32(c)==-1
-       // result: (NEG x)
+       // match: (MOVHstorezeroidx (SLLconst [1] idx) ptr mem)
+       // cond:
+       // result: (MOVHstorezeroidx2 ptr idx mem)
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(int32(c) == -1) {
+               if v_0.AuxInt != 1 {
                        break
                }
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezeroidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW _ (MOVDconst [c]))
-       // cond: int32(c)==0
-       // result: (MOVDconst [0])
+       // match: (MOVHstorezeroidx (ADD idx idx) ptr mem)
+       // cond:
+       // result: (MOVHstorezeroidx2 ptr idx mem)
        for {
-               _ = v.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               c := v_1.AuxInt
-               if !(int32(c) == 0) {
+               _ = v_0.Args[1]
+               idx := v_0.Args[0]
+               if idx != v_0.Args[1] {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezeroidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW (MOVDconst [c]) _)
-       // cond: int32(c)==0
-       // result: (MOVDconst [0])
+       // match: (MOVHstorezeroidx ptr (ADDconst [2] idx) x:(MOVHstorezeroidx ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstorezeroidx ptr idx mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ADDconst {
                        break
                }
-               c := v_0.AuxInt
-               if !(int32(c) == 0) {
+               if v_1.AuxInt != 2 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: int32(c)==1
-       // result: x
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               idx := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstorezeroidx {
                        break
                }
-               c := v_1.AuxInt
-               if !(int32(c) == 1) {
+               _ = x.Args[2]
+               if ptr != x.Args[0] {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: int32(c)==1
-       // result: x
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if idx != x.Args[1] {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(int32(c) == 1) {
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.reset(OpARM64MOVWstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
+       return false
+}
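// The MOVHstorezeroidx rules above fold a constant index back into the
// offset form, convert a (SLLconst [1] idx) or (ADD idx idx) index into the
// scaled MOVHstorezeroidx2 form, and merge a pair of adjacent indexed
// half-word zero stores into one MOVWstorezeroidx.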
+func rewriteValueARM64_OpARM64MOVHstorezeroidx2_0(v *Value) bool {
+       // match: (MOVHstorezeroidx2 ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVHstorezero [c<<1] ptr mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[2]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               if !(isPowerOfTwo(c)) {
-                       break
-               }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = c << 1
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
+       return false
+}
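// With a constant index, the scaled zero store above is turned back into a
// plain MOVHstorezero at byte offset c<<1, since the index is scaled by the
// 2-byte element size.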
+func rewriteValueARM64_OpARM64MOVQstorezero_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVQstorezero [off1+off2] {sym} ptr mem)
        for {
+               off1 := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c)) {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MULW_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MULW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
-       // result: (ADDshiftLL x x [log2(c-1)])
+       // match: (MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c - 1)
-               v.AddArg(x)
-               v.AddArg(x)
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
-       // result: (ADDshiftLL x x [log2(c-1)])
+       return false
+}
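// MOVQstorezero (the 16-byte zero store) only receives the usual offset and
// symbol folding; no shifted-register indexed variant is added for it here.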
+func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWUload [off1+off2] {sym} ptr mem)
        for {
+               off1 := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c - 1)
-               v.AddArg(x)
-               v.AddArg(x)
+               v.reset(OpARM64MOVWUload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
-       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       // match: (MOVWUload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWUloadidx ptr idx mem)
        for {
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c + 1)
-               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpARM64MOVWUloadidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
-       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       // match: (MOVWUload [off] {sym} (ADDshiftLL [2] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWUloadidx4 ptr idx mem)
        for {
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
+               if v_0.AuxInt != 2 {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c + 1)
-               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVWUloadidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
-       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       // match: (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               c := v_1.AuxInt
-               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 1
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpARM64MOVWUload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
-       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       // match: (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
        for {
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWstorezero {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 1
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
-       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       return false
+}
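// The MOVWUload rules above mirror the store side: constant offsets and
// symbols are folded in, an (ADD ptr idx) address becomes MOVWUloadidx, an
// (ADDshiftLL [2] ptr idx) address becomes the scaled MOVWUloadidx4, and a
// load that reads back a just-zeroed word folds to MOVDconst [0]. A scaled
// indexed load can typically be emitted as a single instruction along the
// lines of MOVWU (R4)(R1<<2), R6 (illustrative; actual operands depend on
// register allocation).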
+func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool {
+       // match: (MOVWUloadidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVWUload [c] ptr mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[2]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWUload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
-       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       // match: (MOVWUloadidx (MOVDconst [c]) ptr mem)
+       // cond:
+       // result: (MOVWUload [c] ptr mem)
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWUload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
-       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+       // match: (MOVWUloadidx ptr (SLLconst [2] idx) mem)
+       // cond:
+       // result: (MOVWUloadidx4 ptr idx mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[2]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
+               if v_1.AuxInt != 2 {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWUloadidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
-       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+       // match: (MOVWUloadidx (SLLconst [2] idx) ptr mem)
+       // cond:
+       // result: (MOVWUloadidx4 ptr idx mem)
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MULW_20(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
-       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+               if v_0.AuxInt != 2 {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWUloadidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
-       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       // match: (MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWstorezeroidx {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MULW (MOVDconst [c]) (MOVDconst [d]))
+       return false
+}
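// The MOVWUloadidx rules above fold a constant index (in either operand
// order) back into the offset form, recognize a (SLLconst [2] idx) index as
// the scaled MOVWUloadidx4 form, and fold a load from a location just zeroed
// by MOVWstorezeroidx to the constant 0.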
+func rewriteValueARM64_OpARM64MOVWUloadidx4_0(v *Value) bool {
+       // match: (MOVWUloadidx4 ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (MOVDconst [int64(int32(c)*int32(d))])
+       // result: (MOVWUload [c<<2] ptr mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
+               _ = v.Args[2]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(int32(c) * int32(d))
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWUload)
+               v.AuxInt = c << 2
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW (MOVDconst [d]) (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(int32(c)*int32(d))])
+       // match: (MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _))
+       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
+       // result: (MOVDconst [0])
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWstorezeroidx4 {
                        break
                }
-               d := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
                        break
                }
-               c := v_1.AuxInt
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(int32(c) * int32(d))
+               v.AuxInt = 0
                return true
        }
        return false
 }
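// For the scaled load, a constant index c corresponds to byte offset c<<2,
// and a load that aliases a preceding MOVWstorezeroidx4 zero store likewise
// folds to MOVDconst [0].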
-func rewriteValueARM64_OpARM64MVN_0(v *Value) bool {
-       // match: (MVN (MOVDconst [c]))
+func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool {
+       // match: (MOVWUreg x:(MOVBUload _ _))
        // cond:
-       // result: (MOVDconst [^c])
+       // result: (MOVDreg x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUload {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = ^c
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64NEG_0(v *Value) bool {
-       // match: (NEG (MUL x y))
+       // match: (MOVWUreg x:(MOVHUload _ _))
        // cond:
-       // result: (MNEG x y)
+       // result: (MOVDreg x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MUL {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUload {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpARM64MNEG)
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (NEG (MULW x y))
+       // match: (MOVWUreg x:(MOVWUload _ _))
        // cond:
-       // result: (MNEGW x y)
+       // result: (MOVDreg x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MULW {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWUload {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpARM64MNEGW)
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (NEG (MOVDconst [c]))
+       // match: (MOVWUreg x:(MOVBUloadidx _ _ _))
        // cond:
-       // result: (MOVDconst [-c])
+       // result: (MOVDreg x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUloadidx {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -c
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64NotEqual_0(v *Value) bool {
-       // match: (NotEqual (FlagEQ))
+       // match: (MOVWUreg x:(MOVHUloadidx _ _ _))
        // cond:
-       // result: (MOVDconst [0])
+       // result: (MOVDreg x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagEQ {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUloadidx {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (NotEqual (FlagLT_ULT))
+       // match: (MOVWUreg x:(MOVWUloadidx _ _ _))
        // cond:
-       // result: (MOVDconst [1])
+       // result: (MOVDreg x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_ULT {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWUloadidx {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (NotEqual (FlagLT_UGT))
+       // match: (MOVWUreg x:(MOVHUloadidx2 _ _ _))
        // cond:
-       // result: (MOVDconst [1])
+       // result: (MOVDreg x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_UGT {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUloadidx2 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (NotEqual (FlagGT_ULT))
+       // match: (MOVWUreg x:(MOVWUloadidx4 _ _ _))
        // cond:
-       // result: (MOVDconst [1])
+       // result: (MOVDreg x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_ULT {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWUloadidx4 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
-               return true
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
        }
-       // match: (NotEqual (FlagGT_UGT))
+       // match: (MOVWUreg x:(MOVBUreg _))
        // cond:
-       // result: (MOVDconst [1])
+       // result: (MOVDreg x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_UGT {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUreg {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (NotEqual (InvertFlags x))
+       // match: (MOVWUreg x:(MOVHUreg _))
        // cond:
-       // result: (NotEqual x)
+       // result: (MOVDreg x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64InvertFlags {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUreg {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpARM64NotEqual)
+               v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
        return false
 }
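// The MOVWUreg rules above (including the new indexed-load cases) drop a
// redundant 32-bit zero-extension: when the argument is already a
// zero-extending narrow load or a narrower zero-extension, its upper 32 bits
// are known to be zero and a plain MOVDreg suffices.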
-func rewriteValueARM64_OpARM64OR_0(v *Value) bool {
-       // match: (OR x (MOVDconst [c]))
+func rewriteValueARM64_OpARM64MOVWUreg_10(v *Value) bool {
+       // match: (MOVWUreg x:(MOVWUreg _))
        // cond:
-       // result: (ORconst [c] x)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[1]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if x.Op != OpARM64MOVWUreg {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ORconst)
-               v.AuxInt = c
+               v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (OR (MOVDconst [c]) x)
+       // match: (MOVWUreg (ANDconst [c] x))
        // cond:
-       // result: (ORconst [c] x)
+       // result: (ANDconst [c&(1<<32-1)] x)
        for {
-               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64ANDconst {
                        break
                }
                c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64ORconst)
-               v.AuxInt = c
+               x := v_0.Args[0]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c & (1<<32 - 1)
                v.AddArg(x)
                return true
        }
-       // match: (OR x x)
+       // match: (MOVWUreg (MOVDconst [c]))
        // cond:
-       // result: x
+       // result: (MOVDconst [int64(uint32(c))])
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               if x != v.Args[1] {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(uint32(c))
                return true
        }
-       // match: (OR x (MVN y))
-       // cond:
-       // result: (ORN x y)
+       // match: (MOVWUreg (SLLconst [sc] x))
+       // cond: isARM64BFMask(sc, 1<<32-1, sc)
+       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MVN {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpARM64ORN)
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<32-1, sc)) {
+                       break
+               }
+               v.reset(OpARM64UBFIZ)
+               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (OR (MVN y) x)
-       // cond:
-       // result: (ORN x y)
+       // match: (MOVWUreg (SRLconst [sc] x))
+       // cond: isARM64BFMask(sc, 1<<32-1, 0)
+       // result: (UBFX [arm64BFAuxInt(sc, 32)] x)
        for {
-               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MVN {
+               if v_0.Op != OpARM64SRLconst {
                        break
                }
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpARM64ORN)
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<32-1, 0)) {
+                       break
+               }
+               v.reset(OpARM64UBFX)
+               v.AuxInt = arm64BFAuxInt(sc, 32)
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (OR x0 x1:(SLLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftLL x0 y [c])
+       return false
+}
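// The remaining MOVWUreg rules constant-fold the extension, narrow an
// ANDconst mask to 32 bits, and turn shifted sources into the UBFIZ/UBFX
// bit-field forms when the shift amount satisfies isARM64BFMask.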
+func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWload [off1+off2] {sym} ptr mem)
        for {
+               off1 := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64ORshiftLL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
+               v.reset(OpARM64MOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (OR x1:(SLLconst [c] y) x0)
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftLL x0 y [c])
+       // match: (MOVWload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWloadidx ptr idx mem)
        for {
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               x1 := v.Args[0]
-               if x1.Op != OpARM64SLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               x0 := v.Args[1]
-               if !(clobberIfDead(x1)) {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64ORshiftLL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
+               v.reset(OpARM64MOVWloadidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (OR x0 x1:(SRLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftRL x0 y [c])
+       // match: (MOVWload [off] {sym} (ADDshiftLL [2] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWloadidx4 ptr idx mem)
        for {
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               if v_0.AuxInt != 2 {
                        break
                }
-               v.reset(OpARM64ORshiftRL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVWloadidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (OR x1:(SRLconst [c] y) x0)
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftRL x0 y [c])
+       // match: (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[1]
-               x1 := v.Args[0]
-               if x1.Op != OpARM64SRLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               x0 := v.Args[1]
-               if !(clobberIfDead(x1)) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64ORshiftRL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
+               v.reset(OpARM64MOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (OR x0 x1:(SRAconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftRA x0 y [c])
+       // match: (MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
        for {
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRAconst {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWstorezero {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpARM64ORshiftRA)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
        return false
 }
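// The MOVWload rules above fold constant and symbol offsets into the load and,
// when the address is computed as ptr+idx or ptr+(idx<<2), lower it to the new
// register-indexed forms MOVWloadidx / MOVWloadidx4 so the backend can use
// ARM64's shifted-register addressing. A minimal illustrative sketch (ordinary
// Go source, not part of this CL; the function name is made up) of the kind of
// load these rules target:
//
//	func loadElem(s []int32, i int) int32 {
//		return s[i] // 4-byte load at &s[0] + i*4
//	}
//
// With the MOVWloadidx4 form this element load can be emitted as a single
// instruction such as "MOVW (R0)(R1<<2), R2" instead of a separate scaled add
// followed by an offset load.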
-func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (OR x1:(SRAconst [c] y) x0)
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftRA x0 y [c])
+func rewriteValueARM64_OpARM64MOVWloadidx_0(v *Value) bool {
+       // match: (MOVWloadidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVWload [c] ptr mem)
        for {
-               _ = v.Args[1]
-               x1 := v.Args[0]
-               if x1.Op != OpARM64SRAconst {
-                       break
-               }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               x0 := v.Args[1]
-               if !(clobberIfDead(x1)) {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64ORshiftRA)
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWload)
                v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (OR (UBFIZ [bfc] x) (ANDconst [ac] y))
-       // cond: ac == ^((1<<uint(getARM64BFwidth(bfc))-1) << uint(getARM64BFlsb(bfc)))
-       // result: (BFI [bfc] y x)
+       // match: (MOVWloadidx (MOVDconst [c]) ptr mem)
+       // cond:
+       // result: (MOVWload [c] ptr mem)
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64UBFIZ {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               bfc := v_0.AuxInt
-               x := v_0.Args[0]
+               c := v_0.AuxInt
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx ptr (SLLconst [2] idx) mem)
+       // cond:
+       // result: (MOVWloadidx4 ptr idx mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64ANDconst {
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               ac := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(ac == ^((1<<uint(getARM64BFwidth(bfc)) - 1) << uint(getARM64BFlsb(bfc)))) {
+               if v_1.AuxInt != 2 {
                        break
                }
-               v.reset(OpARM64BFI)
-               v.AuxInt = bfc
-               v.AddArg(y)
-               v.AddArg(x)
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWloadidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (OR (ANDconst [ac] y) (UBFIZ [bfc] x))
-       // cond: ac == ^((1<<uint(getARM64BFwidth(bfc))-1) << uint(getARM64BFlsb(bfc)))
-       // result: (BFI [bfc] y x)
+       // match: (MOVWloadidx (SLLconst [2] idx) ptr mem)
+       // cond:
+       // result: (MOVWloadidx4 ptr idx mem)
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ANDconst {
-                       break
-               }
-               ac := v_0.AuxInt
-               y := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64UBFIZ {
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               bfc := v_1.AuxInt
-               x := v_1.Args[0]
-               if !(ac == ^((1<<uint(getARM64BFwidth(bfc)) - 1) << uint(getARM64BFlsb(bfc)))) {
+               if v_0.AuxInt != 2 {
                        break
                }
-               v.reset(OpARM64BFI)
-               v.AuxInt = bfc
-               v.AddArg(y)
-               v.AddArg(x)
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWloadidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (OR (UBFX [bfc] x) (ANDconst [ac] y))
-       // cond: ac == ^(1<<uint(getARM64BFwidth(bfc))-1)
-       // result: (BFXIL [bfc] y x)
+       // match: (MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64UBFX {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWstorezeroidx {
                        break
                }
-               bfc := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64ANDconst {
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
                        break
                }
-               ac := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(ac == ^(1<<uint(getARM64BFwidth(bfc)) - 1)) {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWloadidx4_0(v *Value) bool {
+       // match: (MOVWloadidx4 ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVWload [c<<2] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64BFXIL)
-               v.AuxInt = bfc
-               v.AddArg(y)
-               v.AddArg(x)
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWload)
+               v.AuxInt = c << 2
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (OR (ANDconst [ac] y) (UBFX [bfc] x))
-       // cond: ac == ^(1<<uint(getARM64BFwidth(bfc))-1)
-       // result: (BFXIL [bfc] y x)
+       // match: (MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _))
+       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
+       // result: (MOVDconst [0])
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ANDconst {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWstorezeroidx4 {
                        break
                }
-               ac := v_0.AuxInt
-               y := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64UBFX {
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
                        break
                }
-               bfc := v_1.AuxInt
-               x := v_1.Args[0]
-               if !(ac == ^(1<<uint(getARM64BFwidth(bfc)) - 1)) {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
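// When the index of an indexed load turns out to be a constant, the rules
// above fold it back into a plain offset load: MOVWloadidx with constant c
// becomes MOVWload [c], while MOVWloadidx4 with constant c becomes
// MOVWload [c<<2], because the scaled form addresses ptr + idx*4. A small
// worked example of that arithmetic (the value 3 is chosen only for
// illustration):
//
//	(MOVWloadidx4 ptr (MOVDconst [3]) mem)  =>  (MOVWload [12] ptr mem)  // 3<<2 = 12
//
// Conversely, an index that is a left shift by 2 is canonicalized into the
// scaled form, and a load from a location just zeroed by a matching
// MOVWstorezeroidx/MOVWstorezeroidx4 is replaced by MOVDconst [0].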
+func rewriteValueARM64_OpARM64MOVWreg_0(v *Value) bool {
+       // match: (MOVWreg x:(MOVBload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBload {
                        break
                }
-               v.reset(OpARM64BFXIL)
-               v.AuxInt = bfc
-               v.AddArg(y)
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       // match: (MOVWreg x:(MOVBUload _ _))
+       // cond:
+       // result: (MOVDreg x)
        for {
-               t := v.Type
-               _ = v.Args[1]
-               o0 := v.Args[0]
-               if o0.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o0.AuxInt != 8 {
-                       break
-               }
-               _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUload {
                        break
                }
-               if o1.AuxInt != 16 {
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHload {
                        break
                }
-               _ = o1.Args[1]
-               s0 := o1.Args[0]
-               if s0.Op != OpARM64SLLconst {
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHUload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUload {
                        break
                }
-               if s0.AuxInt != 24 {
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVWload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWload {
                        break
                }
-               y0 := s0.Args[0]
-               if y0.Op != OpARM64MOVDnop {
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBloadidx {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHloadidx {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUloadidx {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVWloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWloadidx {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWreg_10(v *Value) bool {
+       // match: (MOVWreg x:(MOVHloadidx2 _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHloadidx2 {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHUloadidx2 _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUloadidx2 {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVWloadidx4 _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWloadidx4 {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBUreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVWreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(int32(c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(int32(c))
+               return true
+       }
+       // match: (MOVWreg (SLLconst [lc] x))
+       // cond: lc < 32
+       // result: (SBFIZ [arm64BFAuxInt(lc, 32-lc)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
+                       break
+               }
+               lc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(lc < 32) {
+                       break
+               }
+               v.reset(OpARM64SBFIZ)
+               v.AuxInt = arm64BFAuxInt(lc, 32-lc)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
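// MOVWreg sign-extends the low 32 bits of its argument. The rules above make
// it a plain register move when the argument is already a load or a narrower
// extension (so the upper bits are known), fold it into constants, and rewrite
// MOVWreg of a constant left shift into a single SBFIZ bitfield insert. A
// hedged sketch (ordinary Go, not from this CL; name made up) of source that
// should reach the SBFIZ rule:
//
//	func shiftExtend(x int64) int64 {
//		return int64(int32(x << 3)) // sign-extend bits 0..31 of x<<3
//	}
//
// For a shift count lc < 32 the rewrite produces SBFIZ with lsb=lc and
// width=32-lc, here lsb=3 and width=29, replacing a shift plus a separate
// MOVW sign extension.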
+func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWstore [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} (ADD ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWstoreidx ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWstoreidx4 ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
+                       break
+               }
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVWstorezero [off] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem)
+       // cond:
+       // result: (MOVWstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVWUreg x) mem)
+       // cond:
+       // result: (MOVWstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWUreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVDstore [i-4] {s} ptr0 w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if v_1.AuxInt != 32 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVDstoreidx ptr1 idx1 w mem)
+       for {
+               if v.AuxInt != 4 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if v_1.AuxInt != 32 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx4 ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
+       // result: (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w mem)
+       for {
+               if v.AuxInt != 4 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
+                       break
+               }
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if v_1.AuxInt != 32 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstoreidx4 {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstoreidx)
+               v.AddArg(ptr1)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
+               v0.AuxInt = 2
+               v0.AddArg(idx1)
+               v.AddArg(v0)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
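// Besides offset folding and lowering to the indexed forms, the MOVWstore
// rules turn a store of constant zero into MOVWstorezero and drop a redundant
// MOVWreg/MOVWUreg in front of the store, since a word store writes only the
// low 32 bits of the source register. A minimal illustrative example of the
// zero-store case (ordinary Go, not from this CL; name made up):
//
//	func clearWord(p *uint32) {
//		*p = 0 // rewritten to MOVWstorezero, i.e. a store of the zero register
//	}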
+func rewriteValueARM64_OpARM64MOVWstore_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVDstore [i-4] {s} ptr0 w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               w0 := x.Args[1]
+               if w0.Op != OpARM64SRLconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx ptr1 idx1 w0:(SRLconst [j-32] w) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVDstoreidx ptr1 idx1 w0 mem)
+       for {
+               if v.AuxInt != 4 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               w0 := x.Args[2]
+               if w0.Op != OpARM64SRLconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx4 ptr1 idx1 w0:(SRLconst [j-32] w) mem))
+       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
+       // result: (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w0 mem)
+       for {
+               if v.AuxInt != 4 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
+                       break
+               }
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstoreidx4 {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               w0 := x.Args[2]
+               if w0.Op != OpARM64SRLconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstoreidx)
+               v.AddArg(ptr1)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
+               v0.AuxInt = 2
+               v0.AddArg(idx1)
+               v.AddArg(v0)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
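// The SRLconst patterns in MOVWstore_0 and MOVWstore_10 merge two adjacent
// 4-byte stores of the halves of one 64-bit value into a single 8-byte
// MOVDstore (or MOVDstoreidx for the indexed forms). A hedged sketch
// (ordinary Go, not from this CL; name made up) of source that produces such
// a pair on a little-endian target like arm64:
//
//	func storeHalves(p *[2]uint32, w uint64) {
//		p[0] = uint32(w)       // low word at offset 0
//		p[1] = uint32(w >> 32) // high word at offset 4
//	}
//
// When the low-word store is only used as the memory argument of the
// high-word store, the pair collapses into one 64-bit store of w.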
+func rewriteValueARM64_OpARM64MOVWstoreidx_0(v *Value) bool {
+       // match: (MOVWstoreidx ptr (MOVDconst [c]) val mem)
+       // cond:
+       // result: (MOVWstore [c] ptr val mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx (MOVDconst [c]) idx val mem)
+       // cond:
+       // result: (MOVWstore [c] idx val mem)
+       for {
+               _ = v.Args[3]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = c
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx ptr (SLLconst [2] idx) val mem)
+       // cond:
+       // result: (MOVWstoreidx4 ptr idx val mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
+                       break
+               }
+               if v_1.AuxInt != 2 {
+                       break
+               }
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstoreidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx (SLLconst [2] idx) ptr val mem)
+       // cond:
+       // result: (MOVWstoreidx4 ptr idx val mem)
+       for {
+               _ = v.Args[3]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
+                       break
+               }
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstoreidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx ptr idx (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVWstorezeroidx ptr idx mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_2.AuxInt != 0 {
+                       break
+               }
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx ptr idx (MOVWreg x) mem)
+       // cond:
+       // result: (MOVWstoreidx ptr idx x mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWreg {
+                       break
+               }
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx ptr idx (MOVWUreg x) mem)
+       // cond:
+       // result: (MOVWstoreidx ptr idx x mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWUreg {
+                       break
+               }
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx ptr (ADDconst [4] idx) (SRLconst [32] w) x:(MOVWstoreidx ptr idx w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVDstoreidx ptr idx w mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if v_1.AuxInt != 4 {
+                       break
+               }
+               idx := v_1.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64SRLconst {
+                       break
+               }
+               if v_2.AuxInt != 32 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpARM64MOVWstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               if ptr != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
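// MOVWstoreidx gets the same treatment as the loads: a constant index folds
// back into an offset store, an index that is a left shift by 2 becomes the
// scaled MOVWstoreidx4 form, a zero value becomes MOVWstorezeroidx, and
// adjacent indexed word stores of a split 64-bit value merge into
// MOVDstoreidx. An illustrative store (ordinary Go, not from this CL; name
// and registers made up) that can use the scaled indexed form:
//
//	func storeElem(s []uint32, i int, v uint32) {
//		s[i] = v // 4-byte store at &s[0] + i*4, e.g. "MOVWU R2, (R0)(R1<<2)"
//	}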
+func rewriteValueARM64_OpARM64MOVWstoreidx4_0(v *Value) bool {
+       // match: (MOVWstoreidx4 ptr (MOVDconst [c]) val mem)
+       // cond:
+       // result: (MOVWstore [c<<2] ptr val mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = c << 2
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx4 ptr idx (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVWstorezeroidx4 ptr idx mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_2.AuxInt != 0 {
+                       break
+               }
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstorezeroidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx4 ptr idx (MOVWreg x) mem)
+       // cond:
+       // result: (MOVWstoreidx4 ptr idx x mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWreg {
+                       break
+               }
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstoreidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx4 ptr idx (MOVWUreg x) mem)
+       // cond:
+       // result: (MOVWstoreidx4 ptr idx x mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWUreg {
+                       break
+               }
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstoreidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWstorezero_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWstorezero [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstorezero [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWstorezeroidx ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVWstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstorezero [off] {sym} (ADDshiftLL [2] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWstorezeroidx4 ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
+                       break
+               }
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVWstorezeroidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
+       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,4) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVDstorezero [min(i,j)] {s} ptr0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               ptr0 := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVWstorezero {
+                       break
+               }
+               j := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[1]
+               ptr1 := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 4) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = min(i, j)
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstorezero [4] {s} (ADD ptr0 idx0) x:(MOVWstorezeroidx ptr1 idx1 mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVDstorezeroidx ptr1 idx1 mem)
+       for {
+               if v.AuxInt != 4 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVWstorezeroidx {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstorezeroidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstorezero [4] {s} (ADDshiftLL [2] ptr0 idx0) x:(MOVWstorezeroidx4 ptr1 idx1 mem))
+       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
+       // result: (MOVDstorezeroidx ptr1 (SLLconst <idx1.Type> [2] idx1) mem)
+       for {
+               if v.AuxInt != 4 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
+                       break
+               }
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVWstorezeroidx4 {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstorezeroidx)
+               v.AddArg(ptr1)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
+               v0.AuxInt = 2
+               v0.AddArg(idx1)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
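// MOVWstorezero follows the same lowering pattern and additionally merges two
// zero stores at adjacent 4-byte offsets into a single MOVDstorezero (or
// MOVDstorezeroidx for the indexed variants). A minimal illustrative example
// (ordinary Go, not from this CL; name made up):
//
//	func clearPair(p *[2]uint32) {
//		p[0] = 0
//		p[1] = 0 // together with the store above, can become one 8-byte zero store
//	}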
+func rewriteValueARM64_OpARM64MOVWstorezeroidx_0(v *Value) bool {
+       // match: (MOVWstorezeroidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVWstorezero [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstorezeroidx (MOVDconst [c]) idx mem)
+       // cond:
+       // result: (MOVWstorezero [c] idx mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = c
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstorezeroidx ptr (SLLconst [2] idx) mem)
+       // cond:
+       // result: (MOVWstorezeroidx4 ptr idx mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
+                       break
+               }
+               if v_1.AuxInt != 2 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstorezeroidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstorezeroidx (SLLconst [2] idx) ptr mem)
+       // cond:
+       // result: (MOVWstorezeroidx4 ptr idx mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
+                       break
+               }
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstorezeroidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstorezeroidx ptr (ADDconst [4] idx) x:(MOVWstorezeroidx ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVDstorezeroidx ptr idx mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if v_1.AuxInt != 4 {
+                       break
+               }
+               idx := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstorezeroidx {
+                       break
+               }
+               _ = x.Args[2]
+               if ptr != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
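The MOVWstorezeroidx rules above fold a constant index back into an immediate-offset zero store, recognize a shift-by-2 index as the scaled MOVWstorezeroidx4 form, and merge two adjacent 32-bit zero stores into one 64-bit MOVDstorezeroidx. A minimal Go sketch of source code that typically produces such a pair of stores (illustrative only; the function is hypothetical and not part of this CL):

package zerostore

// zeroPair clears two adjacent uint32 elements. With the rules above,
// the arm64 backend may combine the two word zero stores into a single
// register-indexed doubleword zero store.
func zeroPair(s []uint32, i int) {
	s[i] = 0
	s[i+1] = 0
}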
+func rewriteValueARM64_OpARM64MOVWstorezeroidx4_0(v *Value) bool {
+       // match: (MOVWstorezeroidx4 ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVWstorezero [c<<2] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = c << 2
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MUL_0(v *Value) bool {
+       // match: (MUL (NEG x) y)
+       // cond:
+       // result: (MNEG x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64NEG {
+                       break
+               }
+               x := v_0.Args[0]
+               y := v.Args[1]
+               v.reset(OpARM64MNEG)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (MUL y (NEG x))
+       // cond:
+       // result: (MNEG x y)
+       for {
+               _ = v.Args[1]
+               y := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64NEG {
+                       break
+               }
+               x := v_1.Args[0]
+               v.reset(OpARM64MNEG)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (MUL x (MOVDconst [-1]))
+       // cond:
+       // result: (NEG x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVDconst [-1]) x)
+       // cond:
+       // result: (NEG x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0.AuxInt != -1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL _ (MOVDconst [0]))
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MUL (MOVDconst [0]) _)
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MUL x (MOVDconst [1]))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVDconst [1]) x)
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
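This first MUL group folds a negated operand into the multiply (MNEG), handles the trivial constants -1, 0, and 1, and turns a multiply by a power of two into a single left shift. The identities behind those rules, checked in plain Go (a sketch only, not compiler code):

package main

import "fmt"

func main() {
	x := int64(37)
	fmt.Println(-x*5 == -(x * 5)) // (MUL (NEG x) y) and (MNEG x y) agree
	fmt.Println(x*-1 == -x)       // multiply by -1 is a negation
	fmt.Println(x*8 == x<<3)      // power of two: SLLconst [log2(8)], i.e. x<<3
}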
+func rewriteValueARM64_OpARM64MUL_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MUL x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c-1) && c >= 3
+       // result: (ADDshiftLL x x [log2(c-1)])
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c-1) && c >= 3) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c - 1)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c-1) && c >= 3
+       // result: (ADDshiftLL x x [log2(c-1)])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c-1) && c >= 3) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c - 1)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c+1) && c >= 7
+       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c+1) && c >= 7) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c+1) && c >= 7
+       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c+1) && c >= 7) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 1
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 1
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo(c/7)
+       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%7 == 0 && isPowerOfTwo(c/7)
+       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MUL_20(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MUL x (MOVDconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [c*d])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = c * d
+               return true
+       }
+       // match: (MUL (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [c*d])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = c * d
+               return true
+       }
+       return false
+}
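Taken together, the MUL_10 and MUL_20 groups strength-reduce multiplies by constants of the form 2^k±1, and by 3, 5, 7, or 9 times a power of two, into one ADDshiftLL (optionally with a NEG) plus a shift; two constant operands are folded outright. A self-checking sketch of those identities (illustrative only, not part of the generated file):

package main

import "fmt"

func main() {
	x := int64(37)
	fmt.Println(x*5 == x+x<<2)       // 5 = 4+1:  ADDshiftLL x x [2]
	fmt.Println(x*7 == x<<3-x)       // 7 = 8-1:  ADDshiftLL (NEG x) x [3]
	fmt.Println(x*24 == (x+x<<1)<<3) // 24 = 3*8: SLLconst [3] (ADDshiftLL x x [1])
	fmt.Println(x*40 == (x+x<<2)<<3) // 40 = 5*8: SLLconst [3] (ADDshiftLL x x [2])
	fmt.Println(x*56 == (x<<3-x)<<3) // 56 = 7*8: SLLconst [3] with the NEG form
	fmt.Println(x*72 == (x+x<<3)<<3) // 72 = 9*8: SLLconst [3] (ADDshiftLL x x [3])
}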
+func rewriteValueARM64_OpARM64MULW_0(v *Value) bool {
+       // match: (MULW (NEG x) y)
+       // cond:
+       // result: (MNEGW x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64NEG {
+                       break
+               }
+               x := v_0.Args[0]
+               y := v.Args[1]
+               v.reset(OpARM64MNEGW)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (MULW y (NEG x))
+       // cond:
+       // result: (MNEGW x y)
+       for {
+               _ = v.Args[1]
+               y := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64NEG {
+                       break
+               }
+               x := v_1.Args[0]
+               v.reset(OpARM64MNEGW)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (MULW x (MOVDconst [c]))
+       // cond: int32(c)==-1
+       // result: (NEG x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(int32(c) == -1) {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: int32(c)==-1
+       // result: (NEG x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(int32(c) == -1) {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW _ (MOVDconst [c]))
+       // cond: int32(c)==0
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(int32(c) == 0) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) _)
+       // cond: int32(c)==0
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               if !(int32(c) == 0) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MULW x (MOVDconst [c]))
+       // cond: int32(c)==1
+       // result: x
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(int32(c) == 1) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: int32(c)==1
+       // result: x
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(int32(c) == 1) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
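MULW mirrors the MUL rules, but only the low 32 bits of the result matter, so its guards test int32(c) rather than c itself. A small sketch of why that is safe (hypothetical values, illustrative only):

package main

import "fmt"

func main() {
	c := int64(1<<32 | 1) // int32(c) == 1, even though c != 1
	x := int64(0x12345678)
	// For a 32-bit multiply only the low 32 bits of the operands matter,
	// so multiplying by c behaves like multiplying by 1.
	fmt.Println(int32(x)*int32(c) == int32(x)) // true
}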
+func rewriteValueARM64_OpARM64MULW_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MULW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
+       // result: (ADDshiftLL x x [log2(c-1)])
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c - 1)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
+       // result: (ADDshiftLL x x [log2(c-1)])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c - 1)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
+       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
+       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 1
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 1
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MULW_20(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [int64(int32(c)*int32(d))])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(int32(c) * int32(d))
+               return true
+       }
+       // match: (MULW (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(int32(c)*int32(d))])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(int32(c) * int32(d))
+               return true
+       }
+       return false
+}
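The MULW constant-folding rules compute the product with 32-bit wraparound, int64(int32(c)*int32(d)), rather than c*d. A one-line illustration of the difference (sketch only):

package main

import "fmt"

func main() {
	c, d := int64(0x7fffffff), int64(2)
	fmt.Println(int64(int32(c)*int32(d)), c*d) // prints: -2 4294967294
}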
+func rewriteValueARM64_OpARM64MVN_0(v *Value) bool {
+       // match: (MVN (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [^c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = ^c
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64NEG_0(v *Value) bool {
+       // match: (NEG (MUL x y))
+       // cond:
+       // result: (MNEG x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MUL {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpARM64MNEG)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (NEG (MULW x y))
+       // cond:
+       // result: (MNEGW x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MULW {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpARM64MNEGW)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (NEG (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [-c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -c
+               return true
+       }
+       return false
+}
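The NEG rules pull a negation into a preceding multiply, so -(a*b) becomes a single MNEG (or MNEGW), and negate constants directly. A hypothetical source pattern that benefits (not part of this CL):

package negmul

// negProd returns the negated product. With the rule above the arm64
// backend can emit one MNEG instruction instead of MUL followed by NEG.
func negProd(a, b int64) int64 {
	return -(a * b)
}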
+func rewriteValueARM64_OpARM64NotEqual_0(v *Value) bool {
+       // match: (NotEqual (FlagEQ))
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagEQ {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (NotEqual (FlagLT_ULT))
+       // cond:
+       // result: (MOVDconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_ULT {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (NotEqual (FlagLT_UGT))
+       // cond:
+       // result: (MOVDconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_UGT {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (NotEqual (FlagGT_ULT))
+       // cond:
+       // result: (MOVDconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_ULT {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (NotEqual (FlagGT_UGT))
+       // cond:
+       // result: (MOVDconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_UGT {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (NotEqual (InvertFlags x))
+       // cond:
+       // result: (NotEqual x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64InvertFlags {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpARM64NotEqual)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
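The NotEqual rules fold comparisons whose flag result is already known into a constant 0 or 1, and drop InvertFlags because inequality is symmetric in its operands; that last step is just the observation that (a != b) == (b != a). A trivial check (sketch only):

package main

import "fmt"

func main() {
	a, b := 3, 4
	fmt.Println((a != b) == (b != a)) // true: swapping operands never changes !=
}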
+func rewriteValueARM64_OpARM64OR_0(v *Value) bool {
+       // match: (OR x (MOVDconst [c]))
+       // cond:
+       // result: (ORconst [c] x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64ORconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR (MOVDconst [c]) x)
+       // cond:
+       // result: (ORconst [c] x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ORconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR x x)
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR x (MVN y))
+       // cond:
+       // result: (ORN x y)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MVN {
+                       break
+               }
+               y := v_1.Args[0]
+               v.reset(OpARM64ORN)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (OR (MVN y) x)
+       // cond:
+       // result: (ORN x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MVN {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64ORN)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (OR x0 x1:(SLLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ORshiftLL x0 y [c])
+       for {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SLLconst {
+                       break
+               }
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
+                       break
+               }
+               v.reset(OpARM64ORshiftLL)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
+               return true
+       }
+       // match: (OR x1:(SLLconst [c] y) x0)
+       // cond: clobberIfDead(x1)
+       // result: (ORshiftLL x0 y [c])
+       for {
+               _ = v.Args[1]
+               x1 := v.Args[0]
+               if x1.Op != OpARM64SLLconst {
+                       break
+               }
+               c := x1.AuxInt
+               y := x1.Args[0]
+               x0 := v.Args[1]
+               if !(clobberIfDead(x1)) {
+                       break
+               }
+               v.reset(OpARM64ORshiftLL)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
+               return true
+       }
+       // match: (OR x0 x1:(SRLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ORshiftRL x0 y [c])
+       for {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SRLconst {
+                       break
+               }
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
+                       break
+               }
+               v.reset(OpARM64ORshiftRL)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
+               return true
+       }
+       // match: (OR x1:(SRLconst [c] y) x0)
+       // cond: clobberIfDead(x1)
+       // result: (ORshiftRL x0 y [c])
+       for {
+               _ = v.Args[1]
+               x1 := v.Args[0]
+               if x1.Op != OpARM64SRLconst {
+                       break
+               }
+               c := x1.AuxInt
+               y := x1.Args[0]
+               x0 := v.Args[1]
+               if !(clobberIfDead(x1)) {
+                       break
+               }
+               v.reset(OpARM64ORshiftRL)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
+               return true
+       }
+       // match: (OR x0 x1:(SRAconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ORshiftRA x0 y [c])
+       for {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SRAconst {
+                       break
+               }
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
+                       break
+               }
+               v.reset(OpARM64ORshiftRA)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
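The OR rules fold a constant operand into ORconst, collapse x|x, turn x|^y into ORN, and absorb a shifted operand into ORR's shifted-register form (ORshiftLL/RL/RA) when the shift has no other use. A minimal Go sketch of expressions that map onto those forms (hypothetical function, illustrative only):

package orshift

// combine shows typical operand shapes the rules above target: the
// shifted operand of | can be folded into a single ORR with a shifted
// register, and x | ^y can become a single ORN.
func combine(x, y uint64) uint64 {
	return (x | y<<17) | (x | ^y)
}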
+func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (OR x1:(SRAconst [c] y) x0)
+       // cond: clobberIfDead(x1)
+       // result: (ORshiftRA x0 y [c])
+       for {
+               _ = v.Args[1]
+               x1 := v.Args[0]
+               if x1.Op != OpARM64SRAconst {
+                       break
+               }
+               c := x1.AuxInt
+               y := x1.Args[0]
+               x0 := v.Args[1]
+               if !(clobberIfDead(x1)) {
+                       break
+               }
+               v.reset(OpARM64ORshiftRA)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
+               return true
+       }
+       // match: (OR (UBFIZ [bfc] x) (ANDconst [ac] y))
+       // cond: ac == ^((1<<uint(getARM64BFwidth(bfc))-1) << uint(getARM64BFlsb(bfc)))
+       // result: (BFI [bfc] y x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64UBFIZ {
+                       break
+               }
+               bfc := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ANDconst {
+                       break
+               }
+               ac := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(ac == ^((1<<uint(getARM64BFwidth(bfc)) - 1) << uint(getARM64BFlsb(bfc)))) {
+                       break
+               }
+               v.reset(OpARM64BFI)
+               v.AuxInt = bfc
+               v.AddArg(y)
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR (ANDconst [ac] y) (UBFIZ [bfc] x))
+       // cond: ac == ^((1<<uint(getARM64BFwidth(bfc))-1) << uint(getARM64BFlsb(bfc)))
+       // result: (BFI [bfc] y x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ANDconst {
+                       break
+               }
+               ac := v_0.AuxInt
+               y := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64UBFIZ {
+                       break
+               }
+               bfc := v_1.AuxInt
+               x := v_1.Args[0]
+               if !(ac == ^((1<<uint(getARM64BFwidth(bfc)) - 1) << uint(getARM64BFlsb(bfc)))) {
+                       break
+               }
+               v.reset(OpARM64BFI)
+               v.AuxInt = bfc
+               v.AddArg(y)
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR (UBFX [bfc] x) (ANDconst [ac] y))
+       // cond: ac == ^(1<<uint(getARM64BFwidth(bfc))-1)
+       // result: (BFXIL [bfc] y x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64UBFX {
+                       break
+               }
+               bfc := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ANDconst {
+                       break
+               }
+               ac := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(ac == ^(1<<uint(getARM64BFwidth(bfc)) - 1)) {
+                       break
+               }
+               v.reset(OpARM64BFXIL)
+               v.AuxInt = bfc
+               v.AddArg(y)
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR (ANDconst [ac] y) (UBFX [bfc] x))
+       // cond: ac == ^(1<<uint(getARM64BFwidth(bfc))-1)
+       // result: (BFXIL [bfc] y x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ANDconst {
+                       break
+               }
+               ac := v_0.AuxInt
+               y := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64UBFX {
+                       break
+               }
+               bfc := v_1.AuxInt
+               x := v_1.Args[0]
+               if !(ac == ^(1<<uint(getARM64BFwidth(bfc)) - 1)) {
+                       break
+               }
+               v.reset(OpARM64BFXIL)
+               v.AuxInt = bfc
+               v.AddArg(y)
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)))
+       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               _ = o1.Args[1]
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 24 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               i3 := x0.AuxInt
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               i1 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := v.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i0 := x3.AuxInt
+               if x3.Aux != s {
+                       break
+               }
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.Aux = s
+               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v1.AuxInt = i0
+               v1.AddArg(p)
+               v0.AddArg(v1)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem))))
+       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               y3 := v.Args[0]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i0 := x3.AuxInt
+               s := x3.Aux
+               _ = x3.Args[1]
+               p := x3.Args[0]
+               mem := x3.Args[1]
+               o0 := v.Args[1]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               _ = o1.Args[1]
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 24 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               i3 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               _ = x0.Args[1]
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               i1 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.Aux = s
+               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v1.AuxInt = i0
+               v1.AddArg(p)
+               v0.AddArg(v1)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr0 idx0 mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr0 idx0 mem)
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               _ = o1.Args[1]
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 24 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x0.AuxInt != 3 {
+                       break
+               }
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x1.AuxInt != 2 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x2.AuxInt != 1 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               p1 := x2.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := v.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x3.Args[2]
+               ptr0 := x3.Args[0]
+               idx0 := x3.Args[1]
+               if mem != x3.Args[2] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr0)
+               v0.AddArg(idx0)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> y3:(MOVDnop x3:(MOVBUloadidx ptr0 idx0 mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr0 idx0 mem)
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               y3 := v.Args[0]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x3.Args[2]
+               ptr0 := x3.Args[0]
+               idx0 := x3.Args[1]
+               mem := x3.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               _ = o1.Args[1]
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 24 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x0.AuxInt != 3 {
+                       break
+               }
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               if mem != x0.Args[1] {
+                       break
+               }
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x1.AuxInt != 2 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x2.AuxInt != 1 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               p1 := x2.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x2.Args[1] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr0)
+               v0.AddArg(idx0)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr idx mem)))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr idx mem)
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               _ = o1.Args[1]
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 24 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x0.Args[2]
+               ptr := x0.Args[0]
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x0_1.AuxInt != 3 {
+                       break
+               }
+               idx := x0_1.Args[0]
+               mem := x0.Args[2]
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
+                       break
+               }
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x1_1.AuxInt != 2 {
+                       break
+               }
+               if idx != x1_1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
+                       break
+               }
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x2_1.AuxInt != 1 {
+                       break
+               }
+               if idx != x2_1.Args[0] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               y3 := v.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x3.Args[2]
+               if ptr != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               if mem != x3.Args[2] {
+                       break
+               }
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       return false
+}
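The OR rules in the function above recognize four zero-extended byte loads at consecutive offsets that are shifted and OR-ed together, and replace them with a single 32-bit unsigned load (MOVWUload or MOVWUloadidx). As a rough illustration only, and not part of this patch, the source-level pattern these rules target is a hand-written little-endian 32-bit decode such as:

package main

import "fmt"

// le32 assembles a 32-bit value from four consecutive bytes, least
// significant byte first. In SSA form this roughly lowers to four
// MOVBUload/MOVBUloadidx ops combined with shifts and ORs, which the
// rules above can now merge into one MOVWUloadidx on ARM64.
func le32(b []byte, i int) uint32 {
	_ = b[i+3] // bounds check hint covering all four loads
	return uint32(b[i]) |
		uint32(b[i+1])<<8 |
		uint32(b[i+2])<<16 |
		uint32(b[i+3])<<24
}

func main() {
	fmt.Printf("%#x\n", le32([]byte{0x78, 0x56, 0x34, 0x12}, 0)) // prints 0x12345678
}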
+func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (OR <t> y3:(MOVDnop x3:(MOVBUloadidx ptr idx mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [1] idx) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr idx mem)
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               y3 := v.Args[0]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x3.Args[2]
+               ptr := x3.Args[0]
+               idx := x3.Args[1]
+               mem := x3.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               _ = o1.Args[1]
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 24 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x0.Args[2]
+               if ptr != x0.Args[0] {
+                       break
+               }
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x0_1.AuxInt != 3 {
+                       break
+               }
+               if idx != x0_1.Args[0] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
+                       break
+               }
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x1_1.AuxInt != 2 {
+                       break
+               }
+               if idx != x1_1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
+                       break
+               }
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x2_1.AuxInt != 1 {
+                       break
+               }
+               if idx != x2_1.Args[0] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
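The remaining rules in this function extend the same byte-merging idea from four loads to eight: a chain of ORshiftLL [8..48] plus an SLLconst [56] over eight MOVBUload/MOVBUloadidx values at consecutive offsets is rewritten into a single 64-bit MOVDload or MOVDloadidx. Sketched in the same illustrative style as le32 above (again, not part of this patch, and meant to live in the same example file), the corresponding source pattern is:

// le64 is the 64-bit analogue of le32: eight consecutive byte loads
// OR-ed together least significant byte first. With these rules the
// ARM64 backend can emit a single MOVDloadidx for it.
func le64(b []byte, i int) uint64 {
	_ = b[i+7] // bounds check hint covering all eight loads
	return uint64(b[i]) | uint64(b[i+1])<<8 | uint64(b[i+2])<<16 | uint64(b[i+3])<<24 |
		uint64(b[i+4])<<32 | uint64(b[i+5])<<40 | uint64(b[i+6])<<48 | uint64(b[i+7])<<56
}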
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)))
+       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               _ = o1.Args[1]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 24 {
+                       break
+               }
+               _ = o2.Args[1]
+               o3 := o2.Args[0]
+               if o3.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o3.AuxInt != 32 {
+                       break
+               }
+               _ = o3.Args[1]
+               o4 := o3.Args[0]
+               if o4.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o4.AuxInt != 40 {
+                       break
+               }
+               _ = o4.Args[1]
+               o5 := o4.Args[0]
+               if o5.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o5.AuxInt != 48 {
+                       break
+               }
+               _ = o5.Args[1]
+               s0 := o5.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 56 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               i7 := x0.AuxInt
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o5.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               i6 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o4.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               i5 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := o3.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i4 := x3.AuxInt
+               if x3.Aux != s {
+                       break
+               }
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
+                       break
+               }
+               y4 := o2.Args[1]
+               if y4.Op != OpARM64MOVDnop {
+                       break
+               }
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
+                       break
+               }
+               i3 := x4.AuxInt
+               if x4.Aux != s {
+                       break
+               }
+               _ = x4.Args[1]
+               if p != x4.Args[0] {
+                       break
+               }
+               if mem != x4.Args[1] {
+                       break
+               }
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
+                       break
+               }
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUload {
+                       break
+               }
+               i2 := x5.AuxInt
+               if x5.Aux != s {
+                       break
+               }
+               _ = x5.Args[1]
+               if p != x5.Args[0] {
+                       break
+               }
+               if mem != x5.Args[1] {
+                       break
+               }
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
+                       break
+               }
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUload {
+                       break
+               }
+               i1 := x6.AuxInt
+               if x6.Aux != s {
+                       break
+               }
+               _ = x6.Args[1]
+               if p != x6.Args[0] {
+                       break
+               }
+               if mem != x6.Args[1] {
+                       break
+               }
+               y7 := v.Args[1]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUload {
+                       break
+               }
+               i0 := x7.AuxInt
+               if x7.Aux != s {
+                       break
+               }
+               _ = x7.Args[1]
+               if p != x7.Args[0] {
+                       break
+               }
+               if mem != x7.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.Aux = s
+               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v1.AuxInt = i0
+               v1.AddArg(p)
+               v0.AddArg(v1)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem))))
+       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               y7 := v.Args[0]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUload {
+                       break
+               }
+               i0 := x7.AuxInt
+               s := x7.Aux
+               _ = x7.Args[1]
+               p := x7.Args[0]
+               mem := x7.Args[1]
+               o0 := v.Args[1]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               _ = o1.Args[1]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 24 {
+                       break
+               }
+               _ = o2.Args[1]
+               o3 := o2.Args[0]
+               if o3.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o3.AuxInt != 32 {
+                       break
+               }
+               _ = o3.Args[1]
+               o4 := o3.Args[0]
+               if o4.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o4.AuxInt != 40 {
+                       break
+               }
+               _ = o4.Args[1]
+               o5 := o4.Args[0]
+               if o5.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o5.AuxInt != 48 {
+                       break
+               }
+               _ = o5.Args[1]
+               s0 := o5.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 56 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               i7 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               _ = x0.Args[1]
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               y1 := o5.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               i6 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o4.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               i5 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := o3.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i4 := x3.AuxInt
+               if x3.Aux != s {
+                       break
+               }
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
+                       break
+               }
+               y4 := o2.Args[1]
+               if y4.Op != OpARM64MOVDnop {
+                       break
+               }
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
+                       break
+               }
+               i3 := x4.AuxInt
+               if x4.Aux != s {
+                       break
+               }
+               _ = x4.Args[1]
+               if p != x4.Args[0] {
+                       break
+               }
+               if mem != x4.Args[1] {
+                       break
+               }
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
+                       break
+               }
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUload {
+                       break
+               }
+               i2 := x5.AuxInt
+               if x5.Aux != s {
+                       break
+               }
+               _ = x5.Args[1]
+               if p != x5.Args[0] {
+                       break
+               }
+               if mem != x5.Args[1] {
+                       break
+               }
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
+                       break
+               }
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUload {
+                       break
+               }
+               i1 := x6.AuxInt
+               if x6.Aux != s {
+                       break
+               }
+               _ = x6.Args[1]
+               if p != x6.Args[0] {
+                       break
+               }
+               if mem != x6.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.Aux = s
+               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v1.AuxInt = i0
+               v1.AddArg(p)
+               v0.AddArg(v1)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr0 idx0 mem)
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               _ = o1.Args[1]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 24 {
+                       break
+               }
+               _ = o2.Args[1]
+               o3 := o2.Args[0]
+               if o3.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o3.AuxInt != 32 {
+                       break
+               }
+               _ = o3.Args[1]
+               o4 := o3.Args[0]
+               if o4.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o4.AuxInt != 40 {
+                       break
+               }
+               _ = o4.Args[1]
+               o5 := o4.Args[0]
+               if o5.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o5.AuxInt != 48 {
+                       break
+               }
+               _ = o5.Args[1]
+               s0 := o5.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 56 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x0.AuxInt != 7 {
+                       break
+               }
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o5.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x1.AuxInt != 6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o4.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x2.AuxInt != 5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := o3.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x3.AuxInt != 4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
+                       break
+               }
+               y4 := o2.Args[1]
+               if y4.Op != OpARM64MOVDnop {
+                       break
+               }
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x4.AuxInt != 3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               _ = x4.Args[1]
+               if p != x4.Args[0] {
+                       break
+               }
+               if mem != x4.Args[1] {
+                       break
+               }
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
+                       break
+               }
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x5.AuxInt != 2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               _ = x5.Args[1]
+               if p != x5.Args[0] {
+                       break
+               }
+               if mem != x5.Args[1] {
+                       break
+               }
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
+                       break
+               }
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x6.AuxInt != 1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               _ = x6.Args[1]
+               p1 := x6.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x6.Args[1] {
+                       break
+               }
+               y7 := v.Args[1]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x7.Args[2]
+               ptr0 := x7.Args[0]
+               idx0 := x7.Args[1]
+               if mem != x7.Args[2] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr0)
+               v0.AddArg(idx0)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr0 idx0 mem)
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               y7 := v.Args[0]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x7.Args[2]
+               ptr0 := x7.Args[0]
+               idx0 := x7.Args[1]
+               mem := x7.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               _ = o1.Args[1]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 24 {
+                       break
+               }
+               _ = o2.Args[1]
+               o3 := o2.Args[0]
+               if o3.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o3.AuxInt != 32 {
+                       break
+               }
+               _ = o3.Args[1]
+               o4 := o3.Args[0]
+               if o4.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o4.AuxInt != 40 {
+                       break
+               }
+               _ = o4.Args[1]
+               o5 := o4.Args[0]
+               if o5.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o5.AuxInt != 48 {
+                       break
+               }
+               _ = o5.Args[1]
+               s0 := o5.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 56 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
                x0 := y0.Args[0]
                if x0.Op != OpARM64MOVBUload {
                        break
                }
-               i3 := x0.AuxInt
-               s := x0.Aux
-               _ = x0.Args[1]
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               y1 := o1.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               if x0.AuxInt != 7 {
+                       break
+               }
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               if mem != x0.Args[1] {
+                       break
+               }
+               y1 := o5.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x1.AuxInt != 6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o4.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x2.AuxInt != 5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := o3.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x3.AuxInt != 4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               y4 := o2.Args[1]
+               if y4.Op != OpARM64MOVDnop {
                        break
                }
-               i2 := x1.AuxInt
-               if x1.Aux != s {
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] {
+               if x4.AuxInt != 3 {
                        break
                }
-               if mem != x1.Args[1] {
+               if x4.Aux != s {
                        break
                }
-               y2 := o0.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               _ = x4.Args[1]
+               if p != x4.Args[0] {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               if mem != x4.Args[1] {
                        break
                }
-               i1 := x2.AuxInt
-               if x2.Aux != s {
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUload {
                        break
                }
-               if mem != x2.Args[1] {
+               if x5.AuxInt != 2 {
                        break
                }
-               y3 := v.Args[1]
-               if y3.Op != OpARM64MOVDnop {
+               if x5.Aux != s {
                        break
                }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
+               _ = x5.Args[1]
+               if p != x5.Args[0] {
                        break
                }
-               i0 := x3.AuxInt
-               if x3.Aux != s {
+               if mem != x5.Args[1] {
                        break
                }
-               _ = x3.Args[1]
-               if p != x3.Args[0] {
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
                        break
                }
-               if mem != x3.Args[1] {
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUload {
                        break
                }
-               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+               if x6.AuxInt != 1 {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               if x6.Aux != s {
+                       break
+               }
+               _ = x6.Args[1]
+               p1 := x6.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x6.Args[1] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.Aux = s
-               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i0
-               v1.AddArg(p)
-               v0.AddArg(v1)
+               v0.AddArg(ptr0)
+               v0.AddArg(idx0)
                v0.AddArg(mem)
                return true
        }
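
The guard conditions above follow the same shape in every merged-load rule in this file: each intermediate value (the byte loads x0..x7, the MOVDnop wrappers y0..y7, the OR/shift tree o0..o5 and s0) must have exactly one use, mergePoint must find a block where the combined load can legally be placed (a nil result aborts the rewrite), and clobber marks the now-redundant values so they are cleaned up afterwards. The s == nil and isSamePtr checks additionally let a byte load whose address was materialized as (ADD ptr idx) plus a constant offset be unified with the register-indexed MOVBUloadidx form, so both addressing shapes feed the same MOVDloadidx result.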
-       // match: (OR <t> y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem))))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [7] idx) mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [6] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [4] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y7:(MOVDnop x7:(MOVBUloadidx ptr idx mem)))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr idx mem)
        for {
                t := v.Type
                _ = v.Args[1]
-               y3 := v.Args[0]
-               if y3.Op != OpARM64MOVDnop {
-                       break
-               }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
-                       break
-               }
-               i0 := x3.AuxInt
-               s := x3.Aux
-               _ = x3.Args[1]
-               p := x3.Args[0]
-               mem := x3.Args[1]
-               o0 := v.Args[1]
+               o0 := v.Args[0]
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
@@ -14308,11 +18371,43 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                        break
                }
                _ = o1.Args[1]
-               s0 := o1.Args[0]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 24 {
+                       break
+               }
+               _ = o2.Args[1]
+               o3 := o2.Args[0]
+               if o3.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o3.AuxInt != 32 {
+                       break
+               }
+               _ = o3.Args[1]
+               o4 := o3.Args[0]
+               if o4.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o4.AuxInt != 40 {
+                       break
+               }
+               _ = o4.Args[1]
+               o5 := o4.Args[0]
+               if o5.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o5.AuxInt != 48 {
+                       break
+               }
+               _ = o5.Args[1]
+               s0 := o5.Args[0]
                if s0.Op != OpARM64SLLconst {
                        break
                }
-               if s0.AuxInt != 24 {
+               if s0.AuxInt != 56 {
                        break
                }
                y0 := s0.Args[0]
@@ -14320,80 +18415,219 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                        break
                }
                x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               i3 := x0.AuxInt
-               if x0.Aux != s {
+               _ = x0.Args[2]
+               ptr := x0.Args[0]
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x0_1.AuxInt != 7 {
+                       break
+               }
+               idx := x0_1.Args[0]
+               mem := x0.Args[2]
+               y1 := o5.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
+                       break
+               }
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x1_1.AuxInt != 6 {
+                       break
+               }
+               if idx != x1_1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y2 := o4.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
+                       break
+               }
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x2_1.AuxInt != 5 {
+                       break
+               }
+               if idx != x2_1.Args[0] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               y3 := o3.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x3.Args[2]
+               if ptr != x3.Args[0] {
+                       break
+               }
+               x3_1 := x3.Args[1]
+               if x3_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x3_1.AuxInt != 4 {
+                       break
+               }
+               if idx != x3_1.Args[0] {
+                       break
+               }
+               if mem != x3.Args[2] {
+                       break
+               }
+               y4 := o2.Args[1]
+               if y4.Op != OpARM64MOVDnop {
+                       break
+               }
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x4.Args[2]
+               if ptr != x4.Args[0] {
+                       break
+               }
+               x4_1 := x4.Args[1]
+               if x4_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x4_1.AuxInt != 3 {
+                       break
+               }
+               if idx != x4_1.Args[0] {
+                       break
+               }
+               if mem != x4.Args[2] {
+                       break
+               }
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
+                       break
+               }
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x5.Args[2]
+               if ptr != x5.Args[0] {
+                       break
+               }
+               x5_1 := x5.Args[1]
+               if x5_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x5_1.AuxInt != 2 {
+                       break
+               }
+               if idx != x5_1.Args[0] {
+                       break
+               }
+               if mem != x5.Args[2] {
                        break
                }
-               _ = x0.Args[1]
-               if p != x0.Args[0] {
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
                        break
                }
-               if mem != x0.Args[1] {
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUloadidx {
                        break
                }
-               y1 := o1.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               _ = x6.Args[2]
+               if ptr != x6.Args[0] {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               x6_1 := x6.Args[1]
+               if x6_1.Op != OpARM64ADDconst {
                        break
                }
-               i2 := x1.AuxInt
-               if x1.Aux != s {
+               if x6_1.AuxInt != 1 {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] {
+               if idx != x6_1.Args[0] {
                        break
                }
-               if mem != x1.Args[1] {
+               if mem != x6.Args[2] {
                        break
                }
-               y2 := o0.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               y7 := v.Args[1]
+               if y7.Op != OpARM64MOVDnop {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUloadidx {
                        break
                }
-               i1 := x2.AuxInt
-               if x2.Aux != s {
+               _ = x7.Args[2]
+               if ptr != x7.Args[0] {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               if idx != x7.Args[1] {
                        break
                }
-               if mem != x2.Args[1] {
+               if mem != x7.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.Aux = s
-               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i0
-               v1.AddArg(p)
-               v0.AddArg(v1)
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
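
The rule just above is the core little-endian case: eight MOVBUloadidx byte loads at idx, idx+1, ..., idx+7, combined through the ORshiftLL tree, become a single MOVDloadidx. As a rough illustration only (the function and names below are hypothetical and not part of this CL), this is the kind of Go source that produces such a pattern:

    // load64 assembles a 64-bit little-endian value one byte at a time.
    // With a rule like the one above, the eight byte loads and the
    // shift/OR tree collapse into a single doubleword load at b[i].
    func load64(b []byte, i int) uint64 {
            return uint64(b[i]) | uint64(b[i+1])<<8 | uint64(b[i+2])<<16 |
                    uint64(b[i+3])<<24 | uint64(b[i+4])<<32 | uint64(b[i+5])<<40 |
                    uint64(b[i+6])<<48 | uint64(b[i+7])<<56
    }

Because the byte at the highest offset (idx+7) is the one shifted into the top bits, the assembled value already has little-endian byte order and no byte reversal is needed in the result.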
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr0 idx0 mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr0 idx0 mem)
+       // match: (OR <t> y7:(MOVDnop x7:(MOVBUloadidx ptr idx mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [7] idx) mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [6] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [4] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [1] idx) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr idx mem)
        for {
                t := v.Type
                _ = v.Args[1]
-               o0 := v.Args[0]
+               y7 := v.Args[0]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x7.Args[2]
+               ptr := x7.Args[0]
+               idx := x7.Args[1]
+               mem := x7.Args[2]
+               o0 := v.Args[1]
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
@@ -14409,11 +18643,43 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                        break
                }
                _ = o1.Args[1]
-               s0 := o1.Args[0]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 24 {
+                       break
+               }
+               _ = o2.Args[1]
+               o3 := o2.Args[0]
+               if o3.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o3.AuxInt != 32 {
+                       break
+               }
+               _ = o3.Args[1]
+               o4 := o3.Args[0]
+               if o4.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o4.AuxInt != 40 {
+                       break
+               }
+               _ = o4.Args[1]
+               o5 := o4.Args[0]
+               if o5.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o5.AuxInt != 48 {
+                       break
+               }
+               _ = o5.Args[1]
+               s0 := o5.Args[0]
                if s0.Op != OpARM64SLLconst {
                        break
                }
-               if s0.AuxInt != 24 {
+               if s0.AuxInt != 56 {
                        break
                }
                y0 := s0.Args[0]
@@ -14421,63 +18687,77 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                        break
                }
                x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x0.AuxInt != 3 {
+               _ = x0.Args[2]
+               if ptr != x0.Args[0] {
                        break
                }
-               s := x0.Aux
-               _ = x0.Args[1]
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               y1 := o1.Args[1]
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x0_1.AuxInt != 7 {
+                       break
+               }
+               if idx != x0_1.Args[0] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y1 := o5.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
                x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               if x1.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x1.AuxInt != 2 {
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
                        break
                }
-               if x1.Aux != s {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] {
+               if x1_1.AuxInt != 6 {
                        break
                }
-               if mem != x1.Args[1] {
+               if idx != x1_1.Args[0] {
                        break
                }
-               y2 := o0.Args[1]
+               if mem != x1.Args[2] {
+                       break
+               }
+               y2 := o4.Args[1]
                if y2.Op != OpARM64MOVDnop {
                        break
                }
                x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               if x2.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x2.AuxInt != 1 {
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
                        break
                }
-               if x2.Aux != s {
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x2.Args[1]
-               p1 := x2.Args[0]
-               if p1.Op != OpARM64ADD {
+               if x2_1.AuxInt != 5 {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               if mem != x2.Args[1] {
+               if idx != x2_1.Args[0] {
                        break
                }
-               y3 := v.Args[1]
+               if mem != x2.Args[2] {
+                       break
+               }
+               y3 := o3.Args[1]
                if y3.Op != OpARM64MOVDnop {
                        break
                }
@@ -14486,142 +18766,112 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                        break
                }
                _ = x3.Args[2]
-               ptr0 := x3.Args[0]
-               idx0 := x3.Args[1]
-               if mem != x3.Args[2] {
+               if ptr != x3.Args[0] {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+               x3_1 := x3.Args[1]
+               if x3_1.Op != OpARM64ADDconst {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AddArg(ptr0)
-               v0.AddArg(idx0)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (OR <t> y3:(MOVDnop x3:(MOVBUloadidx ptr0 idx0 mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr0 idx0 mem)
-       for {
-               t := v.Type
-               _ = v.Args[1]
-               y3 := v.Args[0]
-               if y3.Op != OpARM64MOVDnop {
+               if x3_1.AuxInt != 4 {
                        break
                }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUloadidx {
+               if idx != x3_1.Args[0] {
                        break
                }
-               _ = x3.Args[2]
-               ptr0 := x3.Args[0]
-               idx0 := x3.Args[1]
-               mem := x3.Args[2]
-               o0 := v.Args[1]
-               if o0.Op != OpARM64ORshiftLL {
+               if mem != x3.Args[2] {
                        break
                }
-               if o0.AuxInt != 8 {
+               y4 := o2.Args[1]
+               if y4.Op != OpARM64MOVDnop {
                        break
                }
-               _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL {
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if o1.AuxInt != 16 {
+               _ = x4.Args[2]
+               if ptr != x4.Args[0] {
                        break
                }
-               _ = o1.Args[1]
-               s0 := o1.Args[0]
-               if s0.Op != OpARM64SLLconst {
+               x4_1 := x4.Args[1]
+               if x4_1.Op != OpARM64ADDconst {
                        break
                }
-               if s0.AuxInt != 24 {
+               if x4_1.AuxInt != 3 {
                        break
                }
-               y0 := s0.Args[0]
-               if y0.Op != OpARM64MOVDnop {
+               if idx != x4_1.Args[0] {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
+               if mem != x4.Args[2] {
                        break
                }
-               if x0.AuxInt != 3 {
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
                        break
                }
-               s := x0.Aux
-               _ = x0.Args[1]
-               p := x0.Args[0]
-               if mem != x0.Args[1] {
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUloadidx {
                        break
                }
-               y1 := o1.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               _ = x5.Args[2]
+               if ptr != x5.Args[0] {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               x5_1 := x5.Args[1]
+               if x5_1.Op != OpARM64ADDconst {
                        break
                }
-               if x1.AuxInt != 2 {
+               if x5_1.AuxInt != 2 {
                        break
                }
-               if x1.Aux != s {
+               if idx != x5_1.Args[0] {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] {
+               if mem != x5.Args[2] {
                        break
                }
-               if mem != x1.Args[1] {
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
                        break
                }
-               y2 := o0.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUloadidx {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               _ = x6.Args[2]
+               if ptr != x6.Args[0] {
                        break
                }
-               if x2.AuxInt != 1 {
+               x6_1 := x6.Args[1]
+               if x6_1.Op != OpARM64ADDconst {
                        break
                }
-               if x2.Aux != s {
+               if x6_1.AuxInt != 1 {
                        break
                }
-               _ = x2.Args[1]
-               p1 := x2.Args[0]
-               if p1.Op != OpARM64ADD {
+               if idx != x6_1.Args[0] {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               if mem != x2.Args[1] {
+               if mem != x6.Args[2] {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AddArg(ptr0)
-               v0.AddArg(idx0)
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
+       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
                _ = v.Args[1]
@@ -14641,43 +18891,11 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                        break
                }
                _ = o1.Args[1]
-               o2 := o1.Args[0]
-               if o2.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o2.AuxInt != 24 {
-                       break
-               }
-               _ = o2.Args[1]
-               o3 := o2.Args[0]
-               if o3.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o3.AuxInt != 32 {
-                       break
-               }
-               _ = o3.Args[1]
-               o4 := o3.Args[0]
-               if o4.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o4.AuxInt != 40 {
-                       break
-               }
-               _ = o4.Args[1]
-               o5 := o4.Args[0]
-               if o5.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o5.AuxInt != 48 {
-                       break
-               }
-               _ = o5.Args[1]
-               s0 := o5.Args[0]
+               s0 := o1.Args[0]
                if s0.Op != OpARM64SLLconst {
                        break
                }
-               if s0.AuxInt != 56 {
+               if s0.AuxInt != 24 {
                        break
                }
                y0 := s0.Args[0]
@@ -14688,12 +18906,12 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x0.Op != OpARM64MOVBUload {
                        break
                }
-               i7 := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                _ = x0.Args[1]
                p := x0.Args[0]
                mem := x0.Args[1]
-               y1 := o5.Args[1]
+               y1 := o1.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
@@ -14701,7 +18919,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               i6 := x1.AuxInt
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -14712,7 +18930,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if mem != x1.Args[1] {
                        break
                }
-               y2 := o4.Args[1]
+               y2 := o0.Args[1]
                if y2.Op != OpARM64MOVDnop {
                        break
                }
@@ -14720,7 +18938,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               i5 := x2.AuxInt
+               i2 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
@@ -14731,7 +18949,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if mem != x2.Args[1] {
                        break
                }
-               y3 := o3.Args[1]
+               y3 := v.Args[1]
                if y3.Op != OpARM64MOVDnop {
                        break
                }
@@ -14739,7 +18957,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               i4 := x3.AuxInt
+               i3 := x3.AuxInt
                if x3.Aux != s {
                        break
                }
@@ -14750,122 +18968,146 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if mem != x3.Args[1] {
                        break
                }
-               y4 := o2.Args[1]
-               if y4.Op != OpARM64MOVDnop {
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
                        break
                }
-               x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUload {
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v1.Aux = s
+               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v2.AuxInt = i0
+               v2.AddArg(p)
+               v1.AddArg(v2)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
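
In the rule above the byte at the lowest offset i0 is the one shifted into the top bits (SLLconst [24]), i.e. the bytes are assembled in big-endian order. The rewrite therefore still performs one 32-bit load but follows it with a REVW byte reversal. A hypothetical source-level shape for this pattern (names are illustrative, not taken from the CL):

    // load32be assembles a 32-bit big-endian value byte by byte.
    // A rule of this form turns the four byte loads into one MOVWU
    // load plus a single REVW to swap the byte order.
    func load32be(b []byte) uint32 {
            return uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3])
    }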
+       // match: (OR <t> y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))))
+       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               y3 := v.Args[0]
+               if y3.Op != OpARM64MOVDnop {
                        break
                }
-               i3 := x4.AuxInt
-               if x4.Aux != s {
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
                        break
                }
-               _ = x4.Args[1]
-               if p != x4.Args[0] {
+               i3 := x3.AuxInt
+               s := x3.Aux
+               _ = x3.Args[1]
+               p := x3.Args[0]
+               mem := x3.Args[1]
+               o0 := v.Args[1]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               if mem != x4.Args[1] {
+               if o0.AuxInt != 8 {
                        break
                }
-               y5 := o1.Args[1]
-               if y5.Op != OpARM64MOVDnop {
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
                        break
                }
-               x5 := y5.Args[0]
-               if x5.Op != OpARM64MOVBUload {
+               if o1.AuxInt != 16 {
                        break
                }
-               i2 := x5.AuxInt
-               if x5.Aux != s {
+               _ = o1.Args[1]
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
                        break
                }
-               _ = x5.Args[1]
-               if p != x5.Args[0] {
+               if s0.AuxInt != 24 {
                        break
                }
-               if mem != x5.Args[1] {
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               y6 := o0.Args[1]
-               if y6.Op != OpARM64MOVDnop {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
                        break
                }
-               x6 := y6.Args[0]
-               if x6.Op != OpARM64MOVBUload {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               i1 := x6.AuxInt
-               if x6.Aux != s {
+               _ = x0.Args[1]
+               if p != x0.Args[0] {
                        break
                }
-               _ = x6.Args[1]
-               if p != x6.Args[0] {
+               if mem != x0.Args[1] {
                        break
                }
-               if mem != x6.Args[1] {
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               y7 := v.Args[1]
-               if y7.Op != OpARM64MOVDnop {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUload {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               i0 := x7.AuxInt
-               if x7.Aux != s {
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
                        break
                }
-               _ = x7.Args[1]
-               if p != x7.Args[0] {
+               if mem != x1.Args[1] {
                        break
                }
-               if mem != x7.Args[1] {
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
+               i2 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.Aux = s
-               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i0
-               v1.AddArg(p)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v1.Aux = s
+               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v2.AuxInt = i0
+               v2.AddArg(p)
+               v1.AddArg(v2)
+               v1.AddArg(mem)
                v0.AddArg(v1)
-               v0.AddArg(mem)
                return true
        }
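
Since OR is commutative, the generated matcher carries each of these patterns twice, once per operand order of the outermost OR: the rule ending here differs from the previous one only in whether the lone byte load (y3) is matched as v.Args[0] or as v.Args[1]. The same pairing appears for the eight-byte rules earlier in the file.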
-       return false
-}
-func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem))))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
        for {
                t := v.Type
                _ = v.Args[1]
-               y7 := v.Args[0]
-               if y7.Op != OpARM64MOVDnop {
-                       break
-               }
-               x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUload {
-                       break
-               }
-               i0 := x7.AuxInt
-               s := x7.Aux
-               _ = x7.Args[1]
-               p := x7.Args[0]
-               mem := x7.Args[1]
-               o0 := v.Args[1]
+               o0 := v.Args[0]
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
@@ -14881,65 +19123,26 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                _ = o1.Args[1]
-               o2 := o1.Args[0]
-               if o2.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o2.AuxInt != 24 {
-                       break
-               }
-               _ = o2.Args[1]
-               o3 := o2.Args[0]
-               if o3.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o3.AuxInt != 32 {
-                       break
-               }
-               _ = o3.Args[1]
-               o4 := o3.Args[0]
-               if o4.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o4.AuxInt != 40 {
-                       break
-               }
-               _ = o4.Args[1]
-               o5 := o4.Args[0]
-               if o5.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o5.AuxInt != 48 {
-                       break
-               }
-               _ = o5.Args[1]
-               s0 := o5.Args[0]
+               s0 := o1.Args[0]
                if s0.Op != OpARM64SLLconst {
                        break
-               }
-               if s0.AuxInt != 56 {
-                       break
-               }
-               y0 := s0.Args[0]
-               if y0.Op != OpARM64MOVDnop {
-                       break
-               }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
-                       break
-               }
-               i7 := x0.AuxInt
-               if x0.Aux != s {
+               }
+               if s0.AuxInt != 24 {
                        break
                }
-               _ = x0.Args[1]
-               if p != x0.Args[0] {
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               if mem != x0.Args[1] {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               y1 := o5.Args[1]
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               mem := x0.Args[2]
+               y1 := o1.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
@@ -14947,18 +19150,22 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               i6 := x1.AuxInt
-               if x1.Aux != s {
+               if x1.AuxInt != 1 {
                        break
                }
+               s := x1.Aux
                _ = x1.Args[1]
-               if p != x1.Args[0] {
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
                if mem != x1.Args[1] {
                        break
                }
-               y2 := o4.Args[1]
+               y2 := o0.Args[1]
                if y2.Op != OpARM64MOVDnop {
                        break
                }
@@ -14966,18 +19173,18 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               i5 := x2.AuxInt
-               if x2.Aux != s {
+               if x2.AuxInt != 2 {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               if x2.Aux != s {
                        break
                }
+               _ = x2.Args[1]
+               p := x2.Args[0]
                if mem != x2.Args[1] {
                        break
                }
-               y3 := o3.Args[1]
+               y3 := v.Args[1]
                if y3.Op != OpARM64MOVDnop {
                        break
                }
@@ -14985,7 +19192,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               i4 := x3.AuxInt
+               if x3.AuxInt != 3 {
+                       break
+               }
                if x3.Aux != s {
                        break
                }
@@ -14996,81 +19205,146 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if mem != x3.Args[1] {
                        break
                }
-               y4 := o2.Args[1]
-               if y4.Op != OpARM64MOVDnop {
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
                        break
                }
-               x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUload {
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v1.AddArg(ptr0)
+               v1.AddArg(idx0)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       return false
+}
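
Roughly speaking, the merged result of the indexed big-endian rules can then be lowered to a register-indexed word load plus a byte reversal, along the lines of the following sketch (placeholder registers, not output copied from the compiler):

    MOVWU   (R0)(R1), R2    // MOVWUloadidx: load 4 bytes at R0+R1
    REVW    R2, R2          // reverse byte order within the word

compared with the four MOVBU loads and the OR/shift sequence needed before this CL.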
+func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (OR <t> y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               y3 := v.Args[0]
+               if y3.Op != OpARM64MOVDnop {
                        break
                }
-               i3 := x4.AuxInt
-               if x4.Aux != s {
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
                        break
                }
-               _ = x4.Args[1]
-               if p != x4.Args[0] {
+               if x3.AuxInt != 3 {
                        break
                }
-               if mem != x4.Args[1] {
+               s := x3.Aux
+               _ = x3.Args[1]
+               p := x3.Args[0]
+               mem := x3.Args[1]
+               o0 := v.Args[1]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               y5 := o1.Args[1]
-               if y5.Op != OpARM64MOVDnop {
+               if o0.AuxInt != 8 {
                        break
                }
-               x5 := y5.Args[0]
-               if x5.Op != OpARM64MOVBUload {
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
                        break
                }
-               i2 := x5.AuxInt
-               if x5.Aux != s {
+               if o1.AuxInt != 16 {
                        break
                }
-               _ = x5.Args[1]
-               if p != x5.Args[0] {
+               _ = o1.Args[1]
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
                        break
                }
-               if mem != x5.Args[1] {
+               if s0.AuxInt != 24 {
                        break
                }
-               y6 := o0.Args[1]
-               if y6.Op != OpARM64MOVDnop {
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               x6 := y6.Args[0]
-               if x6.Op != OpARM64MOVBUload {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               i1 := x6.AuxInt
-               if x6.Aux != s {
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               if mem != x0.Args[2] {
                        break
                }
-               _ = x6.Args[1]
-               if p != x6.Args[0] {
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               if mem != x6.Args[1] {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               if x1.AuxInt != 1 {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x2.AuxInt != 2 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.Aux = s
-               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i0
-               v1.AddArg(p)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v1.AddArg(ptr0)
+               v1.AddArg(idx0)
+               v1.AddArg(mem)
                v0.AddArg(v1)
-               v0.AddArg(mem)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr0 idx0 mem)
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
        for {
                t := v.Type
                _ = v.Args[1]
@@ -15090,235 +19364,251 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                _ = o1.Args[1]
-               o2 := o1.Args[0]
-               if o2.Op != OpARM64ORshiftLL {
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
                        break
                }
-               if o2.AuxInt != 24 {
+               if s0.AuxInt != 24 {
                        break
                }
-               _ = o2.Args[1]
-               o3 := o2.Args[0]
-               if o3.Op != OpARM64ORshiftLL {
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               if o3.AuxInt != 32 {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               _ = o3.Args[1]
-               o4 := o3.Args[0]
-               if o4.Op != OpARM64ORshiftLL {
+               _ = x0.Args[2]
+               ptr := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               if o4.AuxInt != 40 {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUloadidx {
                        break
                }
-               _ = o4.Args[1]
-               o5 := o4.Args[0]
-               if o5.Op != OpARM64ORshiftLL {
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
                        break
                }
-               if o5.AuxInt != 48 {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = o5.Args[1]
-               s0 := o5.Args[0]
-               if s0.Op != OpARM64SLLconst {
+               if x1_1.AuxInt != 1 {
                        break
                }
-               if s0.AuxInt != 56 {
+               if idx != x1_1.Args[0] {
                        break
                }
-               y0 := s0.Args[0]
-               if y0.Op != OpARM64MOVDnop {
+               if mem != x1.Args[2] {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               if x0.AuxInt != 7 {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUloadidx {
                        break
                }
-               s := x0.Aux
-               _ = x0.Args[1]
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               y1 := o5.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64ADDconst {
                        break
                }
-               if x1.AuxInt != 6 {
+               if x2_1.AuxInt != 2 {
                        break
                }
-               if x1.Aux != s {
+               if idx != x2_1.Args[0] {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] {
+               if mem != x2.Args[2] {
                        break
                }
-               if mem != x1.Args[1] {
+               y3 := v.Args[1]
+               if y3.Op != OpARM64MOVDnop {
                        break
                }
-               y2 := o4.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUloadidx {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               _ = x3.Args[2]
+               if ptr != x3.Args[0] {
                        break
                }
-               if x2.AuxInt != 5 {
+               x3_1 := x3.Args[1]
+               if x3_1.Op != OpARM64ADDconst {
                        break
                }
-               if x2.Aux != s {
+               if x3_1.AuxInt != 3 {
+                       break
+               }
+               if idx != x3_1.Args[0] {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               if mem != x3.Args[2] {
                        break
                }
-               if mem != x2.Args[1] {
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
                        break
                }
-               y3 := o3.Args[1]
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v1.AddArg(ptr)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR <t> y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               y3 := v.Args[0]
                if y3.Op != OpARM64MOVDnop {
                        break
                }
                x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
+               if x3.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x3.AuxInt != 4 {
+               _ = x3.Args[2]
+               ptr := x3.Args[0]
+               x3_1 := x3.Args[1]
+               if x3_1.Op != OpARM64ADDconst {
                        break
                }
-               if x3.Aux != s {
+               if x3_1.AuxInt != 3 {
                        break
                }
-               _ = x3.Args[1]
-               if p != x3.Args[0] {
+               idx := x3_1.Args[0]
+               mem := x3.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               if mem != x3.Args[1] {
+               if o0.AuxInt != 8 {
                        break
                }
-               y4 := o2.Args[1]
-               if y4.Op != OpARM64MOVDnop {
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
                        break
                }
-               x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUload {
+               if o1.AuxInt != 16 {
                        break
                }
-               if x4.AuxInt != 3 {
+               _ = o1.Args[1]
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
                        break
                }
-               if x4.Aux != s {
+               if s0.AuxInt != 24 {
                        break
                }
-               _ = x4.Args[1]
-               if p != x4.Args[0] {
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               if mem != x4.Args[1] {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               y5 := o1.Args[1]
-               if y5.Op != OpARM64MOVDnop {
+               _ = x0.Args[2]
+               if ptr != x0.Args[0] {
                        break
                }
-               x5 := y5.Args[0]
-               if x5.Op != OpARM64MOVBUload {
+               if idx != x0.Args[1] {
                        break
                }
-               if x5.AuxInt != 2 {
+               if mem != x0.Args[2] {
                        break
                }
-               if x5.Aux != s {
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               _ = x5.Args[1]
-               if p != x5.Args[0] {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if mem != x5.Args[1] {
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
                        break
                }
-               y6 := o0.Args[1]
-               if y6.Op != OpARM64MOVDnop {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               x6 := y6.Args[0]
-               if x6.Op != OpARM64MOVBUload {
+               if x1_1.AuxInt != 1 {
                        break
                }
-               if x6.AuxInt != 1 {
+               if idx != x1_1.Args[0] {
                        break
                }
-               if x6.Aux != s {
+               if mem != x1.Args[2] {
                        break
                }
-               _ = x6.Args[1]
-               p1 := x6.Args[0]
-               if p1.Op != OpARM64ADD {
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               if mem != x6.Args[1] {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUloadidx {
                        break
                }
-               y7 := v.Args[1]
-               if y7.Op != OpARM64MOVDnop {
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
                        break
                }
-               x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUloadidx {
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x7.Args[2]
-               ptr0 := x7.Args[0]
-               idx0 := x7.Args[1]
-               if mem != x7.Args[2] {
+               if x2_1.AuxInt != 2 {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               if idx != x2_1.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AddArg(ptr0)
-               v0.AddArg(idx0)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v1.AddArg(ptr)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr0 idx0 mem)
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)))
+       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
                _ = v.Args[1]
-               y7 := v.Args[0]
-               if y7.Op != OpARM64MOVDnop {
-                       break
-               }
-               x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               _ = x7.Args[2]
-               ptr0 := x7.Args[0]
-               idx0 := x7.Args[1]
-               mem := x7.Args[2]
-               o0 := v.Args[1]
+               o0 := v.Args[0]
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
@@ -15381,15 +19671,11 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x0.Op != OpARM64MOVBUload {
                        break
                }
-               if x0.AuxInt != 7 {
-                       break
-               }
+               i0 := x0.AuxInt
                s := x0.Aux
                _ = x0.Args[1]
                p := x0.Args[0]
-               if mem != x0.Args[1] {
-                       break
-               }
+               mem := x0.Args[1]
                y1 := o5.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
@@ -15398,9 +19684,7 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != 6 {
-                       break
-               }
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -15419,9 +19703,7 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != 5 {
-                       break
-               }
+               i2 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
@@ -15440,9 +19722,7 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               if x3.AuxInt != 4 {
-                       break
-               }
+               i3 := x3.AuxInt
                if x3.Aux != s {
                        break
                }
@@ -15461,9 +19741,7 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x4.Op != OpARM64MOVBUload {
                        break
                }
-               if x4.AuxInt != 3 {
-                       break
-               }
+               i4 := x4.AuxInt
                if x4.Aux != s {
                        break
                }
@@ -15471,174 +19749,74 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if p != x4.Args[0] {
                        break
                }
-               if mem != x4.Args[1] {
-                       break
-               }
-               y5 := o1.Args[1]
-               if y5.Op != OpARM64MOVDnop {
-                       break
-               }
-               x5 := y5.Args[0]
-               if x5.Op != OpARM64MOVBUload {
-                       break
-               }
-               if x5.AuxInt != 2 {
-                       break
-               }
-               if x5.Aux != s {
-                       break
-               }
-               _ = x5.Args[1]
-               if p != x5.Args[0] {
-                       break
-               }
-               if mem != x5.Args[1] {
-                       break
-               }
-               y6 := o0.Args[1]
-               if y6.Op != OpARM64MOVDnop {
-                       break
-               }
-               x6 := y6.Args[0]
-               if x6.Op != OpARM64MOVBUload {
-                       break
-               }
-               if x6.AuxInt != 1 {
-                       break
-               }
-               if x6.Aux != s {
-                       break
-               }
-               _ = x6.Args[1]
-               p1 := x6.Args[0]
-               if p1.Op != OpARM64ADD {
-                       break
-               }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               if mem != x6.Args[1] {
-                       break
-               }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
-                       break
-               }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AddArg(ptr0)
-               v0.AddArg(idx0)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
-       for {
-               t := v.Type
-               _ = v.Args[1]
-               o0 := v.Args[0]
-               if o0.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o0.AuxInt != 8 {
-                       break
-               }
-               _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o1.AuxInt != 16 {
-                       break
-               }
-               _ = o1.Args[1]
-               s0 := o1.Args[0]
-               if s0.Op != OpARM64SLLconst {
-                       break
-               }
-               if s0.AuxInt != 24 {
-                       break
-               }
-               y0 := s0.Args[0]
-               if y0.Op != OpARM64MOVDnop {
-                       break
-               }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
+               if mem != x4.Args[1] {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               _ = x0.Args[1]
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               y1 := o1.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUload {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               i5 := x5.AuxInt
+               if x5.Aux != s {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] {
+               _ = x5.Args[1]
+               if p != x5.Args[0] {
                        break
                }
-               if mem != x1.Args[1] {
+               if mem != x5.Args[1] {
                        break
                }
-               y2 := o0.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUload {
                        break
                }
-               i2 := x2.AuxInt
-               if x2.Aux != s {
+               i6 := x6.AuxInt
+               if x6.Aux != s {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               _ = x6.Args[1]
+               if p != x6.Args[0] {
                        break
                }
-               if mem != x2.Args[1] {
+               if mem != x6.Args[1] {
                        break
                }
-               y3 := v.Args[1]
-               if y3.Op != OpARM64MOVDnop {
+               y7 := v.Args[1]
+               if y7.Op != OpARM64MOVDnop {
                        break
                }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUload {
                        break
                }
-               i3 := x3.AuxInt
-               if x3.Aux != s {
+               i7 := x7.AuxInt
+               if x7.Aux != s {
                        break
                }
-               _ = x3.Args[1]
-               if p != x3.Args[0] {
+               _ = x7.Args[1]
+               if p != x7.Args[0] {
                        break
                }
-               if mem != x3.Args[1] {
+               if mem != x7.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
                v1.Aux = s
                v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
                v2.AuxInt = i0
@@ -15648,25 +19826,25 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem))))
+       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
                _ = v.Args[1]
-               y3 := v.Args[0]
-               if y3.Op != OpARM64MOVDnop {
+               y7 := v.Args[0]
+               if y7.Op != OpARM64MOVDnop {
                        break
                }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUload {
                        break
                }
-               i3 := x3.AuxInt
-               s := x3.Aux
-               _ = x3.Args[1]
-               p := x3.Args[0]
-               mem := x3.Args[1]
+               i7 := x7.AuxInt
+               s := x7.Aux
+               _ = x7.Args[1]
+               p := x7.Args[0]
+               mem := x7.Args[1]
                o0 := v.Args[1]
                if o0.Op != OpARM64ORshiftLL {
                        break
@@ -15683,11 +19861,43 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                _ = o1.Args[1]
-               s0 := o1.Args[0]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 24 {
+                       break
+               }
+               _ = o2.Args[1]
+               o3 := o2.Args[0]
+               if o3.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o3.AuxInt != 32 {
+                       break
+               }
+               _ = o3.Args[1]
+               o4 := o3.Args[0]
+               if o4.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o4.AuxInt != 40 {
+                       break
+               }
+               _ = o4.Args[1]
+               o5 := o4.Args[0]
+               if o5.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o5.AuxInt != 48 {
+                       break
+               }
+               _ = o5.Args[1]
+               s0 := o5.Args[0]
                if s0.Op != OpARM64SLLconst {
                        break
                }
-               if s0.AuxInt != 24 {
+               if s0.AuxInt != 56 {
                        break
                }
                y0 := s0.Args[0]
@@ -15709,7 +19919,7 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if mem != x0.Args[1] {
                        break
                }
-               y1 := o1.Args[1]
+               y1 := o5.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
@@ -15728,7 +19938,7 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if mem != x1.Args[1] {
                        break
                }
-               y2 := o0.Args[1]
+               y2 := o4.Args[1]
                if y2.Op != OpARM64MOVDnop {
                        break
                }
@@ -15747,14 +19957,90 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if mem != x2.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+               y3 := o3.Args[1]
+               if y3.Op != OpARM64MOVDnop {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i3 := x3.AuxInt
+               if x3.Aux != s {
+                       break
+               }
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
+                       break
+               }
+               y4 := o2.Args[1]
+               if y4.Op != OpARM64MOVDnop {
+                       break
+               }
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
+                       break
+               }
+               i4 := x4.AuxInt
+               if x4.Aux != s {
+                       break
+               }
+               _ = x4.Args[1]
+               if p != x4.Args[0] {
+                       break
+               }
+               if mem != x4.Args[1] {
+                       break
+               }
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
+                       break
+               }
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUload {
+                       break
+               }
+               i5 := x5.AuxInt
+               if x5.Aux != s {
+                       break
+               }
+               _ = x5.Args[1]
+               if p != x5.Args[0] {
+                       break
+               }
+               if mem != x5.Args[1] {
+                       break
+               }
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
+                       break
+               }
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUload {
+                       break
+               }
+               i6 := x6.AuxInt
+               if x6.Aux != s {
+                       break
+               }
+               _ = x6.Args[1]
+               if p != x6.Args[0] {
+                       break
+               }
+               if mem != x6.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
                v1.Aux = s
                v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
                v2.AuxInt = i0
@@ -15764,9 +20050,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [6] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [7] {s} p mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
        for {
                t := v.Type
                _ = v.Args[1]
@@ -15786,11 +20072,43 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                _ = o1.Args[1]
-               s0 := o1.Args[0]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 24 {
+                       break
+               }
+               _ = o2.Args[1]
+               o3 := o2.Args[0]
+               if o3.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o3.AuxInt != 32 {
+                       break
+               }
+               _ = o3.Args[1]
+               o4 := o3.Args[0]
+               if o4.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o4.AuxInt != 40 {
+                       break
+               }
+               _ = o4.Args[1]
+               o5 := o4.Args[0]
+               if o5.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o5.AuxInt != 48 {
+                       break
+               }
+               _ = o5.Args[1]
+               s0 := o5.Args[0]
                if s0.Op != OpARM64SLLconst {
                        break
                }
-               if s0.AuxInt != 24 {
+               if s0.AuxInt != 56 {
                        break
                }
                y0 := s0.Args[0]
@@ -15805,7 +20123,7 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                ptr0 := x0.Args[0]
                idx0 := x0.Args[1]
                mem := x0.Args[2]
-               y1 := o1.Args[1]
+               y1 := o5.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
@@ -15828,7 +20146,7 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if mem != x1.Args[1] {
                        break
                }
-               y2 := o0.Args[1]
+               y2 := o4.Args[1]
                if y2.Op != OpARM64MOVDnop {
                        break
                }
@@ -15847,7 +20165,7 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if mem != x2.Args[1] {
                        break
                }
-               y3 := v.Args[1]
+               y3 := o3.Args[1]
                if y3.Op != OpARM64MOVDnop {
                        break
                }
@@ -15868,145 +20186,126 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if mem != x3.Args[1] {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
-                       break
-               }
-               b = mergePoint(b, x0, x1, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
-               v1.AddArg(ptr0)
-               v1.AddArg(idx0)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
-               return true
-       }
-       // match: (OR <t> y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
-       for {
-               t := v.Type
-               _ = v.Args[1]
-               y3 := v.Args[0]
-               if y3.Op != OpARM64MOVDnop {
+               y4 := o2.Args[1]
+               if y4.Op != OpARM64MOVDnop {
                        break
                }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
                        break
                }
-               if x3.AuxInt != 3 {
+               if x4.AuxInt != 4 {
                        break
                }
-               s := x3.Aux
-               _ = x3.Args[1]
-               p := x3.Args[0]
-               mem := x3.Args[1]
-               o0 := v.Args[1]
-               if o0.Op != OpARM64ORshiftLL {
+               if x4.Aux != s {
                        break
                }
-               if o0.AuxInt != 8 {
+               _ = x4.Args[1]
+               if p != x4.Args[0] {
                        break
                }
-               _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL {
+               if mem != x4.Args[1] {
                        break
                }
-               if o1.AuxInt != 16 {
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
                        break
                }
-               _ = o1.Args[1]
-               s0 := o1.Args[0]
-               if s0.Op != OpARM64SLLconst {
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUload {
                        break
                }
-               if s0.AuxInt != 24 {
+               if x5.AuxInt != 5 {
                        break
                }
-               y0 := s0.Args[0]
-               if y0.Op != OpARM64MOVDnop {
+               if x5.Aux != s {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUloadidx {
+               _ = x5.Args[1]
+               if p != x5.Args[0] {
                        break
                }
-               _ = x0.Args[2]
-               ptr0 := x0.Args[0]
-               idx0 := x0.Args[1]
-               if mem != x0.Args[2] {
+               if mem != x5.Args[1] {
                        break
                }
-               y1 := o1.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != 1 {
+               if x6.AuxInt != 6 {
                        break
                }
-               if x1.Aux != s {
+               if x6.Aux != s {
                        break
                }
-               _ = x1.Args[1]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
+               _ = x6.Args[1]
+               if p != x6.Args[0] {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               if mem != x1.Args[1] {
+               if mem != x6.Args[1] {
                        break
                }
-               y2 := o0.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               y7 := v.Args[1]
+               if y7.Op != OpARM64MOVDnop {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != 2 {
+               if x7.AuxInt != 7 {
                        break
                }
-               if x2.Aux != s {
+               if x7.Aux != s {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               _ = x7.Args[1]
+               if p != x7.Args[0] {
                        break
                }
-               if mem != x2.Args[1] {
+               if mem != x7.Args[1] {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
                v1.AddArg(ptr0)
                v1.AddArg(idx0)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
        }
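
The byte-merging rule that ends here recognizes eight single-byte loads combined highest byte first (big-endian order) and replaces them with one indexed doubleword load followed by REV, which byte-reverses the little-endian load on ARM64. A minimal sketch of Go source that takes this shape (hypothetical helper, not taken from this CL):

	func beUint64(b []byte, i int) uint64 {
		return uint64(b[i])<<56 | uint64(b[i+1])<<48 | uint64(b[i+2])<<40 |
			uint64(b[i+3])<<32 | uint64(b[i+4])<<24 | uint64(b[i+5])<<16 |
			uint64(b[i+6])<<8 | uint64(b[i+7])
	}

With a non-constant index the individual loads show up as MOVBUloadidx, or as MOVBUload with a small offset off an (ADD ptr idx) pointer, which is why the indexed forms are matched in this group of rules.
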
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [7] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [6] {s} p mem))))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
        for {
                t := v.Type
                _ = v.Args[1]
-               o0 := v.Args[0]
+               y7 := v.Args[0]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x7.AuxInt != 7 {
+                       break
+               }
+               s := x7.Aux
+               _ = x7.Args[1]
+               p := x7.Args[0]
+               mem := x7.Args[1]
+               o0 := v.Args[1]
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
@@ -16066,14 +20365,15 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
+               if x0.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               if mem != x0.Args[2] {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               _ = x0.Args[1]
-               p := x0.Args[0]
-               mem := x0.Args[1]
                y1 := o5.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
@@ -16082,14 +20382,20 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               i1 := x1.AuxInt
+               if x1.AuxInt != 1 {
+                       break
+               }
                if x1.Aux != s {
                        break
                }
                _ = x1.Args[1]
-               if p != x1.Args[0] {
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
                if mem != x1.Args[1] {
                        break
                }
@@ -16101,7 +20407,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               i2 := x2.AuxInt
+               if x2.AuxInt != 2 {
+                       break
+               }
                if x2.Aux != s {
                        break
                }
@@ -16120,7 +20428,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               i3 := x3.AuxInt
+               if x3.AuxInt != 3 {
+                       break
+               }
                if x3.Aux != s {
                        break
                }
@@ -16139,7 +20449,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x4.Op != OpARM64MOVBUload {
                        break
                }
-               i4 := x4.AuxInt
+               if x4.AuxInt != 4 {
+                       break
+               }
                if x4.Aux != s {
                        break
                }
@@ -16158,7 +20470,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x5.Op != OpARM64MOVBUload {
                        break
                }
-               i5 := x5.AuxInt
+               if x5.AuxInt != 5 {
+                       break
+               }
                if x5.Aux != s {
                        break
                }
@@ -16177,7 +20491,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x6.Op != OpARM64MOVBUload {
                        break
                }
-               i6 := x6.AuxInt
+               if x6.AuxInt != 6 {
+                       break
+               }
                if x6.Aux != s {
                        break
                }
@@ -16188,62 +20504,27 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if mem != x6.Args[1] {
                        break
                }
-               y7 := v.Args[1]
-               if y7.Op != OpARM64MOVDnop {
-                       break
-               }
-               x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUload {
-                       break
-               }
-               i7 := x7.AuxInt
-               if x7.Aux != s {
-                       break
-               }
-               _ = x7.Args[1]
-               if p != x7.Args[0] {
-                       break
-               }
-               if mem != x7.Args[1] {
-                       break
-               }
-               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
                v0 := b.NewValue0(v.Pos, OpARM64REV, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
-               v1.Aux = s
-               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i0
-               v2.AddArg(p)
-               v1.AddArg(v2)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v1.AddArg(ptr0)
+               v1.AddArg(idx0)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem))))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [4] idx) mem))) y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [5] idx) mem))) y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [6] idx) mem))) y7:(MOVDnop x7:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr idx mem))
        for {
                t := v.Type
                _ = v.Args[1]
-               y7 := v.Args[0]
-               if y7.Op != OpARM64MOVDnop {
-                       break
-               }
-               x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUload {
-                       break
-               }
-               i7 := x7.AuxInt
-               s := x7.Aux
-               _ = x7.Args[1]
-               p := x7.Args[0]
-               mem := x7.Args[1]
-               o0 := v.Args[1]
+               o0 := v.Args[0]
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
@@ -16303,37 +20584,36 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
-                       break
-               }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               _ = x0.Args[1]
-               if p != x0.Args[0] {
+               _ = x0.Args[2]
+               ptr := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y1 := o5.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               if mem != x0.Args[1] {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUloadidx {
                        break
                }
-               y1 := o5.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if x1_1.AuxInt != 1 {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] {
+               if idx != x1_1.Args[0] {
                        break
                }
-               if mem != x1.Args[1] {
+               if mem != x1.Args[2] {
                        break
                }
                y2 := o4.Args[1]
@@ -16341,18 +20621,24 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               if x2.Op != OpARM64MOVBUloadidx {
                        break
                }
-               i2 := x2.AuxInt
-               if x2.Aux != s {
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64ADDconst {
                        break
                }
-               if mem != x2.Args[1] {
+               if x2_1.AuxInt != 2 {
+                       break
+               }
+               if idx != x2_1.Args[0] {
+                       break
+               }
+               if mem != x2.Args[2] {
                        break
                }
                y3 := o3.Args[1]
@@ -16360,18 +20646,24 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
+               if x3.Op != OpARM64MOVBUloadidx {
                        break
                }
-               i3 := x3.AuxInt
-               if x3.Aux != s {
+               _ = x3.Args[2]
+               if ptr != x3.Args[0] {
                        break
                }
-               _ = x3.Args[1]
-               if p != x3.Args[0] {
+               x3_1 := x3.Args[1]
+               if x3_1.Op != OpARM64ADDconst {
                        break
                }
-               if mem != x3.Args[1] {
+               if x3_1.AuxInt != 3 {
+                       break
+               }
+               if idx != x3_1.Args[0] {
+                       break
+               }
+               if mem != x3.Args[2] {
                        break
                }
                y4 := o2.Args[1]
@@ -16379,18 +20671,24 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUload {
+               if x4.Op != OpARM64MOVBUloadidx {
                        break
                }
-               i4 := x4.AuxInt
-               if x4.Aux != s {
+               _ = x4.Args[2]
+               if ptr != x4.Args[0] {
                        break
                }
-               _ = x4.Args[1]
-               if p != x4.Args[0] {
+               x4_1 := x4.Args[1]
+               if x4_1.Op != OpARM64ADDconst {
                        break
                }
-               if mem != x4.Args[1] {
+               if x4_1.AuxInt != 4 {
+                       break
+               }
+               if idx != x4_1.Args[0] {
+                       break
+               }
+               if mem != x4.Args[2] {
                        break
                }
                y5 := o1.Args[1]
@@ -16398,18 +20696,24 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x5 := y5.Args[0]
-               if x5.Op != OpARM64MOVBUload {
+               if x5.Op != OpARM64MOVBUloadidx {
                        break
                }
-               i5 := x5.AuxInt
-               if x5.Aux != s {
+               _ = x5.Args[2]
+               if ptr != x5.Args[0] {
                        break
                }
-               _ = x5.Args[1]
-               if p != x5.Args[0] {
+               x5_1 := x5.Args[1]
+               if x5_1.Op != OpARM64ADDconst {
                        break
                }
-               if mem != x5.Args[1] {
+               if x5_1.AuxInt != 5 {
+                       break
+               }
+               if idx != x5_1.Args[0] {
+                       break
+               }
+               if mem != x5.Args[2] {
                        break
                }
                y6 := o0.Args[1]
@@ -16417,44 +20721,91 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x6 := y6.Args[0]
-               if x6.Op != OpARM64MOVBUload {
+               if x6.Op != OpARM64MOVBUloadidx {
                        break
                }
-               i6 := x6.AuxInt
-               if x6.Aux != s {
+               _ = x6.Args[2]
+               if ptr != x6.Args[0] {
                        break
                }
-               _ = x6.Args[1]
-               if p != x6.Args[0] {
+               x6_1 := x6.Args[1]
+               if x6_1.Op != OpARM64ADDconst {
                        break
                }
-               if mem != x6.Args[1] {
+               if x6_1.AuxInt != 6 {
                        break
                }
-               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               if idx != x6_1.Args[0] {
+                       break
+               }
+               if mem != x6.Args[2] {
+                       break
+               }
+               y7 := v.Args[1]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x7.Args[2]
+               if ptr != x7.Args[0] {
+                       break
+               }
+               x7_1 := x7.Args[1]
+               if x7_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x7_1.AuxInt != 7 {
+                       break
+               }
+               if idx != x7_1.Args[0] {
+                       break
+               }
+               if mem != x7.Args[2] {
+                       break
+               }
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
                v0 := b.NewValue0(v.Pos, OpARM64REV, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
-               v1.Aux = s
-               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i0
-               v2.AddArg(p)
-               v1.AddArg(v2)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v1.AddArg(ptr)
+               v1.AddArg(idx)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [6] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [7] {s} p mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
+       // match: (OR <t> y7:(MOVDnop x7:(MOVBUloadidx ptr (ADDconst [7] idx) mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [4] idx) mem))) y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [5] idx) mem))) y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [6] idx) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr idx mem))
        for {
                t := v.Type
                _ = v.Args[1]
-               o0 := v.Args[0]
+               y7 := v.Args[0]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x7.Args[2]
+               ptr := x7.Args[0]
+               x7_1 := x7.Args[1]
+               if x7_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x7_1.AuxInt != 7 {
+                       break
+               }
+               idx := x7_1.Args[0]
+               mem := x7.Args[2]
+               o0 := v.Args[1]
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
@@ -16518,30 +20869,38 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                _ = x0.Args[2]
-               ptr0 := x0.Args[0]
-               idx0 := x0.Args[1]
-               mem := x0.Args[2]
+               if ptr != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
                y1 := o5.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
                x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               if x1.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x1.AuxInt != 1 {
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
                        break
                }
-               s := x1.Aux
-               _ = x1.Args[1]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               if mem != x1.Args[1] {
+               if x1_1.AuxInt != 1 {
+                       break
+               }
+               if idx != x1_1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[2] {
                        break
                }
                y2 := o4.Args[1]
@@ -16549,18 +20908,24 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               if x2.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x2.AuxInt != 2 {
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
                        break
                }
-               if x2.Aux != s {
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x2.Args[1]
-               p := x2.Args[0]
-               if mem != x2.Args[1] {
+               if x2_1.AuxInt != 2 {
+                       break
+               }
+               if idx != x2_1.Args[0] {
+                       break
+               }
+               if mem != x2.Args[2] {
                        break
                }
                y3 := o3.Args[1]
@@ -16568,20 +20933,24 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
+               if x3.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x3.AuxInt != 3 {
+               _ = x3.Args[2]
+               if ptr != x3.Args[0] {
                        break
                }
-               if x3.Aux != s {
+               x3_1 := x3.Args[1]
+               if x3_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x3.Args[1]
-               if p != x3.Args[0] {
+               if x3_1.AuxInt != 3 {
                        break
                }
-               if mem != x3.Args[1] {
+               if idx != x3_1.Args[0] {
+                       break
+               }
+               if mem != x3.Args[2] {
                        break
                }
                y4 := o2.Args[1]
@@ -16589,20 +20958,24 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUload {
+               if x4.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x4.AuxInt != 4 {
+               _ = x4.Args[2]
+               if ptr != x4.Args[0] {
                        break
                }
-               if x4.Aux != s {
+               x4_1 := x4.Args[1]
+               if x4_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x4.Args[1]
-               if p != x4.Args[0] {
+               if x4_1.AuxInt != 4 {
                        break
                }
-               if mem != x4.Args[1] {
+               if idx != x4_1.Args[0] {
+                       break
+               }
+               if mem != x4.Args[2] {
                        break
                }
                y5 := o1.Args[1]
@@ -16610,20 +20983,24 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x5 := y5.Args[0]
-               if x5.Op != OpARM64MOVBUload {
+               if x5.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x5.AuxInt != 5 {
+               _ = x5.Args[2]
+               if ptr != x5.Args[0] {
                        break
                }
-               if x5.Aux != s {
+               x5_1 := x5.Args[1]
+               if x5_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x5.Args[1]
-               if p != x5.Args[0] {
+               if x5_1.AuxInt != 5 {
                        break
                }
-               if mem != x5.Args[1] {
+               if idx != x5_1.Args[0] {
+                       break
+               }
+               if mem != x5.Args[2] {
                        break
                }
                y6 := o0.Args[1]
@@ -16631,836 +21008,948 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x6 := y6.Args[0]
-               if x6.Op != OpARM64MOVBUload {
+               if x6.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x6.AuxInt != 6 {
-                       break
-               }
-               if x6.Aux != s {
+               _ = x6.Args[2]
+               if ptr != x6.Args[0] {
                        break
                }
-               _ = x6.Args[1]
-               if p != x6.Args[0] {
+               x6_1 := x6.Args[1]
+               if x6_1.Op != OpARM64ADDconst {
                        break
                }
-               if mem != x6.Args[1] {
+               if x6_1.AuxInt != 6 {
                        break
                }
-               y7 := v.Args[1]
-               if y7.Op != OpARM64MOVDnop {
+               if idx != x6_1.Args[0] {
                        break
                }
-               x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUload {
+               if mem != x6.Args[2] {
                        break
                }
-               if x7.AuxInt != 7 {
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
-               if x7.Aux != s {
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v1.AddArg(ptr)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ORN_0(v *Value) bool {
+       // match: (ORN x (MOVDconst [c]))
+       // cond:
+       // result: (ORconst [^c] x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x7.Args[1]
-               if p != x7.Args[0] {
+               c := v_1.AuxInt
+               v.reset(OpARM64ORconst)
+               v.AuxInt = ^c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORN x x)
+       // cond:
+       // result: (MOVDconst [-1])
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               if x != v.Args[1] {
                        break
                }
-               if mem != x7.Args[1] {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -1
+               return true
+       }
+       // match: (ORN x0 x1:(SLLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ORNshiftLL x0 y [c])
+       for {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SLLconst {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
-               v1.AddArg(ptr0)
-               v1.AddArg(idx0)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v.reset(OpARM64ORNshiftLL)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [7] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [6] {s} p mem))))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
+       // match: (ORN x0 x1:(SRLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ORNshiftRL x0 y [c])
        for {
-               t := v.Type
                _ = v.Args[1]
-               y7 := v.Args[0]
-               if y7.Op != OpARM64MOVDnop {
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SRLconst {
                        break
                }
-               x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUload {
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               if x7.AuxInt != 7 {
+               v.reset(OpARM64ORNshiftRL)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORN x0 x1:(SRAconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ORNshiftRA x0 y [c])
+       for {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SRAconst {
                        break
                }
-               s := x7.Aux
-               _ = x7.Args[1]
-               p := x7.Args[0]
-               mem := x7.Args[1]
-               o0 := v.Args[1]
-               if o0.Op != OpARM64ORshiftLL {
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               if o0.AuxInt != 8 {
+               v.reset(OpARM64ORNshiftRA)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
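
ORN computes x | ^y, so when the second operand is a constant the complement can be folded at compile time into an ORconst, and ORN of a value with itself is always all ones. A small self-contained check of the identities these rules rely on (illustrative values only, not part of the generated file):

	package main

	import "fmt"

	func main() {
		// ORN is x | ^y; with y a known constant the complement folds into
		// an ORconst, and ORN x x is always all ones.
		x, c := uint64(0x1234), uint64(0x00ff)
		fmt.Printf("%#x\n", x|^c) // 0xffffffffffffff34, i.e. ORconst [^0xff] applied to x
		fmt.Printf("%#x\n", x|^x) // 0xffffffffffffffff
	}
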
+func rewriteValueARM64_OpARM64ORNshiftLL_0(v *Value) bool {
+       // match: (ORNshiftLL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ORconst x [^int64(uint64(c)<<uint64(d))])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL {
+               c := v_1.AuxInt
+               v.reset(OpARM64ORconst)
+               v.AuxInt = ^int64(uint64(c) << uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORNshiftLL x (SLLconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [-1])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               if o1.AuxInt != 16 {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               _ = o1.Args[1]
-               o2 := o1.Args[0]
-               if o2.Op != OpARM64ORshiftLL {
+               if !(c == d) {
                        break
                }
-               if o2.AuxInt != 24 {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -1
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ORNshiftRA_0(v *Value) bool {
+       // match: (ORNshiftRA x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ORconst x [^(c>>uint64(d))])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = o2.Args[1]
-               o3 := o2.Args[0]
-               if o3.Op != OpARM64ORshiftLL {
+               c := v_1.AuxInt
+               v.reset(OpARM64ORconst)
+               v.AuxInt = ^(c >> uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORNshiftRA x (SRAconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [-1])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRAconst {
                        break
                }
-               if o3.AuxInt != 32 {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               _ = o3.Args[1]
-               o4 := o3.Args[0]
-               if o4.Op != OpARM64ORshiftLL {
+               if !(c == d) {
                        break
                }
-               if o4.AuxInt != 40 {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -1
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ORNshiftRL_0(v *Value) bool {
+       // match: (ORNshiftRL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ORconst x [^int64(uint64(c)>>uint64(d))])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = o4.Args[1]
-               o5 := o4.Args[0]
-               if o5.Op != OpARM64ORshiftLL {
+               c := v_1.AuxInt
+               v.reset(OpARM64ORconst)
+               v.AuxInt = ^int64(uint64(c) >> uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORNshiftRL x (SRLconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [-1])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               if o5.AuxInt != 48 {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               _ = o5.Args[1]
-               s0 := o5.Args[0]
-               if s0.Op != OpARM64SLLconst {
+               if !(c == d) {
                        break
                }
-               if s0.AuxInt != 56 {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -1
+               return true
+       }
+       return false
+}
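
Each ORNshift fold complements the shifted constant at compile time; for ORNshiftLL the new AuxInt is ^int64(uint64(c)<<uint64(d)). A worked instance with made-up values, showing that the folded constant gives the same result as the original shift-then-complement:

	package main

	import "fmt"

	func main() {
		// ORNshiftLL x (MOVDconst [c]) [d] computes x | ^(c<<d); the rewrite
		// precomputes the complemented, shifted constant as the ORconst AuxInt.
		x, c, d := uint64(0x0f0f), uint64(0x3), uint64(4)
		direct := x | ^(c << d)            // what the original ORNshiftLL computes
		folded := x | uint64(^int64(c<<d)) // the ORconst AuxInt from the rewrite, ORed with x
		fmt.Println(direct == folded)      // true
		fmt.Printf("%#x\n", direct)
	}
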
+func rewriteValueARM64_OpARM64ORconst_0(v *Value) bool {
+       // match: (ORconst [0] x)
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != 0 {
                        break
                }
-               y0 := s0.Args[0]
-               if y0.Op != OpARM64MOVDnop {
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORconst [-1] _)
+       // cond:
+       // result: (MOVDconst [-1])
+       for {
+               if v.AuxInt != -1 {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUloadidx {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -1
+               return true
+       }
+       // match: (ORconst [c] (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [c|d])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
-               }
-               _ = x0.Args[2]
-               ptr0 := x0.Args[0]
-               idx0 := x0.Args[1]
-               if mem != x0.Args[2] {
+               }
+               d := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = c | d
+               return true
+       }
+       // match: (ORconst [c] (ORconst [d] x))
+       // cond:
+       // result: (ORconst [c|d] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ORconst {
                        break
                }
-               y1 := o5.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpARM64ORconst)
+               v.AuxInt = c | d
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
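
The ORconst cases are plain constant algebra: OR with 0 is dropped, OR with -1 yields the all-ones constant, a constant operand folds to c|d, and two stacked ORconst ops merge. For example (hypothetical values), ORconst [0x0f] applied on top of ORconst [0xf0] x simplifies to ORconst [0xff] x.
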
+func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (ORshiftLL (MOVDconst [c]) x [d])
+       // cond:
+       // result: (ORconst [c] (SLLconst <x.Type> x [d]))
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ORconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (ORshiftLL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ORconst x [int64(uint64(c)<<uint64(d))])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if x1.AuxInt != 1 {
+               c := v_1.AuxInt
+               v.reset(OpARM64ORconst)
+               v.AuxInt = int64(uint64(c) << uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORshiftLL x y:(SLLconst x [c]) [d])
+       // cond: c==d
+       // result: y
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpARM64SLLconst {
                        break
                }
-               if x1.Aux != s {
+               c := y.AuxInt
+               if x != y.Args[0] {
                        break
                }
-               _ = x1.Args[1]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
+               if !(c == d) {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               if mem != x1.Args[1] {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORshiftLL [c] (SRLconst x [64-c]) x)
+       // cond:
+       // result: (RORconst [64-c] x)
+       for {
+               c := v.AuxInt
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRLconst {
                        break
                }
-               y2 := o4.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               if v_0.AuxInt != 64-c {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               x := v_0.Args[0]
+               if x != v.Args[1] {
                        break
                }
-               if x2.AuxInt != 2 {
+               v.reset(OpARM64RORconst)
+               v.AuxInt = 64 - c
+               v.AddArg(x)
+               return true
+       }
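
The SRLconst/ORshiftLL pair matched here is the usual rotate idiom: (x>>(64-c)) | (x<<c) is a left rotate by c, encoded on ARM64 as ROR by 64-c. A minimal sketch of source that produces this shape, assuming a constant shift amount (hypothetical helper, not from this CL):

	func rotl13(x uint64) uint64 {
		// x>>51 | x<<13 is a left rotate by 13; the OR and the left shift
		// combine into ORshiftLL, which this rule turns into RORconst [51].
		return x>>51 | x<<13
	}
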
+       // match: (ORshiftLL <t> [c] (UBFX [bfc] x) x)
+       // cond: c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
+       // result: (RORWconst [32-c] x)
+       for {
+               t := v.Type
+               c := v.AuxInt
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64UBFX {
                        break
                }
-               if x2.Aux != s {
+               bfc := v_0.AuxInt
+               x := v_0.Args[0]
+               if x != v.Args[1] {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               if !(c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)) {
                        break
                }
-               if mem != x2.Args[1] {
+               v.reset(OpARM64RORWconst)
+               v.AuxInt = 32 - c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORshiftLL [c] (SRLconst x [64-c]) x2)
+       // cond:
+       // result: (EXTRconst [64-c] x2 x)
+       for {
+               c := v.AuxInt
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRLconst {
                        break
                }
-               y3 := o3.Args[1]
-               if y3.Op != OpARM64MOVDnop {
+               if v_0.AuxInt != 64-c {
                        break
                }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
+               x := v_0.Args[0]
+               x2 := v.Args[1]
+               v.reset(OpARM64EXTRconst)
+               v.AuxInt = 64 - c
+               v.AddArg(x2)
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORshiftLL <t> [c] (UBFX [bfc] x) x2)
+       // cond: c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
+       // result: (EXTRWconst [32-c] x2 x)
+       for {
+               t := v.Type
+               c := v.AuxInt
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64UBFX {
                        break
                }
-               if x3.AuxInt != 3 {
+               bfc := v_0.AuxInt
+               x := v_0.Args[0]
+               x2 := v.Args[1]
+               if !(c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)) {
                        break
                }
-               if x3.Aux != s {
+               v.reset(OpARM64EXTRWconst)
+               v.AuxInt = 32 - c
+               v.AddArg(x2)
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORshiftLL [sc] (UBFX [bfc] x) (SRLconst [sc] y))
+       // cond: sc == getARM64BFwidth(bfc)
+       // result: (BFXIL [bfc] y x)
+       for {
+               sc := v.AuxInt
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64UBFX {
                        break
                }
-               _ = x3.Args[1]
-               if p != x3.Args[0] {
+               bfc := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               if mem != x3.Args[1] {
+               if v_1.AuxInt != sc {
                        break
                }
-               y4 := o2.Args[1]
-               if y4.Op != OpARM64MOVDnop {
+               y := v_1.Args[0]
+               if !(sc == getARM64BFwidth(bfc)) {
                        break
                }
-               x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUload {
+               v.reset(OpARM64BFXIL)
+               v.AuxInt = bfc
+               v.AddArg(y)
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
+       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
+       // result: @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       for {
+               t := v.Type
+               if v.AuxInt != 8 {
                        break
                }
-               if x4.AuxInt != 4 {
+               _ = v.Args[1]
+               y0 := v.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               if x4.Aux != s {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
                        break
                }
-               _ = x4.Args[1]
-               if p != x4.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := v.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               if mem != x4.Args[1] {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               y5 := o1.Args[1]
-               if y5.Op != OpARM64MOVDnop {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               x5 := y5.Args[0]
-               if x5.Op != OpARM64MOVBUload {
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
                        break
                }
-               if x5.AuxInt != 5 {
+               if mem != x1.Args[1] {
                        break
                }
-               if x5.Aux != s {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
                        break
                }
-               _ = x5.Args[1]
-               if p != x5.Args[0] {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVHUload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.Aux = s
+               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v1.AuxInt = i0
+               v1.AddArg(p)
+               v0.AddArg(v1)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
+       // result: @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr0 idx0 mem)
+       for {
+               t := v.Type
+               if v.AuxInt != 8 {
                        break
                }
-               if mem != x5.Args[1] {
+               _ = v.Args[1]
+               y0 := v.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               y6 := o0.Args[1]
-               if y6.Op != OpARM64MOVDnop {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               x6 := y6.Args[0]
-               if x6.Op != OpARM64MOVBUload {
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               mem := x0.Args[2]
+               y1 := v.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               if x6.AuxInt != 6 {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x6.Aux != s {
+               if x1.AuxInt != 1 {
                        break
                }
-               _ = x6.Args[1]
-               if p != x6.Args[0] {
+               s := x1.Aux
+               _ = x1.Args[1]
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
-               if mem != x6.Args[1] {
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x1.Args[1] {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVHUloadidx, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
-               v1.AddArg(ptr0)
-               v1.AddArg(idx0)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v0.AddArg(ptr0)
+               v0.AddArg(idx0)
+               v0.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64ORN_0(v *Value) bool {
-       // match: (ORN x (MOVDconst [c]))
-       // cond:
-       // result: (ORconst [^c] x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               v.reset(OpARM64ORconst)
-               v.AuxInt = ^c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORN x x)
-       // cond:
-       // result: (MOVDconst [-1])
+func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
+       // result: @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr idx mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               if x != v.Args[1] {
+               t := v.Type
+               if v.AuxInt != 8 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -1
-               return true
-       }
-       // match: (ORN x0 x1:(SLLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORNshiftLL x0 y [c])
-       for {
                _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SLLconst {
-                       break
-               }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               y0 := v.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               v.reset(OpARM64ORNshiftLL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (ORN x0 x1:(SRLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORNshiftRL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRLconst {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               _ = x0.Args[2]
+               ptr := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y1 := v.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               v.reset(OpARM64ORNshiftRL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (ORN x0 x1:(SRAconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORNshiftRA x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRAconst {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUloadidx {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
                        break
                }
-               v.reset(OpARM64ORNshiftRA)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ORNshiftLL_0(v *Value) bool {
-       // match: (ORNshiftLL x (MOVDconst [c]) [d])
-       // cond:
-       // result: (ORconst x [^int64(uint64(c)<<uint64(d))])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ORconst)
-               v.AuxInt = ^int64(uint64(c) << uint64(d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORNshiftLL x (SLLconst x [c]) [d])
-       // cond: c==d
-       // result: (MOVDconst [-1])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SLLconst {
+               if x1_1.AuxInt != 1 {
                        break
                }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
+               if idx != x1_1.Args[0] {
                        break
                }
-               if !(c == d) {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -1
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ORNshiftRA_0(v *Value) bool {
-       // match: (ORNshiftRA x (MOVDconst [c]) [d])
-       // cond:
-       // result: (ORconst x [^(c>>uint64(d))])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if !(x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ORconst)
-               v.AuxInt = ^(c >> uint64(d))
-               v.AddArg(x)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVHUloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORNshiftRA x (SRAconst x [c]) [d])
-       // cond: c==d
-       // result: (MOVDconst [-1])
+       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUload [i0] {s} p mem) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i3] {s} p mem)))
+       // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRAconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
-                       break
-               }
-               if !(c == d) {
+               t := v.Type
+               if v.AuxInt != 24 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -1
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ORNshiftRL_0(v *Value) bool {
-       // match: (ORNshiftRL x (MOVDconst [c]) [d])
-       // cond:
-       // result: (ORconst x [^int64(uint64(c)>>uint64(d))])
-       for {
-               d := v.AuxInt
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ORconst)
-               v.AuxInt = ^int64(uint64(c) >> uint64(d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORNshiftRL x (SRLconst x [c]) [d])
-       // cond: c==d
-       // result: (MOVDconst [-1])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               if o0.AuxInt != 16 {
                        break
                }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
+               _ = o0.Args[1]
+               x0 := o0.Args[0]
+               if x0.Op != OpARM64MOVHUload {
                        break
                }
-               if !(c == d) {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o0.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -1
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ORconst_0(v *Value) bool {
-       // match: (ORconst [0] x)
-       // cond:
-       // result: x
-       for {
-               if v.AuxInt != 0 {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORconst [-1] _)
-       // cond:
-       // result: (MOVDconst [-1])
-       for {
-               if v.AuxInt != -1 {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -1
-               return true
-       }
-       // match: (ORconst [c] (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [c|d])
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = c | d
-               return true
-       }
-       // match: (ORconst [c] (ORconst [d] x))
-       // cond:
-       // result: (ORconst [c|d] x)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ORconst {
+               if mem != x1.Args[1] {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpARM64ORconst)
-               v.AuxInt = c | d
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (ORshiftLL (MOVDconst [c]) x [d])
-       // cond:
-       // result: (ORconst [c] (SLLconst <x.Type> x [d]))
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               y2 := v.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64ORconst)
-               v.AuxInt = c
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = d
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (ORshiftLL x (MOVDconst [c]) [d])
-       // cond:
-       // result: (ORconst x [int64(uint64(c)<<uint64(d))])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ORconst)
-               v.AuxInt = int64(uint64(c) << uint64(d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORshiftLL x y:(SLLconst x [c]) [d])
-       // cond: c==d
-       // result: y
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpARM64SLLconst {
+               i3 := x2.AuxInt
+               if x2.Aux != s {
                        break
                }
-               c := y.AuxInt
-               if x != y.Args[0] {
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
                        break
                }
-               if !(c == d) {
+               if mem != x2.Args[1] {
                        break
                }
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.AddArg(v0)
+               v0.Aux = s
+               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v1.AuxInt = i0
+               v1.AddArg(p)
+               v0.AddArg(v1)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORshiftLL [c] (SRLconst x [64-c]) x)
-       // cond:
-       // result: (RORconst [64-c] x)
+       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUloadidx ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 idx0 mem)
        for {
-               c := v.AuxInt
+               t := v.Type
+               if v.AuxInt != 24 {
+                       break
+               }
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               if v_0.AuxInt != 64-c {
+               if o0.AuxInt != 16 {
                        break
                }
-               x := v_0.Args[0]
-               if x != v.Args[1] {
+               _ = o0.Args[1]
+               x0 := o0.Args[0]
+               if x0.Op != OpARM64MOVHUloadidx {
                        break
                }
-               v.reset(OpARM64RORconst)
-               v.AuxInt = 64 - c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORshiftLL <t> [c] (UBFX [bfc] x) x)
-       // cond: c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
-       // result: (RORWconst [32-c] x)
-       for {
-               t := v.Type
-               c := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64UBFX {
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               mem := x0.Args[2]
+               y1 := o0.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               bfc := v_0.AuxInt
-               x := v_0.Args[0]
-               if x != v.Args[1] {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if !(c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)) {
+               if x1.AuxInt != 2 {
                        break
                }
-               v.reset(OpARM64RORWconst)
-               v.AuxInt = 32 - c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORshiftLL [c] (SRLconst x [64-c]) x2)
-       // cond:
-       // result: (EXTRconst [64-c] x2 x)
-       for {
-               c := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
+               s := x1.Aux
+               _ = x1.Args[1]
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
-               if v_0.AuxInt != 64-c {
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x1.Args[1] {
                        break
                }
-               x := v_0.Args[0]
-               x2 := v.Args[1]
-               v.reset(OpARM64EXTRconst)
-               v.AuxInt = 64 - c
-               v.AddArg(x2)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORshiftLL <t> [c] (UBFX [bfc] x) x2)
-       // cond: c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
-       // result: (EXTRWconst [32-c] x2 x)
-       for {
-               t := v.Type
-               c := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64UBFX {
+               y2 := v.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               bfc := v_0.AuxInt
-               x := v_0.Args[0]
-               x2 := v.Args[1]
-               if !(c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)) {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
                        break
                }
-               v.reset(OpARM64EXTRWconst)
-               v.AuxInt = 32 - c
-               v.AddArg(x2)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORshiftLL [sc] (UBFX [bfc] x) (SRLconst [sc] y))
-       // cond: sc == getARM64BFwidth(bfc)
-       // result: (BFXIL [bfc] y x)
-       for {
-               sc := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64UBFX {
+               if x2.AuxInt != 3 {
                        break
                }
-               bfc := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               if x2.Aux != s {
                        break
                }
-               if v_1.AuxInt != sc {
+               _ = x2.Args[1]
+               p := x2.Args[0]
+               if mem != x2.Args[1] {
                        break
                }
-               y := v_1.Args[0]
-               if !(sc == getARM64BFwidth(bfc)) {
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) {
                        break
                }
-               v.reset(OpARM64BFXIL)
-               v.AuxInt = bfc
-               v.AddArg(y)
-               v.AddArg(x)
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr0)
+               v0.AddArg(idx0)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
-       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
-       // result: @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUloadidx ptr idx mem) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr idx mem)
        for {
                t := v.Type
-               if v.AuxInt != 8 {
+               if v.AuxInt != 24 {
                        break
                }
                _ = v.Args[1]
-               y0 := v.Args[0]
-               if y0.Op != OpARM64MOVDnop {
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
+               if o0.AuxInt != 16 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               _ = x0.Args[1]
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               y1 := v.Args[1]
+               _ = o0.Args[1]
+               x0 := o0.Args[0]
+               if x0.Op != OpARM64MOVHUloadidx {
+                       break
+               }
+               _ = x0.Args[2]
+               ptr := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y1 := o0.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
                x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               if x1.Op != OpARM64MOVBUloadidx {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               if mem != x1.Args[1] {
+               if x1_1.AuxInt != 2 {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
+               if idx != x1_1.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVHUload, t)
+               if mem != x1.Args[2] {
+                       break
+               }
+               y2 := v.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
+                       break
+               }
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x2_1.AuxInt != 3 {
+                       break
+               }
+               if idx != x2_1.Args[0] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.Aux = s
-               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i0
-               v1.AddArg(p)
-               v0.AddArg(v1)
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
-       // result: @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr0 idx0 mem)
+       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUloadidx2 ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADDshiftLL [1] ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 (SLLconst <idx0.Type> [1] idx0) mem)
        for {
                t := v.Type
-               if v.AuxInt != 8 {
+               if v.AuxInt != 24 {
                        break
                }
                _ = v.Args[1]
-               y0 := v.Args[0]
-               if y0.Op != OpARM64MOVDnop {
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUloadidx {
+               if o0.AuxInt != 16 {
+                       break
+               }
+               _ = o0.Args[1]
+               x0 := o0.Args[0]
+               if x0.Op != OpARM64MOVHUloadidx2 {
                        break
                }
                _ = x0.Args[2]
                ptr0 := x0.Args[0]
                idx0 := x0.Args[1]
                mem := x0.Args[2]
-               y1 := v.Args[1]
+               y1 := o0.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
@@ -17468,13 +21957,16 @@ func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != 1 {
+               if x1.AuxInt != 2 {
                        break
                }
                s := x1.Aux
                _ = x1.Args[1]
                p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
+               if p1.Op != OpARM64ADDshiftLL {
+                       break
+               }
+               if p1.AuxInt != 1 {
                        break
                }
                _ = p1.Args[1]
@@ -17483,29 +21975,46 @@ func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool {
                if mem != x1.Args[1] {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
+               y2 := v.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVHUloadidx, t)
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x2.AuxInt != 3 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               p := x2.Args[0]
+               if mem != x2.Args[1] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
                v.reset(OpCopy)
                v.AddArg(v0)
                v0.AddArg(ptr0)
-               v0.AddArg(idx0)
+               v1 := b.NewValue0(v.Pos, OpARM64SLLconst, idx0.Type)
+               v1.AuxInt = 1
+               v1.AddArg(idx0)
+               v0.AddArg(v1)
                v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUload [i0] {s} p mem) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i3] {s} p mem)))
-       // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUload [i0] {s} p mem) y1:(MOVDnop x1:(MOVBUload [i4] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i7] {s} p mem)))
+       // cond: i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        for {
                t := v.Type
-               if v.AuxInt != 24 {
+               if v.AuxInt != 56 {
                        break
                }
                _ = v.Args[1]
@@ -17513,12 +22022,28 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               if o0.AuxInt != 16 {
+               if o0.AuxInt != 48 {
                        break
                }
                _ = o0.Args[1]
-               x0 := o0.Args[0]
-               if x0.Op != OpARM64MOVHUload {
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 40 {
+                       break
+               }
+               _ = o1.Args[1]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 32 {
+                       break
+               }
+               _ = o2.Args[1]
+               x0 := o2.Args[0]
+               if x0.Op != OpARM64MOVWUload {
                        break
                }
                i0 := x0.AuxInt
@@ -17526,7 +22051,7 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                _ = x0.Args[1]
                p := x0.Args[0]
                mem := x0.Args[1]
-               y1 := o0.Args[1]
+               y1 := o2.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
@@ -17534,7 +22059,7 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               i2 := x1.AuxInt
+               i4 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -17545,7 +22070,7 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if mem != x1.Args[1] {
                        break
                }
-               y2 := v.Args[1]
+               y2 := o1.Args[1]
                if y2.Op != OpARM64MOVDnop {
                        break
                }
@@ -17553,7 +22078,7 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               i3 := x2.AuxInt
+               i5 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
@@ -17564,11 +22089,49 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if mem != x2.Args[1] {
                        break
                }
-               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) {
+               y3 := o0.Args[1]
+               if y3.Op != OpARM64MOVDnop {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i6 := x3.AuxInt
+               if x3.Aux != s {
+                       break
+               }
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
+                       break
+               }
+               y4 := v.Args[1]
+               if y4.Op != OpARM64MOVDnop {
+                       break
+               }
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
+                       break
+               }
+               i7 := x4.AuxInt
+               if x4.Aux != s {
+                       break
+               }
+               _ = x4.Args[1]
+               if p != x4.Args[0] {
+                       break
+               }
+               if mem != x4.Args[1] {
+                       break
+               }
+               if !(i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
                v.reset(OpCopy)
                v.AddArg(v0)
                v0.Aux = s
@@ -17579,32 +22142,48 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                v0.AddArg(mem)
                return true
        }
-       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUloadidx ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 idx0 mem)
+       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUloadidx ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 idx0 mem)
        for {
                t := v.Type
-               if v.AuxInt != 24 {
+               if v.AuxInt != 56 {
+                       break
+               }
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 48 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
                        break
                }
-               _ = v.Args[1]
-               o0 := v.Args[0]
-               if o0.Op != OpARM64ORshiftLL {
+               if o1.AuxInt != 40 {
                        break
                }
-               if o0.AuxInt != 16 {
+               _ = o1.Args[1]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
                        break
                }
-               _ = o0.Args[1]
-               x0 := o0.Args[0]
-               if x0.Op != OpARM64MOVHUloadidx {
+               if o2.AuxInt != 32 {
+                       break
+               }
+               _ = o2.Args[1]
+               x0 := o2.Args[0]
+               if x0.Op != OpARM64MOVWUloadidx {
                        break
                }
                _ = x0.Args[2]
                ptr0 := x0.Args[0]
                idx0 := x0.Args[1]
                mem := x0.Args[2]
-               y1 := o0.Args[1]
+               y1 := o2.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
@@ -17612,7 +22191,7 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != 2 {
+               if x1.AuxInt != 4 {
                        break
                }
                s := x1.Aux
@@ -17627,7 +22206,7 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if mem != x1.Args[1] {
                        break
                }
-               y2 := v.Args[1]
+               y2 := o1.Args[1]
                if y2.Op != OpARM64MOVDnop {
                        break
                }
@@ -17635,7 +22214,7 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != 3 {
+               if x2.AuxInt != 5 {
                        break
                }
                if x2.Aux != s {
@@ -17646,11 +22225,53 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if mem != x2.Args[1] {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) {
+               y3 := o0.Args[1]
+               if y3.Op != OpARM64MOVDnop {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x3.AuxInt != 6 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
+                       break
+               }
+               y4 := v.Args[1]
+               if y4.Op != OpARM64MOVDnop {
+                       break
+               }
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x4.AuxInt != 7 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               _ = x4.Args[1]
+               if p != x4.Args[0] {
+                       break
+               }
+               if mem != x4.Args[1] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
                v.reset(OpCopy)
                v.AddArg(v0)
                v0.AddArg(ptr0)
@@ -17658,9 +22279,9 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                v0.AddArg(mem)
                return true
        }
-       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUload [i0] {s} p mem) y1:(MOVDnop x1:(MOVBUload [i4] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i7] {s} p mem)))
-       // cond: i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUloadidx4 ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADDshiftLL [2] ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 (SLLconst <idx0.Type> [2] idx0) mem)
        for {
                t := v.Type
                if v.AuxInt != 56 {
@@ -17692,14 +22313,13 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                }
                _ = o2.Args[1]
                x0 := o2.Args[0]
-               if x0.Op != OpARM64MOVWUload {
+               if x0.Op != OpARM64MOVWUloadidx4 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               _ = x0.Args[1]
-               p := x0.Args[0]
-               mem := x0.Args[1]
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               mem := x0.Args[2]
                y1 := o2.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
@@ -17708,14 +22328,21 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               i4 := x1.AuxInt
-               if x1.Aux != s {
+               if x1.AuxInt != 4 {
                        break
                }
+               s := x1.Aux
                _ = x1.Args[1]
-               if p != x1.Args[0] {
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADDshiftLL {
+                       break
+               }
+               if p1.AuxInt != 2 {
                        break
                }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
                if mem != x1.Args[1] {
                        break
                }
@@ -17727,14 +22354,14 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               i5 := x2.AuxInt
-               if x2.Aux != s {
+               if x2.AuxInt != 5 {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               if x2.Aux != s {
                        break
                }
+               _ = x2.Args[1]
+               p := x2.Args[0]
                if mem != x2.Args[1] {
                        break
                }
@@ -17746,7 +22373,9 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               i6 := x3.AuxInt
+               if x3.AuxInt != 6 {
+                       break
+               }
                if x3.Aux != s {
                        break
                }
@@ -17765,7 +22394,9 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x4.Op != OpARM64MOVBUload {
                        break
                }
-               i7 := x4.AuxInt
+               if x4.AuxInt != 7 {
+                       break
+               }
                if x4.Aux != s {
                        break
                }
@@ -17776,24 +22407,24 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if mem != x4.Args[1] {
                        break
                }
-               if !(i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.Aux = s
-               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i0
-               v1.AddArg(p)
+               v0.AddArg(ptr0)
+               v1 := b.NewValue0(v.Pos, OpARM64SLLconst, idx0.Type)
+               v1.AuxInt = 2
+               v1.AddArg(idx0)
                v0.AddArg(v1)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUloadidx ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 idx0 mem)
+       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUloadidx ptr idx mem) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [4] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [6] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr idx mem)
        for {
                t := v.Type
                if v.AuxInt != 56 {
@@ -17829,30 +22460,32 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                        break
                }
                _ = x0.Args[2]
-               ptr0 := x0.Args[0]
-               idx0 := x0.Args[1]
+               ptr := x0.Args[0]
+               idx := x0.Args[1]
                mem := x0.Args[2]
                y1 := o2.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
                x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               if x1.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x1.AuxInt != 4 {
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
                        break
                }
-               s := x1.Aux
-               _ = x1.Args[1]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               if mem != x1.Args[1] {
+               if x1_1.AuxInt != 4 {
+                       break
+               }
+               if idx != x1_1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[2] {
                        break
                }
                y2 := o1.Args[1]
@@ -17860,18 +22493,24 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                        break
                }
                x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               if x2.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x2.AuxInt != 5 {
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
                        break
                }
-               if x2.Aux != s {
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x2.Args[1]
-               p := x2.Args[0]
-               if mem != x2.Args[1] {
+               if x2_1.AuxInt != 5 {
+                       break
+               }
+               if idx != x2_1.Args[0] {
+                       break
+               }
+               if mem != x2.Args[2] {
                        break
                }
                y3 := o0.Args[1]
@@ -17879,20 +22518,24 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                        break
                }
                x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
+               if x3.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x3.AuxInt != 6 {
+               _ = x3.Args[2]
+               if ptr != x3.Args[0] {
                        break
                }
-               if x3.Aux != s {
+               x3_1 := x3.Args[1]
+               if x3_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x3.Args[1]
-               if p != x3.Args[0] {
+               if x3_1.AuxInt != 6 {
                        break
                }
-               if mem != x3.Args[1] {
+               if idx != x3_1.Args[0] {
+                       break
+               }
+               if mem != x3.Args[2] {
                        break
                }
                y4 := v.Args[1]
@@ -17900,31 +22543,35 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                        break
                }
                x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUload {
+               if x4.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x4.AuxInt != 7 {
+               _ = x4.Args[2]
+               if ptr != x4.Args[0] {
                        break
                }
-               if x4.Aux != s {
+               x4_1 := x4.Args[1]
+               if x4_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x4.Args[1]
-               if p != x4.Args[0] {
+               if x4_1.AuxInt != 7 {
                        break
                }
-               if mem != x4.Args[1] {
+               if idx != x4_1.Args[0] {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+               if mem != x4.Args[2] {
+                       break
+               }
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4)
                v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AddArg(ptr0)
-               v0.AddArg(idx0)
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
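The rewriteValueARM64_OpARM64ORshiftLL_* matchers above are machine-generated from the declarative rules in src/cmd/compile/internal/ssa/gen/ARM64.rules; each // match, // cond and // result comment reproduces its source rule verbatim. As a rough illustration of what the MOVDloadidx rule above targets (a sketch, not part of this CL; the function name is made up), Go code of this shape should now collapse its eight byte loads into a single register-indexed 64-bit load:

        func read64le(s []byte, i int) uint64 {
                // Expected to compile to roughly "MOVD (Rptr)(Ridx), Rt" on arm64
                // once the MOVBUloadidx/MOVWUloadidx combining rules have fired.
                return uint64(s[i]) | uint64(s[i+1])<<8 | uint64(s[i+2])<<16 |
                        uint64(s[i+3])<<24 | uint64(s[i+4])<<32 | uint64(s[i+5])<<40 |
                        uint64(s[i+6])<<48 | uint64(s[i+7])<<56
        }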
@@ -17976,17 +22623,80 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                v0 := b.NewValue0(v.Pos, OpARM64REV16W, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVHUload, t)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVHUload, t)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ORshiftLL_20(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr0 idx0 mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
+       // result: @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr0 idx0 mem))
+       for {
+               t := v.Type
+               if v.AuxInt != 8 {
+                       break
+               }
+               _ = v.Args[1]
+               y0 := v.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x0.AuxInt != 1 {
+                       break
+               }
+               s := x0.Aux
+               _ = x0.Args[1]
+               p1 := x0.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               mem := x0.Args[1]
+               y1 := v.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x1.Args[2]
+               ptr0 := x1.Args[0]
+               idx0 := x1.Args[1]
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVHUloadidx, t)
+               v1.AddArg(ptr0)
+               v1.AddArg(idx0)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
        }
-       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr0 idx0 mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
-       // result: @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr0 idx0 mem))
+       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [1] idx) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr idx mem)))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
+       // result: @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr idx mem))
        for {
                t := v.Type
                if v.AuxInt != 8 {
@@ -17998,22 +22708,20 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                        break
                }
                x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x0.AuxInt != 1 {
+               _ = x0.Args[2]
+               ptr := x0.Args[0]
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64ADDconst {
                        break
                }
-               s := x0.Aux
-               _ = x0.Args[1]
-               p1 := x0.Args[0]
-               if p1.Op != OpARM64ADD {
+               if x0_1.AuxInt != 1 {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               mem := x0.Args[1]
+               idx := x0_1.Args[0]
+               mem := x0.Args[2]
                y1 := v.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
@@ -18023,12 +22731,16 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                        break
                }
                _ = x1.Args[2]
-               ptr0 := x1.Args[0]
-               idx0 := x1.Args[1]
+               if ptr != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
                if mem != x1.Args[2] {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
+               if !(x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -18036,8 +22748,8 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpARM64MOVHUloadidx, t)
-               v1.AddArg(ptr0)
-               v1.AddArg(idx0)
+               v1.AddArg(ptr)
+               v1.AddArg(idx)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
@@ -18212,6 +22924,99 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                v0.AddArg(v1)
                return true
        }
+       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] y0:(REV16W x0:(MOVHUloadidx ptr (ADDconst [2] idx) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr idx mem)))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
+       for {
+               t := v.Type
+               if v.AuxInt != 24 {
+                       break
+               }
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 16 {
+                       break
+               }
+               _ = o0.Args[1]
+               y0 := o0.Args[0]
+               if y0.Op != OpARM64REV16W {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVHUloadidx {
+                       break
+               }
+               _ = x0.Args[2]
+               ptr := x0.Args[0]
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x0_1.AuxInt != 2 {
+                       break
+               }
+               idx := x0_1.Args[0]
+               mem := x0.Args[2]
+               y1 := o0.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
+                       break
+               }
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x1_1.AuxInt != 1 {
+                       break
+               }
+               if idx != x1_1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y2 := v.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v1.AddArg(ptr)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
        // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] y0:(REVW x0:(MOVWUload [i4] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i3] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i1] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i0] {s} p mem)))
        // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
        // result: @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
@@ -18494,6 +23299,165 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                v0.AddArg(v1)
                return true
        }
+       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] y0:(REVW x0:(MOVWUloadidx ptr (ADDconst [4] idx) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr idx mem)))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr idx mem))
+       for {
+               t := v.Type
+               if v.AuxInt != 56 {
+                       break
+               }
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 48 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 40 {
+                       break
+               }
+               _ = o1.Args[1]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 32 {
+                       break
+               }
+               _ = o2.Args[1]
+               y0 := o2.Args[0]
+               if y0.Op != OpARM64REVW {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVWUloadidx {
+                       break
+               }
+               _ = x0.Args[2]
+               ptr := x0.Args[0]
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x0_1.AuxInt != 4 {
+                       break
+               }
+               idx := x0_1.Args[0]
+               mem := x0.Args[2]
+               y1 := o2.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
+                       break
+               }
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x1_1.AuxInt != 3 {
+                       break
+               }
+               if idx != x1_1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y2 := o1.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
+                       break
+               }
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x2_1.AuxInt != 2 {
+                       break
+               }
+               if idx != x2_1.Args[0] {
+                       break
+               }
+               if mem != x2.Args[2] {
+                       break
+               }
+               y3 := o0.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x3.Args[2]
+               if ptr != x3.Args[0] {
+                       break
+               }
+               x3_1 := x3.Args[1]
+               if x3_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x3_1.AuxInt != 1 {
+                       break
+               }
+               if idx != x3_1.Args[0] {
+                       break
+               }
+               if mem != x3.Args[2] {
+                       break
+               }
+               y4 := v.Args[1]
+               if y4.Op != OpARM64MOVDnop {
+                       break
+               }
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x4.Args[2]
+               if ptr != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               if mem != x4.Args[2] {
+                       break
+               }
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v1.AddArg(ptr)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64ORshiftRA_0(v *Value) bool {
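The REV16W, REVW and REV rules added above are the byte-reversed counterparts of the same indexed load combining: the matched OR tree reads memory in big-endian order, so the rewrite emits one little-endian indexed load followed by a byte-reverse instruction. A sketch of source that should now hit the REVW variant (illustrative only, not taken from the CL; much like what binary.BigEndian.Uint32 reduces to after inlining):

        func read32be(s []byte, i int) uint32 {
                // With the new rules this should become roughly
                //      MOVWU (Rptr)(Ridx), Rt
                //      REVW  Rt, Rt
                // instead of four MOVBU loads and three ORs.
                return uint32(s[i])<<24 | uint32(s[i+1])<<16 |
                        uint32(s[i+2])<<8 | uint32(s[i+3])
        }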
index 631c8e6879d47d8932876cdd9cf9d430eb5b17b6..08d2d9112818755fd7450b8fc40f243f38a25b2a 100644
@@ -110,6 +110,21 @@ func load_byte2_uint16(s []byte) uint16 {
        return uint16(s[0]) | uint16(s[1])<<8
 }
 
+func load_byte2_uint16_idx(s []byte, idx int) uint16 {
+       // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`ORR`,-`MOVB`
+       return uint16(s[idx<<1]) | uint16(s[(idx<<1)+1])<<8
+}
+
+func load_byte4_uint32_idx(s []byte, idx int) uint32 {
+       // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`ORR`,-`MOV[BH]`
+       return uint32(s[idx<<2]) | uint32(s[(idx<<2)+1])<<8 | uint32(s[(idx<<2)+2])<<16 | uint32(s[(idx<<2)+3])<<24
+}
+
+func load_byte8_uint64_idx(s []byte, idx int) uint64 {
+       // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`ORR`,-`MOV[BHW]`
+       return uint64(s[idx<<3]) | uint64(s[(idx<<3)+1])<<8 | uint64(s[(idx<<3)+2])<<16 | uint64(s[(idx<<3)+3])<<24 | uint64(s[(idx<<3)+4])<<32 | uint64(s[(idx<<3)+5])<<40 | uint64(s[(idx<<3)+6])<<48 | uint64(s[(idx<<3)+7])<<56
+}
+
 // Check load combining across function calls.
 
 func fcall_byte(a, b byte) (byte, byte) {
@@ -268,6 +283,32 @@ func zero_byte_16(b []byte) {
        b[12], b[13], b[14], b[15] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH",-"MOVW"
 }
 
+func zero_byte_2_idx(b []byte, idx int) {
+       // arm64: `MOVH\sZR,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
+       b[(idx<<1)+0] = 0
+       b[(idx<<1)+1] = 0
+}
+
+func zero_byte_4_idx(b []byte, idx int) {
+       // arm64: `MOVW\sZR,\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`MOV[BH]`
+       b[(idx<<2)+0] = 0
+       b[(idx<<2)+1] = 0
+       b[(idx<<2)+2] = 0
+       b[(idx<<2)+3] = 0
+}
+
+func zero_byte_8_idx(b []byte, idx int) {
+       // arm64: `MOVD\sZR,\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`MOV[BHW]`
+       b[(idx<<3)+0] = 0
+       b[(idx<<3)+1] = 0
+       b[(idx<<3)+2] = 0
+       b[(idx<<3)+3] = 0
+       b[(idx<<3)+4] = 0
+       b[(idx<<3)+5] = 0
+       b[(idx<<3)+6] = 0
+       b[(idx<<3)+7] = 0
+}
+
 func zero_byte_30(a *[30]byte) {
        *a = [30]byte{} // arm64:"STP",-"MOVB",-"MOVH",-"MOVW"
 }
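The // arm64: comments in the hunks above are asmcheck directives: every backquoted pattern is a regular expression that must match the assembly generated for the enclosing function when it is built with GOARCH=arm64, and a pattern prefixed with "-" must not match. The test harness under $GOROOT/test drives these checks; to eyeball the output by hand one can compile the file directly (an illustrative invocation; <codegen_test_file>.go stands in for the test file shown above and is assumed to build standalone):

        GOOS=linux GOARCH=arm64 go tool compile -S <codegen_test_file>.go | grep -A8 load_byte8_uint64_idx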