Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: optimize ARM64 with register indexed load/store
authorBen Shi <powerman1st@163.com>
Mon, 16 Apr 2018 14:04:26 +0000 (14:04 +0000)
committerCherry Zhang <cherryyz@google.com>
Thu, 19 Apr 2018 15:08:10 +0000 (15:08 +0000)
ARM64 supports load/store instructions with a memory operand whose
address is calculated as base register + index register.

In this CL,
1. Some rules are added to the compiler's ARM64 backend to emit
such efficient instructions.
2. An incorrect load-combination rule is fixed.

The go1 benchmark does show improvement.

name                     old time/op    new time/op    delta
BinaryTree17-4              44.5s ± 2%     44.1s ± 1%   -0.81%  (p=0.000 n=28+29)
Fannkuch11-4                32.7s ± 3%     30.5s ± 0%   -6.79%  (p=0.000 n=30+26)
FmtFprintfEmpty-4           499ns ± 0%     506ns ± 5%   +1.39%  (p=0.003 n=25+30)
FmtFprintfString-4         1.07µs ± 0%    1.04µs ± 4%   -3.17%  (p=0.000 n=23+30)
FmtFprintfInt-4            1.15µs ± 4%    1.13µs ± 0%   -1.55%  (p=0.000 n=30+23)
FmtFprintfIntInt-4         1.77µs ± 4%    1.74µs ± 0%   -1.71%  (p=0.000 n=30+24)
FmtFprintfPrefixedInt-4    2.37µs ± 5%    2.12µs ± 0%  -10.56%  (p=0.000 n=30+23)
FmtFprintfFloat-4          3.03µs ± 1%    3.03µs ± 4%   -0.13%  (p=0.003 n=25+30)
FmtManyArgs-4              7.38µs ± 1%    7.43µs ± 4%   +0.59%  (p=0.003 n=25+30)
GobDecode-4                 101ms ± 6%      95ms ± 5%   -5.55%  (p=0.000 n=30+30)
GobEncode-4                78.0ms ± 4%    78.8ms ± 6%   +1.05%  (p=0.000 n=30+30)
Gzip-4                      4.25s ± 0%     4.27s ± 4%   +0.45%  (p=0.003 n=24+30)
Gunzip-4                    428ms ± 1%     420ms ± 0%   -1.88%  (p=0.000 n=23+23)
HTTPClientServer-4          549µs ± 1%     541µs ± 1%   -1.56%  (p=0.000 n=29+29)
JSONEncode-4                194ms ± 0%     188ms ± 4%     ~     (p=0.417 n=23+30)
JSONDecode-4                890ms ± 5%     831ms ± 0%   -6.55%  (p=0.000 n=30+23)
Mandelbrot200-4            47.3ms ± 2%    46.5ms ± 0%     ~     (p=0.980 n=30+26)
GoParse-4                  43.1ms ± 6%    43.8ms ± 6%   +1.65%  (p=0.000 n=30+30)
RegexpMatchEasy0_32-4      1.06µs ± 0%    1.07µs ± 3%     ~     (p=0.092 n=23+30)
RegexpMatchEasy0_1K-4      5.53µs ± 0%    5.51µs ± 0%   -0.24%  (p=0.000 n=25+25)
RegexpMatchEasy1_32-4      1.02µs ± 3%    1.01µs ± 0%   -1.27%  (p=0.000 n=30+24)
RegexpMatchEasy1_1K-4      7.26µs ± 0%    7.33µs ± 0%   +0.95%  (p=0.000 n=23+26)
RegexpMatchMedium_32-4     1.84µs ± 7%    1.79µs ± 1%     ~     (p=0.333 n=30+23)
RegexpMatchMedium_1K-4      553µs ± 0%     547µs ± 0%   -1.14%  (p=0.000 n=24+22)
RegexpMatchHard_32-4       30.8µs ± 1%    30.3µs ± 0%   -1.40%  (p=0.000 n=24+24)
RegexpMatchHard_1K-4        928µs ± 0%     929µs ± 5%   +0.12%  (p=0.013 n=23+30)
Revcomp-4                   8.13s ± 4%     6.32s ± 1%  -22.23%  (p=0.000 n=30+23)
Template-4                  899ms ± 6%     854ms ± 1%   -5.01%  (p=0.000 n=30+24)
TimeParse-4                4.66µs ± 4%    4.59µs ± 1%   -1.57%  (p=0.000 n=30+23)
TimeFormat-4               4.58µs ± 0%    4.61µs ± 0%   +0.57%  (p=0.000 n=26+24)
[Geo mean]                  717µs          698µs        -2.55%

name                     old speed      new speed      delta
GobDecode-4              7.63MB/s ± 6%  8.08MB/s ± 5%   +5.88%  (p=0.000 n=30+30)
GobEncode-4              9.85MB/s ± 4%  9.75MB/s ± 6%   -1.04%  (p=0.000 n=30+30)
Gzip-4                   4.56MB/s ± 0%  4.55MB/s ± 4%   -0.36%  (p=0.003 n=24+30)
Gunzip-4                 45.3MB/s ± 1%  46.2MB/s ± 0%   +1.92%  (p=0.000 n=23+23)
JSONEncode-4             10.0MB/s ± 0%  10.4MB/s ± 4%     ~     (p=0.403 n=23+30)
JSONDecode-4             2.18MB/s ± 5%  2.33MB/s ± 0%   +6.91%  (p=0.000 n=30+23)
GoParse-4                1.34MB/s ± 5%  1.32MB/s ± 5%   -1.66%  (p=0.000 n=30+30)
RegexpMatchEasy0_32-4    30.2MB/s ± 0%  29.8MB/s ± 3%     ~     (p=0.099 n=23+30)
RegexpMatchEasy0_1K-4     185MB/s ± 0%   186MB/s ± 0%   +0.24%  (p=0.000 n=25+25)
RegexpMatchEasy1_32-4    31.4MB/s ± 3%  31.8MB/s ± 0%   +1.24%  (p=0.000 n=30+24)
RegexpMatchEasy1_1K-4     141MB/s ± 0%   140MB/s ± 0%   -0.94%  (p=0.000 n=23+26)
RegexpMatchMedium_32-4    541kB/s ± 6%   560kB/s ± 0%   +3.45%  (p=0.000 n=30+23)
RegexpMatchMedium_1K-4   1.85MB/s ± 0%  1.87MB/s ± 0%   +1.08%  (p=0.000 n=24+23)
RegexpMatchHard_32-4     1.04MB/s ± 1%  1.06MB/s ± 1%   +1.48%  (p=0.000 n=24+24)
RegexpMatchHard_1K-4     1.10MB/s ± 0%  1.10MB/s ± 5%   +0.15%  (p=0.004 n=23+30)
Revcomp-4                31.3MB/s ± 4%  40.2MB/s ± 1%  +28.52%  (p=0.000 n=30+23)
Template-4               2.16MB/s ± 6%  2.27MB/s ± 1%   +5.18%  (p=0.000 n=30+24)
[Geo mean]               7.57MB/s       7.79MB/s        +2.98%

Fixes #24907

Change-Id: I94afd0e3f53d62a1cf5e452f3dd6daf61be21785
Reviewed-on: https://go-review.googlesource.com/107376
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
src/cmd/compile/internal/arm64/ssa.go
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/ARM64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteARM64.go
test/codegen/memcombine.go

index 11e7002df401cd72bc32c6668016d6019adc01de..e194f9c40313784f5ea6cfa1fe498feb20343de4 100644 (file)
@@ -341,6 +341,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                gc.AddAux(&p.From, v)
                p.To.Type = obj.TYPE_REG
                p.To.Reg = v.Reg()
+       case ssa.OpARM64MOVBloadidx,
+               ssa.OpARM64MOVBUloadidx,
+               ssa.OpARM64MOVHloadidx,
+               ssa.OpARM64MOVHUloadidx,
+               ssa.OpARM64MOVWloadidx,
+               ssa.OpARM64MOVWUloadidx,
+               ssa.OpARM64MOVDloadidx:
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_MEM
+               p.From.Name = obj.NAME_NONE
+               p.From.Reg = v.Args[0].Reg()
+               p.From.Index = v.Args[1].Reg()
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = v.Reg()
        case ssa.OpARM64LDAR,
                ssa.OpARM64LDARW:
                p := s.Prog(v.Op.Asm())
@@ -363,6 +377,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.To.Type = obj.TYPE_MEM
                p.To.Reg = v.Args[0].Reg()
                gc.AddAux(&p.To, v)
+       case ssa.OpARM64MOVBstoreidx,
+               ssa.OpARM64MOVHstoreidx,
+               ssa.OpARM64MOVWstoreidx,
+               ssa.OpARM64MOVDstoreidx:
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = v.Args[2].Reg()
+               p.To.Type = obj.TYPE_MEM
+               p.To.Name = obj.NAME_NONE
+               p.To.Reg = v.Args[0].Reg()
+               p.To.Index = v.Args[1].Reg()
        case ssa.OpARM64STP:
                p := s.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_REGREG
@@ -381,6 +406,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.To.Type = obj.TYPE_MEM
                p.To.Reg = v.Args[0].Reg()
                gc.AddAux(&p.To, v)
+       case ssa.OpARM64MOVBstorezeroidx,
+               ssa.OpARM64MOVHstorezeroidx,
+               ssa.OpARM64MOVWstorezeroidx,
+               ssa.OpARM64MOVDstorezeroidx:
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = arm64.REGZERO
+               p.To.Type = obj.TYPE_MEM
+               p.To.Name = obj.NAME_NONE
+               p.To.Reg = v.Args[0].Reg()
+               p.To.Index = v.Args[1].Reg()
        case ssa.OpARM64MOVQstorezero:
                p := s.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_REGREG
index edeadfd1d25fdf8435fc85a4a179abb53862fda7..41417482e8090949371ab69c11445e0b9c99f065 100644 (file)
        && (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
        (FMOVDload [off1+off2] {sym} ptr mem)
 
+// register indexed load
+(MOVDload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVDloadidx ptr idx mem)
+(MOVWUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVWUloadidx ptr idx mem)
+(MOVWload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVWloadidx ptr idx mem)
+(MOVHUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVHUloadidx ptr idx mem)
+(MOVHload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVHloadidx ptr idx mem)
+(MOVBUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVBUloadidx ptr idx mem)
+(MOVBload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVBloadidx ptr idx mem)
+(MOVDloadidx ptr (MOVDconst [c]) mem) -> (MOVDload [c] ptr mem)
+(MOVDloadidx (MOVDconst [c]) ptr mem) -> (MOVDload [c] ptr mem)
+(MOVWUloadidx ptr (MOVDconst [c]) mem) -> (MOVWUload [c] ptr mem)
+(MOVWUloadidx (MOVDconst [c]) ptr mem) -> (MOVWUload [c] ptr mem)
+(MOVWloadidx ptr (MOVDconst [c]) mem) -> (MOVWload [c] ptr mem)
+(MOVWloadidx (MOVDconst [c]) ptr mem) -> (MOVWload [c] ptr mem)
+(MOVHUloadidx ptr (MOVDconst [c]) mem) -> (MOVHUload [c] ptr mem)
+(MOVHUloadidx (MOVDconst [c]) ptr mem) -> (MOVHUload [c] ptr mem)
+(MOVHloadidx ptr (MOVDconst [c]) mem) -> (MOVHload [c] ptr mem)
+(MOVHloadidx (MOVDconst [c]) ptr mem) -> (MOVHload [c] ptr mem)
+(MOVBUloadidx ptr (MOVDconst [c]) mem) -> (MOVBUload [c] ptr mem)
+(MOVBUloadidx (MOVDconst [c]) ptr mem) -> (MOVBUload [c] ptr mem)
+(MOVBloadidx ptr (MOVDconst [c]) mem) -> (MOVBload [c] ptr mem)
+(MOVBloadidx (MOVDconst [c]) ptr mem) -> (MOVBload [c] ptr mem)
+
 (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(off1+off2)
        && (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
        (MOVBstore [off1+off2] {sym} ptr val mem)
        && (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
        (MOVQstorezero [off1+off2] {sym} ptr mem)
 
+// register indexed store
+(MOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (MOVDstoreidx ptr idx val mem)
+(MOVWstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (MOVWstoreidx ptr idx val mem)
+(MOVHstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (MOVHstoreidx ptr idx val mem)
+(MOVBstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (MOVBstoreidx ptr idx val mem)
+(MOVDstoreidx ptr (MOVDconst [c]) val mem) -> (MOVDstore [c] ptr val mem)
+(MOVDstoreidx (MOVDconst [c]) idx val mem) -> (MOVDstore [c] idx val mem)
+(MOVWstoreidx ptr (MOVDconst [c]) val mem) -> (MOVWstore [c] ptr val mem)
+(MOVWstoreidx (MOVDconst [c]) idx val mem) -> (MOVWstore [c] idx val mem)
+(MOVHstoreidx ptr (MOVDconst [c]) val mem) -> (MOVHstore [c] ptr val mem)
+(MOVHstoreidx (MOVDconst [c]) idx val mem) -> (MOVHstore [c] idx val mem)
+(MOVBstoreidx ptr (MOVDconst [c]) val mem) -> (MOVBstore [c] ptr val mem)
+(MOVBstoreidx (MOVDconst [c]) idx val mem) -> (MOVBstore [c] idx val mem)
+
 (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
        && canMergeSym(sym1,sym2) && is32Bit(off1+off2)
        && (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
 (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVDstorezero [off] {sym} ptr mem)
 (STP [off] {sym} ptr (MOVDconst [0]) (MOVDconst [0]) mem) -> (MOVQstorezero [off] {sym} ptr mem)
 
+// register indexed store zero
+(MOVDstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVDstorezeroidx ptr idx mem)
+(MOVWstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVWstorezeroidx ptr idx mem)
+(MOVHstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVHstorezeroidx ptr idx mem)
+(MOVBstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVBstorezeroidx ptr idx mem)
+(MOVDstoreidx ptr idx (MOVDconst [0]) mem) -> (MOVDstorezeroidx ptr idx mem)
+(MOVWstoreidx ptr idx (MOVDconst [0]) mem) -> (MOVWstorezeroidx ptr idx mem)
+(MOVHstoreidx ptr idx (MOVDconst [0]) mem) -> (MOVHstorezeroidx ptr idx mem)
+(MOVBstoreidx ptr idx (MOVDconst [0]) mem) -> (MOVBstorezeroidx ptr idx mem)
+(MOVDstorezeroidx ptr (MOVDconst [c]) mem) -> (MOVDstorezero [c] ptr mem)
+(MOVDstorezeroidx (MOVDconst [c]) idx mem) -> (MOVDstorezero [c] idx mem)
+(MOVWstorezeroidx ptr (MOVDconst [c]) mem) -> (MOVWstorezero [c] ptr mem)
+(MOVWstorezeroidx (MOVDconst [c]) idx mem) -> (MOVWstorezero [c] idx mem)
+(MOVHstorezeroidx ptr (MOVDconst [c]) mem) -> (MOVHstorezero [c] ptr mem)
+(MOVHstorezeroidx (MOVDconst [c]) idx mem) -> (MOVHstorezero [c] idx mem)
+(MOVBstorezeroidx ptr (MOVDconst [c]) mem) -> (MOVBstorezero [c] ptr mem)
+(MOVBstorezeroidx (MOVDconst [c]) idx mem) -> (MOVBstorezero [c] idx mem)
+
 // replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
 // these seem to have bad interaction with other rules, resulting in slower code
 //(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBreg x)
 (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
 (MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
 
+(MOVBloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
+       && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
+(MOVBUloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
+       && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
+(MOVHloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
+       && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
+(MOVHUloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
+       && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
+(MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
+       && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
+(MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
+       && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
+(MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _))
+       && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
+
 // don't extend after proper load
 (MOVBreg x:(MOVBload _ _)) -> (MOVDreg x)
 (MOVBUreg x:(MOVBUload _ _)) -> (MOVDreg x)
 (MOVWUreg x:(MOVBUload _ _)) -> (MOVDreg x)
 (MOVWUreg x:(MOVHUload _ _)) -> (MOVDreg x)
 (MOVWUreg x:(MOVWUload _ _)) -> (MOVDreg x)
+(MOVBreg x:(MOVBloadidx _  _ _)) -> (MOVDreg x)
+(MOVBUreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
+(MOVHreg x:(MOVBloadidx _ _ _)) -> (MOVDreg x)
+(MOVHreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
+(MOVHreg x:(MOVHloadidx _ _ _)) -> (MOVDreg x)
+(MOVHUreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
+(MOVHUreg x:(MOVHUloadidx _ _ _)) -> (MOVDreg x)
+(MOVWreg x:(MOVBloadidx _ _ _)) -> (MOVDreg x)
+(MOVWreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
+(MOVWreg x:(MOVHloadidx _ _ _)) -> (MOVDreg x)
+(MOVWreg x:(MOVHUloadidx _ _ _)) -> (MOVDreg x)
+(MOVWreg x:(MOVWloadidx _ _ _)) -> (MOVDreg x)
+(MOVWUreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
+(MOVWUreg x:(MOVHUloadidx _ _ _)) -> (MOVDreg x)
+(MOVWUreg x:(MOVWUloadidx _ _ _)) -> (MOVDreg x)
 
 // fold double extensions
 (MOVBreg x:(MOVBreg _)) -> (MOVDreg x)
 (MOVHstore [off] {sym} ptr (MOVWUreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
 (MOVWstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
 (MOVWstore [off] {sym} ptr (MOVWUreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
+(MOVBstoreidx ptr idx (MOVBreg x) mem) -> (MOVBstoreidx ptr idx x mem)
+(MOVBstoreidx ptr idx (MOVBUreg x) mem) -> (MOVBstoreidx ptr idx x mem)
+(MOVBstoreidx ptr idx (MOVHreg x) mem) -> (MOVBstoreidx ptr idx x mem)
+(MOVBstoreidx ptr idx (MOVHUreg x) mem) -> (MOVBstoreidx ptr idx x mem)
+(MOVBstoreidx ptr idx (MOVWreg x) mem) -> (MOVBstoreidx ptr idx x mem)
+(MOVBstoreidx ptr idx (MOVWUreg x) mem) -> (MOVBstoreidx ptr idx x mem)
+(MOVHstoreidx ptr idx (MOVHreg x) mem) -> (MOVHstoreidx ptr idx x mem)
+(MOVHstoreidx ptr idx (MOVHUreg x) mem) -> (MOVHstoreidx ptr idx x mem)
+(MOVHstoreidx ptr idx (MOVWreg x) mem) -> (MOVHstoreidx ptr idx x mem)
+(MOVHstoreidx ptr idx (MOVWUreg x) mem) -> (MOVHstoreidx ptr idx x mem)
+(MOVWstoreidx ptr idx (MOVWreg x) mem) -> (MOVWstoreidx ptr idx x mem)
+(MOVWstoreidx ptr idx (MOVWUreg x) mem) -> (MOVWstoreidx ptr idx x mem)
 
 // if a register move has only 1 use, just use the same register without emitting instruction
 // MOVDnop doesn't emit instruction, only for ensuring the type.
        && clobber(x0) && clobber(x1)
        && clobber(y0) && clobber(y1)
        -> @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+(ORshiftLL <t> [8]
+       y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))
+       y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
+       && s == nil
+       && x0.Uses == 1 && x1.Uses == 1
+       && y0.Uses == 1 && y1.Uses == 1
+       && mergePoint(b,x0,x1) != nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x0) && clobber(x1)
+       && clobber(y0) && clobber(y1)
+       -> @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr0 idx0 mem)
 
 // b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 -> load 32-bit
 (ORshiftLL <t> [24] o0:(ORshiftLL [16]
        && clobber(y1) && clobber(y2)
        && clobber(o0)
        -> @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+(ORshiftLL <t> [24] o0:(ORshiftLL [16]
+                   x0:(MOVHUloadidx ptr0 idx0 mem)
+       y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADD ptr1 idx1) mem)))
+       y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
+       && s == nil
+       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
+       && y1.Uses == 1 && y2.Uses == 1
+       && o0.Uses == 1
+       && mergePoint(b,x0,x1,x2) != nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && isSamePtr(p1, p)
+       && clobber(x0) && clobber(x1) && clobber(x2)
+       && clobber(y1) && clobber(y2)
+       && clobber(o0)
+       -> @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 idx0 mem)
 
 // b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 -> load 64-bit
 (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
        && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
        && clobber(o0) && clobber(o1) && clobber(o2)
        -> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
+                   x0:(MOVWUloadidx ptr0 idx0 mem)
+       y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADD ptr1 idx1) mem)))
+       y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
+       y3:(MOVDnop x3:(MOVBUload [6] {s} p mem)))
+       y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
+       && s == nil
+       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
+       && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
+       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
+       && mergePoint(b,x0,x1,x2,x3,x4) != nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && isSamePtr(p1, p)
+       && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
+       && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
+       && clobber(o0) && clobber(o1) && clobber(o2)
+       -> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 idx0 mem)
 
 // b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 32-bit
 (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
        && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
        && clobber(o0) && clobber(o1) && clobber(s0)
        -> @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
+       y0:(MOVDnop x0:(MOVBUload [3] {s} p mem)))
+       y1:(MOVDnop x1:(MOVBUload [2] {s} p mem)))
+       y2:(MOVDnop x2:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
+       y3:(MOVDnop x3:(MOVBUloadidx ptr0 idx0 mem)))
+       && s == nil
+       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
+       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
+       && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
+       && mergePoint(b,x0,x1,x2,x3) != nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && isSamePtr(p1, p)
+       && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
+       && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
+       && clobber(o0) && clobber(o1) && clobber(s0)
+       -> @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr0 idx0 mem)
 
-// b[7]<<56 | b[6]<<48 | b[5]<<40 | b[4]<<32 | b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 64-bit, reverse
+// b[7]<<56 | b[6]<<48 | b[5]<<40 | b[4]<<32 | b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 64-bit
 (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
        y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem)))
        y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem)))
        && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
        && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
        && clobber(o4) && clobber(o5) && clobber(s0)
-       -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+       -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
+       y0:(MOVDnop x0:(MOVBUload [7] {s} p mem)))
+       y1:(MOVDnop x1:(MOVBUload [6] {s} p mem)))
+       y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
+       y3:(MOVDnop x3:(MOVBUload [4] {s} p mem)))
+       y4:(MOVDnop x4:(MOVBUload [3] {s} p mem)))
+       y5:(MOVDnop x5:(MOVBUload [2] {s} p mem)))
+       y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
+       y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem)))
+       && s == nil
+       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
+       && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
+       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
+       && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
+       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
+       && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
+       && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && isSamePtr(p1, p)
+       && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
+       && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
+       && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
+       && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
+       && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
+       && clobber(o4) && clobber(o5) && clobber(s0)
+       -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr0 idx0 mem)
 
 // big endian loads
 // b[1] | b[0]<<8 -> load 16-bit, reverse
        && clobber(x0) && clobber(x1)
        && clobber(y0) && clobber(y1)
        -> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i0] {s} p mem))
+(ORshiftLL <t> [8]
+       y0:(MOVDnop x0:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))
+       y1:(MOVDnop x1:(MOVBUloadidx ptr0 idx0 mem)))
+       && s == nil
+       && x0.Uses == 1 && x1.Uses == 1
+       && y0.Uses == 1 && y1.Uses == 1
+       && mergePoint(b,x0,x1) != nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x0) && clobber(x1)
+       && clobber(y0) && clobber(y1)
+       -> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr0 idx0 mem))
 
 // b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 -> load 32-bit, reverse
 (ORshiftLL <t> [24] o0:(ORshiftLL [16]
        && clobber(y0) && clobber(y1) && clobber(y2)
        && clobber(o0)
        -> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+(ORshiftLL <t> [24] o0:(ORshiftLL [16]
+       y0:(REV16W  x0:(MOVHUload [2] {s} p mem))
+       y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
+       y2:(MOVDnop x2:(MOVBUloadidx ptr0 idx0 mem)))
+       && s == nil
+       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
+       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
+       && o0.Uses == 1
+       && mergePoint(b,x0,x1,x2) != nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && isSamePtr(p1, p)
+       && clobber(x0) && clobber(x1) && clobber(x2)
+       && clobber(y0) && clobber(y1) && clobber(y2)
+       && clobber(o0)
+       -> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
 
 // b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 -> load 64-bit, reverse
 (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
        && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
        && clobber(o0) && clobber(o1) && clobber(o2)
        -> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
+       y0:(REVW    x0:(MOVWUload [4] {s} p mem))
+       y1:(MOVDnop x1:(MOVBUload [3] {s} p mem)))
+       y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
+       y3:(MOVDnop x3:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
+       y4:(MOVDnop x4:(MOVBUloadidx ptr0 idx0 mem)))
+       && s == nil
+       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
+       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
+       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
+       && mergePoint(b,x0,x1,x2,x3,x4) != nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && isSamePtr(p1, p)
+       && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
+       && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
+       && clobber(o0) && clobber(o1) && clobber(o2)
+       -> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
 
 // b[0]<<24 | b[1]<<16 | b[2]<<8 | b[3] -> load 32-bit, reverse
 (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
        && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
        && clobber(o0) && clobber(o1) && clobber(s0)
        -> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
+       y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)))
+       y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
+       y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
+       y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
+       && s == nil
+       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
+       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
+       && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
+       && mergePoint(b,x0,x1,x2,x3) != nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && isSamePtr(p1, p)
+       && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
+       && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
+       && clobber(o0) && clobber(o1) && clobber(s0)
+       -> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
 
 // b[0]<<56 | b[1]<<48 | b[2]<<40 | b[3]<<32 | b[4]<<24 | b[5]<<16 | b[6]<<8 | b[7] -> load 64-bit, reverse
 (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
        && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
        && clobber(o4) && clobber(o5) && clobber(s0)
        -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
+       y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)))
+       y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
+       y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
+       y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
+       y4:(MOVDnop x4:(MOVBUload [4] {s} p mem)))
+       y5:(MOVDnop x5:(MOVBUload [5] {s} p mem)))
+       y6:(MOVDnop x6:(MOVBUload [6] {s} p mem)))
+       y7:(MOVDnop x7:(MOVBUload [7] {s} p mem)))
+       && s == nil
+       && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
+       && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
+       && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
+       && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
+       && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
+       && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
+       && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && isSamePtr(p1, p)
+       && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
+       && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
+       && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
+       && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
+       && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
+       && clobber(o4) && clobber(o5) && clobber(s0)
+       -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
 
 // Combine zero stores into larger (unaligned) stores.
 (MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
        && isSamePtr(ptr0, ptr1)
        && clobber(x)
        -> (MOVHstorezero [min(i,j)] {s} ptr0 mem)
+(MOVBstorezero [1] {s} (ADD ptr0 idx0) x:(MOVBstorezeroidx ptr1 idx1 mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVHstorezeroidx ptr1 idx1 mem)
 (MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
        && x.Uses == 1
        && areAdjacentOffsets(i,j,2)
        && isSamePtr(ptr0, ptr1)
        && clobber(x)
        -> (MOVWstorezero [min(i,j)] {s} ptr0 mem)
+(MOVHstorezero [2] {s} (ADD ptr0 idx0) x:(MOVHstorezeroidx ptr1 idx1 mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVWstorezeroidx ptr1 idx1 mem)
 (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
        && x.Uses == 1
        && areAdjacentOffsets(i,j,4)
        && isSamePtr(ptr0, ptr1)
        && clobber(x)
        -> (MOVDstorezero [min(i,j)] {s} ptr0 mem)
+(MOVWstorezero [4] {s} (ADD ptr0 idx0) x:(MOVWstorezeroidx ptr1 idx1 mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVDstorezeroidx ptr1 idx1 mem)
 (MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
        && x.Uses == 1
        && areAdjacentOffsets(i,j,8)
        && isSamePtr(ptr0, ptr1)
        && clobber(x)
        -> (MOVQstorezero [min(i,j)] {s} ptr0 mem)
+(MOVDstorezero [8] {s} p0:(ADD ptr0 idx0) x:(MOVDstorezeroidx ptr1 idx1 mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVQstorezero [0] {s} p0 mem)
 
 // Combine stores into larger (unaligned) stores.
 (MOVBstore [i] {s} ptr0 (SRLconst [8] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
        && isSamePtr(ptr0, ptr1)
        && clobber(x)
        -> (MOVHstore [i-1] {s} ptr0 w mem)
+(MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] w) x:(MOVBstoreidx ptr1 idx1 w mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVHstoreidx ptr1 idx1 w mem)
 (MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
        && x.Uses == 1
        && isSamePtr(ptr0, ptr1)
        && clobber(x)
        -> (MOVHstore [i-1] {s} ptr0 w mem)
+(MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVHstoreidx ptr1 idx1 w mem)
 (MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
        && x.Uses == 1
        && isSamePtr(ptr0, ptr1)
        && clobber(x)
        -> (MOVHstore [i-1] {s} ptr0 w mem)
+(MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVHstoreidx ptr1 idx1 w mem)
 (MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
        && x.Uses == 1
        && isSamePtr(ptr0, ptr1)
        && clobber(x)
        -> (MOVHstore [i-1] {s} ptr0 w mem)
+(MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVHstoreidx ptr1 idx1 w mem)
 (MOVBstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] w) mem))
        && x.Uses == 1
        && isSamePtr(ptr0, ptr1)
        && clobber(x)
        -> (MOVHstore [i-1] {s} ptr0 w0 mem)
+(MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] w) mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVHstoreidx ptr1 idx1 w0 mem)
 (MOVBstore [i] {s} ptr0 (UBFX [bfc] w) x:(MOVBstore [i-1] {s} ptr1 w0:(UBFX [bfc2] w) mem))
        && x.Uses == 1
        && isSamePtr(ptr0, ptr1)
-        && getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc)
-        && getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2)
-        && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8
+       && getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc)
+       && getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2)
+       && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8
        && clobber(x)
        -> (MOVHstore [i-1] {s} ptr0 w0 mem)
+(MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [bfc] w) x:(MOVBstoreidx ptr1 idx1 w0:(UBFX [bfc2] w) mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc)
+       && getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2)
+       && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8
+       && clobber(x)
+       -> (MOVHstoreidx ptr1 idx1 w0 mem)
 (MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
        && x.Uses == 1
        && isSamePtr(ptr0, ptr1)
        && clobber(x)
        -> (MOVHstore [i-1] {s} ptr0 w0 mem)
+(MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVHstoreidx ptr1 idx1 w0 mem)
 (MOVHstore [i] {s} ptr0 (SRLconst [16] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
        && x.Uses == 1
        && isSamePtr(ptr0, ptr1)
        && clobber(x)
        -> (MOVWstore [i-2] {s} ptr0 w mem)
+(MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx ptr1 idx1 w mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVWstoreidx ptr1 idx1 w mem)
 (MOVHstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
        && x.Uses == 1
        && isSamePtr(ptr0, ptr1)
        && clobber(x)
        -> (MOVWstore [i-2] {s} ptr0 w mem)
+(MOVHstore [2] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstoreidx ptr1 idx1 w mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVWstoreidx ptr1 idx1 w mem)
 (MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem))
        && x.Uses == 1
        && isSamePtr(ptr0, ptr1)
        && clobber(x)
        -> (MOVWstore [i-2] {s} ptr0 w mem)
+(MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx ptr1 idx1 w mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVWstoreidx ptr1 idx1 w mem)
 (MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem))
        && x.Uses == 1
        && isSamePtr(ptr0, ptr1)
        && clobber(x)
        -> (MOVWstore [i-2] {s} ptr0 w0 mem)
+(MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx ptr1 idx1 w0:(SRLconst [j-16] w) mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVWstoreidx ptr1 idx1 w0 mem)
 (MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
        && x.Uses == 1
        && isSamePtr(ptr0, ptr1)
        && clobber(x)
        -> (MOVDstore [i-4] {s} ptr0 w mem)
+(MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx ptr1 idx1 w mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVDstoreidx ptr1 idx1 w mem)
 (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
        && x.Uses == 1
        && isSamePtr(ptr0, ptr1)
        && clobber(x)
        -> (MOVDstore [i-4] {s} ptr0 w0 mem)
+(MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx ptr1 idx1 w0:(SRLconst [j-32] w) mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVDstoreidx ptr1 idx1 w0 mem)
 (MOVBstore [i] {s} ptr w
        x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
        x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
        && clobber(x5)
        && clobber(x6)
        -> (MOVDstore [i-7] {s} ptr (REV <w.Type> w) mem)
+(MOVBstore [7] {s} p w
+       x0:(MOVBstore [6] {s} p (SRLconst [8] w)
+       x1:(MOVBstore [5] {s} p (SRLconst [16] w)
+       x2:(MOVBstore [4] {s} p (SRLconst [24] w)
+       x3:(MOVBstore [3] {s} p (SRLconst [32] w)
+       x4:(MOVBstore [2] {s} p (SRLconst [40] w)
+       x5:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [48] w)
+       x6:(MOVBstoreidx ptr0 idx0 (SRLconst [56] w) mem))))))))
+       && x0.Uses == 1
+       && x1.Uses == 1
+       && x2.Uses == 1
+       && x3.Uses == 1
+       && x4.Uses == 1
+       && x5.Uses == 1
+       && x6.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && isSamePtr(p1, p)
+       && clobber(x0)
+       && clobber(x1)
+       && clobber(x2)
+       && clobber(x3)
+       && clobber(x4)
+       && clobber(x5)
+       && clobber(x6)
+       -> (MOVDstoreidx ptr0 idx0 (REV <w.Type> w) mem)
 (MOVBstore [i] {s} ptr w
        x0:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w)
        x1:(MOVBstore [i-2] {s} ptr (UBFX [arm64BFAuxInt(16, 16)] w)
        && clobber(x1)
        && clobber(x2)
        -> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
+(MOVBstore [3] {s} p w
+       x0:(MOVBstore [2] {s} p (UBFX [arm64BFAuxInt(8, 24)] w)
+       x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (UBFX [arm64BFAuxInt(16, 16)] w)
+       x2:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
+       && x0.Uses == 1
+       && x1.Uses == 1
+       && x2.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && isSamePtr(p1, p)
+       && clobber(x0)
+       && clobber(x1)
+       && clobber(x2)
+       -> (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
 (MOVBstore [i] {s} ptr w
        x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w))
        x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w))
        && clobber(x1)
        && clobber(x2)
        -> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
+(MOVBstore [3] {s} p w
+       x0:(MOVBstore [2] {s} p (SRLconst [8] (MOVDreg w))
+       x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] (MOVDreg w))
+       x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] (MOVDreg w)) mem))))
+       && x0.Uses == 1
+       && x1.Uses == 1
+       && x2.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && isSamePtr(p1, p)
+       && clobber(x0)
+       && clobber(x1)
+       && clobber(x2)
+       -> (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
 (MOVBstore [i] {s} ptr w
        x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
        x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
        && clobber(x1)
        && clobber(x2)
        -> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
+(MOVBstore [3] {s} p w
+       x0:(MOVBstore [2] {s} p (SRLconst [8] w)
+       x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] w)
+       x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] w) mem))))
+       && x0.Uses == 1
+       && x1.Uses == 1
+       && x2.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && isSamePtr(p1, p)
+       && clobber(x0)
+       && clobber(x1)
+       && clobber(x2)
+       -> (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
 (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem))
        && x.Uses == 1
        && clobber(x)
        -> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
+(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] w) mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
 (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 8)] w) mem))
        && x.Uses == 1
        && clobber(x)
        -> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
+(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(8, 8)] w) mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
 (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
        && x.Uses == 1
        && clobber(x)
        -> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
+(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
 (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w) mem))
        && x.Uses == 1
        && clobber(x)
        -> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
+(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(8, 24)] w) mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
 (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
        && x.Uses == 1
        && clobber(x)
        -> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
+(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem))
+       && x.Uses == 1
+       && s == nil
+       && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
+       && clobber(x)
+       -> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
 
 // FP simplification
 (FNEGS (FMULS x y)) -> (FNMULS x y)
index ec75ca38c60ddeb4b4d4817d68430bdf3d025542..184e22717ee6f3b9a26fba5a83d3991caeef452c 100644 (file)
@@ -144,6 +144,7 @@ func init() {
                gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
                gp22      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}}
                gpload    = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
+               gp2load   = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}}
                gpstore   = regInfo{inputs: []regMask{gpspsbg, gpg}}
                gpstore0  = regInfo{inputs: []regMask{gpspsbg}}
                gpstore2  = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}}
@@ -318,6 +319,15 @@ func init() {
                {name: "FMOVSload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVS", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux.  arg1=mem.
                {name: "FMOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVD", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux.  arg1=mem.
 
+               // register indexed load
+               {name: "MOVDloadidx", argLength: 3, reg: gp2load, asm: "MOVD", faultOnNilArg0: true},   // load 64-bit doubleword from arg0 + arg1, arg2=mem.
+               {name: "MOVWloadidx", argLength: 3, reg: gp2load, asm: "MOVW", faultOnNilArg0: true},   // load 32-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
+               {name: "MOVWUloadidx", argLength: 3, reg: gp2load, asm: "MOVWU", faultOnNilArg0: true}, // load 32-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
+               {name: "MOVHloadidx", argLength: 3, reg: gp2load, asm: "MOVH", faultOnNilArg0: true},   // load 16-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
+               {name: "MOVHUloadidx", argLength: 3, reg: gp2load, asm: "MOVHU", faultOnNilArg0: true}, // load 16-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
+               {name: "MOVBloadidx", argLength: 3, reg: gp2load, asm: "MOVB", faultOnNilArg0: true},   // load 1 byte from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
+               {name: "MOVBUloadidx", argLength: 3, reg: gp2load, asm: "MOVBU", faultOnNilArg0: true}, // load 1 byte from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
+
                {name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},   // store 1 byte of arg1 to arg0 + auxInt + aux.  arg2=mem.
                {name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},   // store 2 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
                {name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},   // store 4 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
@@ -326,12 +336,24 @@ func init() {
                {name: "FMOVSstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVS", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
                {name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
 
+               // register indexed store
+               {name: "MOVBstoreidx", argLength: 4, reg: gpstore2, asm: "MOVB", typ: "Mem", faultOnNilArg0: true}, // store 1 byte of arg2 to arg0 + arg1, arg3 = mem.
+               {name: "MOVHstoreidx", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes of arg2 to arg0 + arg1, arg3 = mem.
+               {name: "MOVWstoreidx", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes of arg2 to arg0 + arg1, arg3 = mem.
+               {name: "MOVDstoreidx", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes of arg2 to arg0 + arg1, arg3 = mem.
+
                {name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux.  arg1=mem.
                {name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
                {name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
                {name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
                {name: "MOVQstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "STP", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},  // store 16 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
 
+               // register indexed store zero
+               {name: "MOVBstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVB", typ: "Mem", faultOnNilArg0: true}, // store 1 byte of zero to arg0 + arg1, arg2 = mem.
+               {name: "MOVHstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVH", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes of zero to arg0 + arg1, arg2 = mem.
+               {name: "MOVWstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVW", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes of zero to arg0 + arg1, arg2 = mem.
+               {name: "MOVDstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVD", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes of zero to arg0 + arg1, arg2 = mem.
+
                {name: "FMOVDgpfp", argLength: 1, reg: gpfp, asm: "FMOVD"}, // move int64 to float64 (no conversion)
                {name: "FMOVDfpgp", argLength: 1, reg: fpgp, asm: "FMOVD"}, // move float64 to int64 (no conversion)
 
index 4b782acfa609705fe14bf8f7d125ea04eb851e90..0de1ccfddec26b4dec2d21474b38ae60544bb986 100644 (file)
@@ -1152,6 +1152,13 @@ const (
        OpARM64MOVDload
        OpARM64FMOVSload
        OpARM64FMOVDload
+       OpARM64MOVDloadidx
+       OpARM64MOVWloadidx
+       OpARM64MOVWUloadidx
+       OpARM64MOVHloadidx
+       OpARM64MOVHUloadidx
+       OpARM64MOVBloadidx
+       OpARM64MOVBUloadidx
        OpARM64MOVBstore
        OpARM64MOVHstore
        OpARM64MOVWstore
@@ -1159,11 +1166,19 @@ const (
        OpARM64STP
        OpARM64FMOVSstore
        OpARM64FMOVDstore
+       OpARM64MOVBstoreidx
+       OpARM64MOVHstoreidx
+       OpARM64MOVWstoreidx
+       OpARM64MOVDstoreidx
        OpARM64MOVBstorezero
        OpARM64MOVHstorezero
        OpARM64MOVWstorezero
        OpARM64MOVDstorezero
        OpARM64MOVQstorezero
+       OpARM64MOVBstorezeroidx
+       OpARM64MOVHstorezeroidx
+       OpARM64MOVWstorezeroidx
+       OpARM64MOVDstorezeroidx
        OpARM64FMOVDgpfp
        OpARM64FMOVDfpgp
        OpARM64MOVBreg
@@ -15070,6 +15085,111 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:           "MOVDloadidx",
+               argLen:         3,
+               faultOnNilArg0: true,
+               asm:            arm64.AMOVD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:           "MOVWloadidx",
+               argLen:         3,
+               faultOnNilArg0: true,
+               asm:            arm64.AMOVW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:           "MOVWUloadidx",
+               argLen:         3,
+               faultOnNilArg0: true,
+               asm:            arm64.AMOVWU,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:           "MOVHloadidx",
+               argLen:         3,
+               faultOnNilArg0: true,
+               asm:            arm64.AMOVH,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:           "MOVHUloadidx",
+               argLen:         3,
+               faultOnNilArg0: true,
+               asm:            arm64.AMOVHU,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:           "MOVBloadidx",
+               argLen:         3,
+               faultOnNilArg0: true,
+               asm:            arm64.AMOVB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:           "MOVBUloadidx",
+               argLen:         3,
+               faultOnNilArg0: true,
+               asm:            arm64.AMOVBU,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
        {
                name:           "MOVBstore",
                auxType:        auxSymOff,
@@ -15169,6 +15289,58 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:           "MOVBstoreidx",
+               argLen:         4,
+               faultOnNilArg0: true,
+               asm:            arm64.AMOVB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {2, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+               },
+       },
+       {
+               name:           "MOVHstoreidx",
+               argLen:         4,
+               faultOnNilArg0: true,
+               asm:            arm64.AMOVH,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {2, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+               },
+       },
+       {
+               name:           "MOVWstoreidx",
+               argLen:         4,
+               faultOnNilArg0: true,
+               asm:            arm64.AMOVW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {2, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+               },
+       },
+       {
+               name:           "MOVDstoreidx",
+               argLen:         4,
+               faultOnNilArg0: true,
+               asm:            arm64.AMOVD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {2, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+               },
+       },
        {
                name:           "MOVBstorezero",
                auxType:        auxSymOff,
@@ -15234,6 +15406,54 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:           "MOVBstorezeroidx",
+               argLen:         3,
+               faultOnNilArg0: true,
+               asm:            arm64.AMOVB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+               },
+       },
+       {
+               name:           "MOVHstorezeroidx",
+               argLen:         3,
+               faultOnNilArg0: true,
+               asm:            arm64.AMOVH,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+               },
+       },
+       {
+               name:           "MOVWstorezeroidx",
+               argLen:         3,
+               faultOnNilArg0: true,
+               asm:            arm64.AMOVW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+               },
+       },
+       {
+               name:           "MOVDstorezeroidx",
+               argLen:         3,
+               faultOnNilArg0: true,
+               asm:            arm64.AMOVD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+               },
+       },
        {
                name:   "FMOVDgpfp",
                argLen: 1,
index 407719e744f704707007657a68fd6cb8db6ce7cf..90cbff3a59db92e61fb6b3654bdc1eaa8e8107c9 100644 (file)
@@ -131,50 +131,80 @@ func rewriteValueARM64(v *Value) bool {
                return rewriteValueARM64_OpARM64MODW_0(v)
        case OpARM64MOVBUload:
                return rewriteValueARM64_OpARM64MOVBUload_0(v)
+       case OpARM64MOVBUloadidx:
+               return rewriteValueARM64_OpARM64MOVBUloadidx_0(v)
        case OpARM64MOVBUreg:
                return rewriteValueARM64_OpARM64MOVBUreg_0(v)
        case OpARM64MOVBload:
                return rewriteValueARM64_OpARM64MOVBload_0(v)
+       case OpARM64MOVBloadidx:
+               return rewriteValueARM64_OpARM64MOVBloadidx_0(v)
        case OpARM64MOVBreg:
                return rewriteValueARM64_OpARM64MOVBreg_0(v)
        case OpARM64MOVBstore:
-               return rewriteValueARM64_OpARM64MOVBstore_0(v) || rewriteValueARM64_OpARM64MOVBstore_10(v) || rewriteValueARM64_OpARM64MOVBstore_20(v)
+               return rewriteValueARM64_OpARM64MOVBstore_0(v) || rewriteValueARM64_OpARM64MOVBstore_10(v) || rewriteValueARM64_OpARM64MOVBstore_20(v) || rewriteValueARM64_OpARM64MOVBstore_30(v) || rewriteValueARM64_OpARM64MOVBstore_40(v)
+       case OpARM64MOVBstoreidx:
+               return rewriteValueARM64_OpARM64MOVBstoreidx_0(v)
        case OpARM64MOVBstorezero:
                return rewriteValueARM64_OpARM64MOVBstorezero_0(v)
+       case OpARM64MOVBstorezeroidx:
+               return rewriteValueARM64_OpARM64MOVBstorezeroidx_0(v)
        case OpARM64MOVDload:
                return rewriteValueARM64_OpARM64MOVDload_0(v)
+       case OpARM64MOVDloadidx:
+               return rewriteValueARM64_OpARM64MOVDloadidx_0(v)
        case OpARM64MOVDreg:
                return rewriteValueARM64_OpARM64MOVDreg_0(v)
        case OpARM64MOVDstore:
                return rewriteValueARM64_OpARM64MOVDstore_0(v)
+       case OpARM64MOVDstoreidx:
+               return rewriteValueARM64_OpARM64MOVDstoreidx_0(v)
        case OpARM64MOVDstorezero:
                return rewriteValueARM64_OpARM64MOVDstorezero_0(v)
+       case OpARM64MOVDstorezeroidx:
+               return rewriteValueARM64_OpARM64MOVDstorezeroidx_0(v)
        case OpARM64MOVHUload:
                return rewriteValueARM64_OpARM64MOVHUload_0(v)
+       case OpARM64MOVHUloadidx:
+               return rewriteValueARM64_OpARM64MOVHUloadidx_0(v)
        case OpARM64MOVHUreg:
                return rewriteValueARM64_OpARM64MOVHUreg_0(v)
        case OpARM64MOVHload:
                return rewriteValueARM64_OpARM64MOVHload_0(v)
+       case OpARM64MOVHloadidx:
+               return rewriteValueARM64_OpARM64MOVHloadidx_0(v)
        case OpARM64MOVHreg:
-               return rewriteValueARM64_OpARM64MOVHreg_0(v)
+               return rewriteValueARM64_OpARM64MOVHreg_0(v) || rewriteValueARM64_OpARM64MOVHreg_10(v)
        case OpARM64MOVHstore:
                return rewriteValueARM64_OpARM64MOVHstore_0(v) || rewriteValueARM64_OpARM64MOVHstore_10(v)
+       case OpARM64MOVHstoreidx:
+               return rewriteValueARM64_OpARM64MOVHstoreidx_0(v)
        case OpARM64MOVHstorezero:
                return rewriteValueARM64_OpARM64MOVHstorezero_0(v)
+       case OpARM64MOVHstorezeroidx:
+               return rewriteValueARM64_OpARM64MOVHstorezeroidx_0(v)
        case OpARM64MOVQstorezero:
                return rewriteValueARM64_OpARM64MOVQstorezero_0(v)
        case OpARM64MOVWUload:
                return rewriteValueARM64_OpARM64MOVWUload_0(v)
+       case OpARM64MOVWUloadidx:
+               return rewriteValueARM64_OpARM64MOVWUloadidx_0(v)
        case OpARM64MOVWUreg:
-               return rewriteValueARM64_OpARM64MOVWUreg_0(v)
+               return rewriteValueARM64_OpARM64MOVWUreg_0(v) || rewriteValueARM64_OpARM64MOVWUreg_10(v)
        case OpARM64MOVWload:
                return rewriteValueARM64_OpARM64MOVWload_0(v)
+       case OpARM64MOVWloadidx:
+               return rewriteValueARM64_OpARM64MOVWloadidx_0(v)
        case OpARM64MOVWreg:
                return rewriteValueARM64_OpARM64MOVWreg_0(v) || rewriteValueARM64_OpARM64MOVWreg_10(v)
        case OpARM64MOVWstore:
                return rewriteValueARM64_OpARM64MOVWstore_0(v)
+       case OpARM64MOVWstoreidx:
+               return rewriteValueARM64_OpARM64MOVWstoreidx_0(v)
        case OpARM64MOVWstorezero:
                return rewriteValueARM64_OpARM64MOVWstorezero_0(v)
+       case OpARM64MOVWstorezeroidx:
+               return rewriteValueARM64_OpARM64MOVWstorezeroidx_0(v)
        case OpARM64MUL:
                return rewriteValueARM64_OpARM64MUL_0(v) || rewriteValueARM64_OpARM64MUL_10(v) || rewriteValueARM64_OpARM64MUL_20(v)
        case OpARM64MULW:
@@ -186,7 +216,7 @@ func rewriteValueARM64(v *Value) bool {
        case OpARM64NotEqual:
                return rewriteValueARM64_OpARM64NotEqual_0(v)
        case OpARM64OR:
-               return rewriteValueARM64_OpARM64OR_0(v) || rewriteValueARM64_OpARM64OR_10(v) || rewriteValueARM64_OpARM64OR_20(v)
+               return rewriteValueARM64_OpARM64OR_0(v) || rewriteValueARM64_OpARM64OR_10(v) || rewriteValueARM64_OpARM64OR_20(v) || rewriteValueARM64_OpARM64OR_30(v)
        case OpARM64ORN:
                return rewriteValueARM64_OpARM64ORN_0(v)
        case OpARM64ORNshiftLL:
@@ -5754,6 +5784,30 @@ func rewriteValueARM64_OpARM64MOVBUload_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBUload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVBUloadidx ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVBUloadidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVBUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
        // result: (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
@@ -5804,6 +5858,66 @@ func rewriteValueARM64_OpARM64MOVBUload_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueARM64_OpARM64MOVBUloadidx_0(v *Value) bool {
+       // match: (MOVBUloadidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVBUload [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBUload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBUloadidx (MOVDconst [c]) ptr mem)
+       // cond:
+       // result: (MOVBUload [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBUload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBUloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVBstorezeroidx {
+                       break
+               }
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
 func rewriteValueARM64_OpARM64MOVBUreg_0(v *Value) bool {
        // match: (MOVBUreg x:(MOVBUload _ _))
        // cond:
@@ -5818,6 +5932,19 @@ func rewriteValueARM64_OpARM64MOVBUreg_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MOVBUreg x:(MOVBUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
        // match: (MOVBUreg x:(MOVBUreg _))
        // cond:
        // result: (MOVDreg x)
@@ -5937,6 +6064,30 @@ func rewriteValueARM64_OpARM64MOVBload_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVBloadidx ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVBloadidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
        // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
@@ -5987,6 +6138,66 @@ func rewriteValueARM64_OpARM64MOVBload_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueARM64_OpARM64MOVBloadidx_0(v *Value) bool {
+       // match: (MOVBloadidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVBload [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBloadidx (MOVDconst [c]) ptr mem)
+       // cond:
+       // result: (MOVBload [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVBstorezeroidx {
+                       break
+               }
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
 func rewriteValueARM64_OpARM64MOVBreg_0(v *Value) bool {
        // match: (MOVBreg x:(MOVBload _ _))
        // cond:
@@ -6001,6 +6212,19 @@ func rewriteValueARM64_OpARM64MOVBreg_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MOVBreg x:(MOVBloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBloadidx {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
        // match: (MOVBreg x:(MOVBreg _))
        // cond:
        // result: (MOVDreg x)
@@ -6077,6 +6301,32 @@ func rewriteValueARM64_OpARM64MOVBstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBstore [off] {sym} (ADD ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVBstoreidx ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
        // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
@@ -6259,6 +6509,9 @@ func rewriteValueARM64_OpARM64MOVBstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
        // match: (MOVBstore [i] {s} ptr0 (SRLconst [8] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
        // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
        // result: (MOVHstore [i-1] {s} ptr0 w mem)
@@ -6302,11 +6555,51 @@ func rewriteValueARM64_OpARM64MOVBstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
-       b := v.Block
-       _ = b
+       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] w) x:(MOVBstoreidx ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr1 idx1 w mem)
+       for {
+               if v.AuxInt != 1 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if v_1.AuxInt != 8 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
        // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
        // result: (MOVHstore [i-1] {s} ptr0 w mem)
@@ -6350,50 +6643,52 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr0 w mem)
+       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr1 idx1 w mem)
        for {
-               i := v.AuxInt
+               if v.AuxInt != 1 {
+                       break
+               }
                s := v.Aux
                _ = v.Args[2]
-               ptr0 := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64UBFX {
                        break
                }
-               if v_1.AuxInt != arm64BFAuxInt(8, 24) {
+               if v_1.AuxInt != arm64BFAuxInt(8, 8) {
                        break
                }
                w := v_1.Args[0]
                x := v.Args[2]
-               if x.Op != OpARM64MOVBstore {
-                       break
-               }
-               if x.AuxInt != i-1 {
+               if x.Op != OpARM64MOVBstoreidx {
                        break
                }
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[2]
+               _ = x.Args[3]
                ptr1 := x.Args[0]
-               if w != x.Args[1] {
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(ptr0)
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
                v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
+       // match: (MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
        // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
        // result: (MOVHstore [i-1] {s} ptr0 w mem)
        for {
@@ -6402,17 +6697,13 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
                _ = v.Args[2]
                ptr0 := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               if v_1.AuxInt != 8 {
+               if v_1.Op != OpARM64UBFX {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpARM64MOVDreg {
+               if v_1.AuxInt != arm64BFAuxInt(8, 24) {
                        break
                }
-               w := v_1_0.Args[0]
+               w := v_1.Args[0]
                x := v.Args[2]
                if x.Op != OpARM64MOVBstore {
                        break
@@ -6440,20 +6731,161 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] w) mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr0 w0 mem)
+       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr1 idx1 w mem)
        for {
-               i := v.AuxInt
+               if v.AuxInt != 1 {
+                       break
+               }
                s := v.Aux
                _ = v.Args[2]
-               ptr0 := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64UBFX {
+                       break
+               }
+               if v_1.AuxInt != arm64BFAuxInt(8, 24) {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr0 w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if v_1.AuxInt != 8 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               w := v_1_0.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr1 idx1 w mem)
+       for {
+               if v.AuxInt != 1 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if v_1.AuxInt != 8 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               w := v_1_0.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] w) mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr0 w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
                x := v.Args[2]
                if x.Op != OpARM64MOVBstore {
                        break
@@ -6488,6 +6920,61 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] w) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr1 idx1 w0 mem)
+       for {
+               if v.AuxInt != 1 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               w0 := x.Args[2]
+               if w0.Op != OpARM64SRLconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVBstore_20(v *Value) bool {
+       b := v.Block
+       _ = b
        // match: (MOVBstore [i] {s} ptr0 (UBFX [bfc] w) x:(MOVBstore [i-1] {s} ptr1 w0:(UBFX [bfc2] w) mem))
        // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc) && getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2) && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8 && clobber(x)
        // result: (MOVHstore [i-1] {s} ptr0 w0 mem)
@@ -6534,6 +7021,54 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [bfc] w) x:(MOVBstoreidx ptr1 idx1 w0:(UBFX [bfc2] w) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc) && getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2) && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8 && clobber(x)
+       // result: (MOVHstoreidx ptr1 idx1 w0 mem)
+       for {
+               if v.AuxInt != 1 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64UBFX {
+                       break
+               }
+               bfc := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               w0 := x.Args[2]
+               if w0.Op != OpARM64UBFX {
+                       break
+               }
+               bfc2 := w0.AuxInt
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && getARM64BFwidth(bfc) == 32-getARM64BFlsb(bfc) && getARM64BFwidth(bfc2) == 32-getARM64BFlsb(bfc2) && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc)-8 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
        // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
        // result: (MOVHstore [i-1] {s} ptr0 w0 mem)
@@ -6590,6 +7125,64 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr1 idx1 w0 mem)
+       for {
+               if v.AuxInt != 1 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               j := v_1.AuxInt
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               w := v_1_0.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               w0 := x.Args[2]
+               if w0.Op != OpARM64SRLconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               w0_0 := w0.Args[0]
+               if w0_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               if w != w0_0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) x3:(MOVBstore [i-4] {s} ptr (SRLconst [32] w) x4:(MOVBstore [i-5] {s} ptr (SRLconst [40] w) x5:(MOVBstore [i-6] {s} ptr (SRLconst [48] w) x6:(MOVBstore [i-7] {s} ptr (SRLconst [56] w) mem))))))))
        // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
        // result: (MOVDstore [i-7] {s} ptr (REV <w.Type> w) mem)
@@ -6781,34 +7374,36 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w) x1:(MOVBstore [i-2] {s} ptr (UBFX [arm64BFAuxInt(16, 16)] w) x2:(MOVBstore [i-3] {s} ptr (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
-       // result: (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
+       // match: (MOVBstore [7] {s} p w x0:(MOVBstore [6] {s} p (SRLconst [8] w) x1:(MOVBstore [5] {s} p (SRLconst [16] w) x2:(MOVBstore [4] {s} p (SRLconst [24] w) x3:(MOVBstore [3] {s} p (SRLconst [32] w) x4:(MOVBstore [2] {s} p (SRLconst [40] w) x5:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [48] w) x6:(MOVBstoreidx ptr0 idx0 (SRLconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVDstoreidx ptr0 idx0 (REV <w.Type> w) mem)
        for {
-               i := v.AuxInt
+               if v.AuxInt != 7 {
+                       break
+               }
                s := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
+               p := v.Args[0]
                w := v.Args[1]
                x0 := v.Args[2]
                if x0.Op != OpARM64MOVBstore {
                        break
                }
-               if x0.AuxInt != i-1 {
+               if x0.AuxInt != 6 {
                        break
                }
                if x0.Aux != s {
                        break
                }
                _ = x0.Args[2]
-               if ptr != x0.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
                x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64UBFX {
+               if x0_1.Op != OpARM64SRLconst {
                        break
                }
-               if x0_1.AuxInt != arm64BFAuxInt(8, 24) {
+               if x0_1.AuxInt != 8 {
                        break
                }
                if w != x0_1.Args[0] {
@@ -6818,21 +7413,21 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
                if x1.Op != OpARM64MOVBstore {
                        break
                }
-               if x1.AuxInt != i-2 {
+               if x1.AuxInt != 5 {
                        break
                }
                if x1.Aux != s {
                        break
                }
                _ = x1.Args[2]
-               if ptr != x1.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
                x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64UBFX {
+               if x1_1.Op != OpARM64SRLconst {
                        break
                }
-               if x1_1.AuxInt != arm64BFAuxInt(16, 16) {
+               if x1_1.AuxInt != 16 {
                        break
                }
                if w != x1_1.Args[0] {
@@ -6842,148 +7437,133 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
                if x2.Op != OpARM64MOVBstore {
                        break
                }
-               if x2.AuxInt != i-3 {
+               if x2.AuxInt != 4 {
                        break
                }
                if x2.Aux != s {
                        break
                }
                _ = x2.Args[2]
-               if ptr != x2.Args[0] {
+               if p != x2.Args[0] {
                        break
                }
                x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64UBFX {
+               if x2_1.Op != OpARM64SRLconst {
                        break
                }
-               if x2_1.AuxInt != arm64BFAuxInt(24, 8) {
+               if x2_1.AuxInt != 24 {
                        break
                }
                if w != x2_1.Args[0] {
                        break
                }
-               mem := x2.Args[2]
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+               x3 := x2.Args[2]
+               if x3.Op != OpARM64MOVBstore {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = i - 3
-               v.Aux = s
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w)) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVDreg w)) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
-       // result: (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               w := v.Args[1]
-               x0 := v.Args[2]
-               if x0.Op != OpARM64MOVBstore {
+               if x3.AuxInt != 3 {
                        break
                }
-               if x0.AuxInt != i-1 {
+               if x3.Aux != s {
                        break
                }
-               if x0.Aux != s {
+               _ = x3.Args[2]
+               if p != x3.Args[0] {
                        break
                }
-               _ = x0.Args[2]
-               if ptr != x0.Args[0] {
+               x3_1 := x3.Args[1]
+               if x3_1.Op != OpARM64SRLconst {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64SRLconst {
+               if x3_1.AuxInt != 32 {
                        break
                }
-               if x0_1.AuxInt != 8 {
+               if w != x3_1.Args[0] {
                        break
                }
-               x0_1_0 := x0_1.Args[0]
-               if x0_1_0.Op != OpARM64MOVDreg {
+               x4 := x3.Args[2]
+               if x4.Op != OpARM64MOVBstore {
                        break
                }
-               if w != x0_1_0.Args[0] {
+               if x4.AuxInt != 2 {
                        break
                }
-               x1 := x0.Args[2]
-               if x1.Op != OpARM64MOVBstore {
+               if x4.Aux != s {
                        break
                }
-               if x1.AuxInt != i-2 {
+               _ = x4.Args[2]
+               if p != x4.Args[0] {
                        break
                }
-               if x1.Aux != s {
+               x4_1 := x4.Args[1]
+               if x4_1.Op != OpARM64SRLconst {
                        break
                }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] {
+               if x4_1.AuxInt != 40 {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64SRLconst {
+               if w != x4_1.Args[0] {
                        break
                }
-               if x1_1.AuxInt != 16 {
+               x5 := x4.Args[2]
+               if x5.Op != OpARM64MOVBstore {
                        break
                }
-               x1_1_0 := x1_1.Args[0]
-               if x1_1_0.Op != OpARM64MOVDreg {
+               if x5.AuxInt != 1 {
                        break
                }
-               if w != x1_1_0.Args[0] {
+               if x5.Aux != s {
                        break
                }
-               x2 := x1.Args[2]
-               if x2.Op != OpARM64MOVBstore {
+               _ = x5.Args[2]
+               p1 := x5.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
-               if x2.AuxInt != i-3 {
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               x5_1 := x5.Args[1]
+               if x5_1.Op != OpARM64SRLconst {
                        break
                }
-               if x2.Aux != s {
+               if x5_1.AuxInt != 48 {
                        break
                }
-               _ = x2.Args[2]
-               if ptr != x2.Args[0] {
+               if w != x5_1.Args[0] {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64SRLconst {
+               x6 := x5.Args[2]
+               if x6.Op != OpARM64MOVBstoreidx {
                        break
                }
-               if x2_1.AuxInt != 24 {
+               _ = x6.Args[3]
+               ptr0 := x6.Args[0]
+               idx0 := x6.Args[1]
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpARM64SRLconst {
                        break
                }
-               x2_1_0 := x2_1.Args[0]
-               if x2_1_0.Op != OpARM64MOVDreg {
+               if x6_2.AuxInt != 56 {
                        break
                }
-               if w != x2_1_0.Args[0] {
+               if w != x6_2.Args[0] {
                        break
                }
-               mem := x2.Args[2]
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+               mem := x6.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = i - 3
-               v.Aux = s
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
+               v.reset(OpARM64MOVDstoreidx)
+               v.AddArg(ptr0)
+               v.AddArg(idx0)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, w.Type)
                v0.AddArg(w)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) mem))))
+       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w) x1:(MOVBstore [i-2] {s} ptr (UBFX [arm64BFAuxInt(16, 16)] w) x2:(MOVBstore [i-3] {s} ptr (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
        // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
        // result: (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
        for {
@@ -7007,10 +7587,10 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
                        break
                }
                x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64SRLconst {
+               if x0_1.Op != OpARM64UBFX {
                        break
                }
-               if x0_1.AuxInt != 8 {
+               if x0_1.AuxInt != arm64BFAuxInt(8, 24) {
                        break
                }
                if w != x0_1.Args[0] {
@@ -7031,10 +7611,10 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
                        break
                }
                x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64SRLconst {
+               if x1_1.Op != OpARM64UBFX {
                        break
                }
-               if x1_1.AuxInt != 16 {
+               if x1_1.AuxInt != arm64BFAuxInt(16, 16) {
                        break
                }
                if w != x1_1.Args[0] {
@@ -7055,10 +7635,10 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
                        break
                }
                x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64SRLconst {
+               if x2_1.Op != OpARM64UBFX {
                        break
                }
-               if x2_1.AuxInt != 24 {
+               if x2_1.AuxInt != arm64BFAuxInt(24, 8) {
                        break
                }
                if w != x2_1.Args[0] {
@@ -7078,1466 +7658,1189 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVBstore_20(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
+       // match: (MOVBstore [3] {s} p w x0:(MOVBstore [2] {s} p (UBFX [arm64BFAuxInt(8, 24)] w) x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (UBFX [arm64BFAuxInt(16, 16)] w) x2:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
        for {
-               i := v.AuxInt
+               if v.AuxInt != 3 {
+                       break
+               }
                s := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
+               p := v.Args[0]
                w := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstore {
+               x0 := v.Args[2]
+               if x0.Op != OpARM64MOVBstore {
                        break
                }
-               if x.AuxInt != i-1 {
+               if x0.AuxInt != 2 {
                        break
                }
-               if x.Aux != s {
+               if x0.Aux != s {
                        break
                }
-               _ = x.Args[2]
-               if ptr != x.Args[0] {
+               _ = x0.Args[2]
+               if p != x0.Args[0] {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpARM64SRLconst {
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64UBFX {
                        break
                }
-               if x_1.AuxInt != 8 {
+               if x0_1.AuxInt != arm64BFAuxInt(8, 24) {
                        break
                }
-               if w != x_1.Args[0] {
+               if w != x0_1.Args[0] {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               x1 := x0.Args[2]
+               if x1.Op != OpARM64MOVBstore {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 8)] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               w := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstore {
+               if x1.AuxInt != 1 {
                        break
                }
-               if x.AuxInt != i-1 {
+               if x1.Aux != s {
                        break
                }
-               if x.Aux != s {
+               _ = x1.Args[2]
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
-               _ = x.Args[2]
-               if ptr != x.Args[0] {
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64UBFX {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpARM64UBFX {
+               if x1_1.AuxInt != arm64BFAuxInt(16, 16) {
                        break
                }
-               if x_1.AuxInt != arm64BFAuxInt(8, 8) {
+               if w != x1_1.Args[0] {
                        break
                }
-               if w != x_1.Args[0] {
+               x2 := x1.Args[2]
+               if x2.Op != OpARM64MOVBstoreidx {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               _ = x2.Args[3]
+               ptr0 := x2.Args[0]
+               idx0 := x2.Args[1]
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpARM64UBFX {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               if x2_2.AuxInt != arm64BFAuxInt(24, 8) {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               mem := x2.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr0)
+               v.AddArg(idx0)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
                v0.AddArg(w)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
+       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w)) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVDreg w)) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
        for {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                w := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstore {
+               x0 := v.Args[2]
+               if x0.Op != OpARM64MOVBstore {
                        break
                }
-               if x.AuxInt != i-1 {
+               if x0.AuxInt != i-1 {
                        break
                }
-               if x.Aux != s {
+               if x0.Aux != s {
                        break
                }
-               _ = x.Args[2]
-               if ptr != x.Args[0] {
+               _ = x0.Args[2]
+               if ptr != x0.Args[0] {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpARM64SRLconst {
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64SRLconst {
                        break
                }
-               if x_1.AuxInt != 8 {
+               if x0_1.AuxInt != 8 {
                        break
                }
-               x_1_0 := x_1.Args[0]
-               if x_1_0.Op != OpARM64MOVDreg {
+               x0_1_0 := x0_1.Args[0]
+               if x0_1_0.Op != OpARM64MOVDreg {
                        break
                }
-               if w != x_1_0.Args[0] {
+               if w != x0_1_0.Args[0] {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               x1 := x0.Args[2]
+               if x1.Op != OpARM64MOVBstore {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               w := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstore {
+               if x1.AuxInt != i-2 {
                        break
                }
-               if x.AuxInt != i-1 {
+               if x1.Aux != s {
                        break
                }
-               if x.Aux != s {
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
                        break
                }
-               _ = x.Args[2]
-               if ptr != x.Args[0] {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64SRLconst {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpARM64UBFX {
+               if x1_1.AuxInt != 16 {
                        break
                }
-               if x_1.AuxInt != arm64BFAuxInt(8, 24) {
+               x1_1_0 := x1_1.Args[0]
+               if x1_1_0.Op != OpARM64MOVDreg {
                        break
                }
-               if w != x_1.Args[0] {
+               if w != x1_1_0.Args[0] {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               x2 := x1.Args[2]
+               if x2.Op != OpARM64MOVBstore {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = i - 1
+               if x2.AuxInt != i-3 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
+                       break
+               }
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x2_1.AuxInt != 24 {
+                       break
+               }
+               x2_1_0 := x2_1.Args[0]
+               if x2_1_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               if w != x2_1_0.Args[0] {
+                       break
+               }
+               mem := x2.Args[2]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = i - 3
                v.Aux = s
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
                v0.AddArg(w)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
+       // match: (MOVBstore [3] {s} p w x0:(MOVBstore [2] {s} p (SRLconst [8] (MOVDreg w)) x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] (MOVDreg w)) x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] (MOVDreg w)) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
        for {
-               i := v.AuxInt
+               if v.AuxInt != 3 {
+                       break
+               }
                s := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
+               p := v.Args[0]
                w := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstore {
+               x0 := v.Args[2]
+               if x0.Op != OpARM64MOVBstore {
                        break
                }
-               if x.AuxInt != i-1 {
+               if x0.AuxInt != 2 {
                        break
                }
-               if x.Aux != s {
+               if x0.Aux != s {
                        break
                }
-               _ = x.Args[2]
-               if ptr != x.Args[0] {
+               _ = x0.Args[2]
+               if p != x0.Args[0] {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpARM64SRLconst {
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64SRLconst {
                        break
                }
-               if x_1.AuxInt != 8 {
+               if x0_1.AuxInt != 8 {
                        break
                }
-               x_1_0 := x_1.Args[0]
-               if x_1_0.Op != OpARM64MOVDreg {
+               x0_1_0 := x0_1.Args[0]
+               if x0_1_0.Op != OpARM64MOVDreg {
                        break
                }
-               if w != x_1_0.Args[0] {
+               if w != x0_1_0.Args[0] {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               x1 := x0.Args[2]
+               if x1.Op != OpARM64MOVBstore {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVBstorezero_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVBstorezero [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               if x1.AuxInt != 1 {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpARM64MOVBstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               _ = x1.Args[2]
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64SRLconst {
                        break
                }
-               v.reset(OpARM64MOVBstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
-       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,1) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVHstorezero [min(i,j)] {s} ptr0 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               ptr0 := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVBstorezero {
+               if x1_1.AuxInt != 16 {
                        break
                }
-               j := x.AuxInt
-               if x.Aux != s {
+               x1_1_0 := x1_1.Args[0]
+               if x1_1_0.Op != OpARM64MOVDreg {
                        break
                }
-               _ = x.Args[1]
-               ptr1 := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 1) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               if w != x1_1_0.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = min(i, j)
-               v.Aux = s
+               x2 := x1.Args[2]
+               if x2.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x2.Args[3]
+               ptr0 := x2.Args[0]
+               idx0 := x2.Args[1]
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpARM64SRLconst {
+                       break
+               }
+               if x2_2.AuxInt != 24 {
+                       break
+               }
+               x2_2_0 := x2_2.Args[0]
+               if x2_2_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               if w != x2_2_0.Args[0] {
+                       break
+               }
+               mem := x2.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
                v.AddArg(ptr0)
+               v.AddArg(idx0)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVDload_0(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVBstore_30(v *Value) bool {
        b := v.Block
        _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVDload [off1+off2] {sym} ptr mem)
+       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               w := v.Args[1]
+               x0 := v.Args[2]
+               if x0.Op != OpARM64MOVBstore {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if x0.AuxInt != i-1 {
                        break
                }
-               v.reset(OpARM64MOVDload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if x0.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               _ = x0.Args[2]
+               if ptr != x0.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVDload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVDconst [0])
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDstorezero {
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64SRLconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               if x0_1.AuxInt != 8 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVDreg_0(v *Value) bool {
-       // match: (MOVDreg x)
-       // cond: x.Uses == 1
-       // result: (MOVDnop x)
-       for {
-               x := v.Args[0]
-               if !(x.Uses == 1) {
+               if w != x0_1.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVDnop)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVDreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x1 := x0.Args[2]
+               if x1.Op != OpARM64MOVBstore {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = c
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVDstore_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVDstore ptr (FMOVDfpgp val) mem)
-       // cond:
-       // result: (FMOVDstore ptr val mem)
-       for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64FMOVDfpgp {
+               if x1.AuxInt != i-2 {
                        break
                }
-               val := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64FMOVDstore)
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVDstore [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               if x1.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVDstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64SRLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if x1_1.AuxInt != 16 {
                        break
                }
-               v.reset(OpARM64MOVDstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem)
-       // cond:
-       // result: (MOVDstorezero [off] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if w != x1_1.Args[0] {
                        break
                }
-               if v_1.AuxInt != 0 {
+               x2 := x1.Args[2]
+               if x2.Op != OpARM64MOVBstore {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpARM64MOVDstorezero)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVDstorezero_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVDstorezero [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               if x2.AuxInt != i-3 {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if x2.Aux != s {
                        break
                }
-               v.reset(OpARM64MOVDstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64SRLconst {
                        break
                }
-               v.reset(OpARM64MOVDstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               if x2_1.AuxInt != 24 {
+                       break
+               }
+               if w != x2_1.Args[0] {
+                       break
+               }
+               mem := x2.Args[2]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = i - 3
+               v.Aux = s
                v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
-       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,8) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVQstorezero [min(i,j)] {s} ptr0 mem)
+       // match: (MOVBstore [3] {s} p w x0:(MOVBstore [2] {s} p (SRLconst [8] w) x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] w) x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] w) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
        for {
-               i := v.AuxInt
+               if v.AuxInt != 3 {
+                       break
+               }
                s := v.Aux
-               _ = v.Args[1]
-               ptr0 := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVDstorezero {
+               _ = v.Args[2]
+               p := v.Args[0]
+               w := v.Args[1]
+               x0 := v.Args[2]
+               if x0.Op != OpARM64MOVBstore {
                        break
                }
-               j := x.AuxInt
-               if x.Aux != s {
+               if x0.AuxInt != 2 {
                        break
                }
-               _ = x.Args[1]
-               ptr1 := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 8) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               if x0.Aux != s {
                        break
                }
-               v.reset(OpARM64MOVQstorezero)
-               v.AuxInt = min(i, j)
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHUload_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVHUload [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHUload [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               _ = x0.Args[2]
+               if p != x0.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64SRLconst {
                        break
                }
-               v.reset(OpARM64MOVHUload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if x0_1.AuxInt != 8 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if w != x0_1.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVHUload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVDconst [0])
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVHstorezero {
+               x1 := x0.Args[2]
+               if x1.Op != OpARM64MOVBstore {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               if x1.AuxInt != 1 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHUreg_0(v *Value) bool {
-       // match: (MOVHUreg x:(MOVBUload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUload {
+               if x1.Aux != s {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHUreg x:(MOVHUload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUload {
+               _ = x1.Args[2]
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHUreg x:(MOVBUreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUreg {
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64SRLconst {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHUreg x:(MOVHUreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUreg {
+               if x1_1.AuxInt != 16 {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHUreg (ANDconst [c] x))
-       // cond:
-       // result: (ANDconst [c&(1<<16-1)] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ANDconst {
+               if w != x1_1.Args[0] {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c & (1<<16 - 1)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHUreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(uint16(c))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x2 := x1.Args[2]
+               if x2.Op != OpARM64MOVBstoreidx {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(uint16(c))
-               return true
-       }
-       // match: (MOVHUreg (SLLconst [sc] x))
-       // cond: isARM64BFMask(sc, 1<<16-1, sc)
-       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               _ = x2.Args[3]
+               ptr0 := x2.Args[0]
+               idx0 := x2.Args[1]
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpARM64SRLconst {
                        break
                }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<16-1, sc)) {
+               if x2_2.AuxInt != 24 {
                        break
                }
-               v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHUreg (SRLconst [sc] x))
-       // cond: isARM64BFMask(sc, 1<<16-1, 0)
-       // result: (UBFX [arm64BFAuxInt(sc, 16)] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
+               if w != x2_2.Args[0] {
                        break
                }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<16-1, 0)) {
+               mem := x2.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)) {
                        break
                }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BFAuxInt(sc, 16)
-               v.AddArg(x)
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr0)
+               v.AddArg(idx0)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHload_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHload [off1+off2] {sym} ptr mem)
+       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if x.AuxInt != i-1 {
                        break
                }
-               v.reset(OpARM64MOVHload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if x.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               _ = x.Args[2]
+               if ptr != x.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVHload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               x_1 := x.Args[1]
+               if x_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x_1.AuxInt != 8 {
+                       break
+               }
+               if w != x_1.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = i - 1
+               v.Aux = s
                v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVDconst [0])
+       // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] w) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVHstorezero {
+               if v.AuxInt != 1 {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHreg_0(v *Value) bool {
-       // match: (MOVHreg x:(MOVBload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBload {
+               _ = v_0.Args[1]
+               ptr1 := v_0.Args[0]
+               idx1 := v_0.Args[1]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVBUload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUload {
+               _ = x.Args[3]
+               ptr0 := x.Args[0]
+               idx0 := x.Args[1]
+               x_2 := x.Args[2]
+               if x_2.Op != OpARM64SRLconst {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVHload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHload {
+               if x_2.AuxInt != 8 {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVBreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBreg {
+               if w != x_2.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVBUreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUreg {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr0)
+               v.AddArg(idx0)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVHreg x:(MOVHreg _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 8)] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHreg {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(int16(c))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if x.AuxInt != i-1 {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(int16(c))
-               return true
-       }
-       // match: (MOVHreg (SLLconst [lc] x))
-       // cond: lc < 16
-       // result: (SBFIZ [arm64BFAuxInt(lc, 16-lc)] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               if x.Aux != s {
                        break
                }
-               lc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(lc < 16) {
+               _ = x.Args[2]
+               if ptr != x.Args[0] {
                        break
                }
-               v.reset(OpARM64SBFIZ)
-               v.AuxInt = arm64BFAuxInt(lc, 16-lc)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHstore_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHstore [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               x_1 := x.Args[1]
+               if x_1.Op != OpARM64UBFX {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if x_1.AuxInt != arm64BFAuxInt(8, 8) {
+                       break
+               }
+               if w != x_1.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
                v.reset(OpARM64MOVHstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
+               v.AuxInt = i - 1
+               v.Aux = s
                v.AddArg(ptr)
-               v.AddArg(val)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(8, 8)] w) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               if v.AuxInt != 1 {
+                       break
+               }
+               s := v.Aux
                _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               _ = v_0.Args[1]
+               ptr1 := v_0.Args[0]
+               idx1 := v_0.Args[1]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(val)
+               _ = x.Args[3]
+               ptr0 := x.Args[0]
+               idx0 := x.Args[1]
+               x_2 := x.Args[2]
+               if x_2.Op != OpARM64UBFX {
+                       break
+               }
+               if x_2.AuxInt != arm64BFAuxInt(8, 8) {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr0)
+               v.AddArg(idx0)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [off] {sym} ptr (MOVDconst [0]) mem)
-       // cond:
-       // result: (MOVHstorezero [off] {sym} ptr mem)
+       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
+               i := v.AuxInt
+               s := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
                        break
                }
-               if v_1.AuxInt != 0 {
+               if x.AuxInt != i-1 {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = off
-               v.Aux = sym
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               if ptr != x.Args[0] {
+                       break
+               }
+               x_1 := x.Args[1]
+               if x_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x_1.AuxInt != 8 {
+                       break
+               }
+               x_1_0 := x_1.Args[0]
+               if x_1_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               if w != x_1_0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = i - 1
+               v.Aux = s
                v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [off] {sym} ptr (MOVHreg x) mem)
-       // cond:
-       // result: (MOVHstore [off] {sym} ptr x mem)
+       // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
+               if v.AuxInt != 1 {
+                       break
+               }
+               s := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVHreg {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstore [off] {sym} ptr (MOVHUreg x) mem)
-       // cond:
-       // result: (MOVHstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVHUreg {
+               _ = v_0.Args[1]
+               ptr1 := v_0.Args[0]
+               idx1 := v_0.Args[1]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstore [off] {sym} ptr (MOVWreg x) mem)
-       // cond:
-       // result: (MOVHstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWreg {
+               _ = x.Args[3]
+               ptr0 := x.Args[0]
+               idx0 := x.Args[1]
+               x_2 := x.Args[2]
+               if x_2.Op != OpARM64SRLconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstore [off] {sym} ptr (MOVWUreg x) mem)
-       // cond:
-       // result: (MOVHstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWUreg {
+               if x_2.AuxInt != 8 {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
+               x_2_0 := x_2.Args[0]
+               if x_2_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               if w != x_2_0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr0)
+               v.AddArg(idx0)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [i] {s} ptr0 (SRLconst [16] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVWstore [i-2] {s} ptr0 w mem)
+       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
        for {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[2]
-               ptr0 := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               ptr := v.Args[0]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
                        break
                }
-               if v_1.AuxInt != 16 {
+               if x.AuxInt != i-1 {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstore {
+               if x.Aux != s {
                        break
                }
-               if x.AuxInt != i-2 {
+               _ = x.Args[2]
+               if ptr != x.Args[0] {
                        break
                }
-               if x.Aux != s {
+               x_1 := x.Args[1]
+               if x_1.Op != OpARM64UBFX {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] {
+               if x_1.AuxInt != arm64BFAuxInt(8, 24) {
+                       break
+               }
+               if w != x_1.Args[0] {
                        break
                }
                mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = i - 2
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = i - 1
                v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w)
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVWstore [i-2] {s} ptr0 w mem)
+       // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(8, 24)] w) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr0 := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64UBFX {
+               if v.AuxInt != 1 {
                        break
                }
-               if v_1.AuxInt != arm64BFAuxInt(16, 16) {
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               w := v_1.Args[0]
+               _ = v_0.Args[1]
+               ptr1 := v_0.Args[0]
+               idx1 := v_0.Args[1]
+               w := v.Args[1]
                x := v.Args[2]
-               if x.Op != OpARM64MOVHstore {
+               if x.Op != OpARM64MOVBstoreidx {
                        break
                }
-               if x.AuxInt != i-2 {
+               _ = x.Args[3]
+               ptr0 := x.Args[0]
+               idx0 := x.Args[1]
+               x_2 := x.Args[2]
+               if x_2.Op != OpARM64UBFX {
                        break
                }
-               if x.Aux != s {
+               if x_2.AuxInt != arm64BFAuxInt(8, 24) {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] {
+               if w != x_2.Args[0] {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = i - 2
-               v.Aux = s
+               v.reset(OpARM64MOVHstoreidx)
                v.AddArg(ptr0)
-               v.AddArg(w)
+               v.AddArg(idx0)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVWstore [i-2] {s} ptr0 w mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVBstore_40(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
        for {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[2]
-               ptr0 := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               ptr := v.Args[0]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
                        break
                }
-               if v_1.AuxInt != 16 {
+               if x.AuxInt != i-1 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpARM64MOVDreg {
+               if x.Aux != s {
                        break
                }
-               w := v_1_0.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstore {
+               _ = x.Args[2]
+               if ptr != x.Args[0] {
                        break
                }
-               if x.AuxInt != i-2 {
+               x_1 := x.Args[1]
+               if x_1.Op != OpARM64SRLconst {
                        break
                }
-               if x.Aux != s {
+               if x_1.AuxInt != 8 {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] {
+               x_1_0 := x_1.Args[0]
+               if x_1_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               if w != x_1_0.Args[0] {
                        break
                }
                mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = i - 2
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = i - 1
                v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w)
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHstore_10(v *Value) bool {
-       // match: (MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVWstore [i-2] {s} ptr0 w0 mem)
+       // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
        for {
-               i := v.AuxInt
+               if v.AuxInt != 1 {
+                       break
+               }
                s := v.Aux
                _ = v.Args[2]
-               ptr0 := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
+               _ = v_0.Args[1]
+               ptr1 := v_0.Args[0]
+               idx1 := v_0.Args[1]
+               w := v.Args[1]
                x := v.Args[2]
-               if x.Op != OpARM64MOVHstore {
-                       break
-               }
-               if x.AuxInt != i-2 {
+               if x.Op != OpARM64MOVBstoreidx {
                        break
                }
-               if x.Aux != s {
+               _ = x.Args[3]
+               ptr0 := x.Args[0]
+               idx0 := x.Args[1]
+               x_2 := x.Args[2]
+               if x_2.Op != OpARM64SRLconst {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != OpARM64SRLconst {
+               if x_2.AuxInt != 8 {
                        break
                }
-               if w0.AuxInt != j-16 {
+               x_2_0 := x_2.Args[0]
+               if x_2_0.Op != OpARM64MOVDreg {
                        break
                }
-               if w != w0.Args[0] {
+               if w != x_2_0.Args[0] {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = i - 2
-               v.Aux = s
+               v.reset(OpARM64MOVHstoreidx)
                v.AddArg(ptr0)
-               v.AddArg(w0)
+               v.AddArg(idx0)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHstorezero_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHstorezero [off1+off2] {sym} ptr mem)
+func rewriteValueARM64_OpARM64MOVBstoreidx_0(v *Value) bool {
+       // match: (MOVBstoreidx ptr (MOVDconst [c]) val mem)
+       // cond:
+       // result: (MOVBstore [c] ptr val mem)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
+               c := v_1.AuxInt
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = c
                v.AddArg(ptr)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // match: (MOVBstoreidx (MOVDconst [c]) idx val mem)
+       // cond:
+       // result: (MOVBstore [c] idx val mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[3]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               c := v_0.AuxInt
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = c
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx ptr idx (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVBstorezeroidx ptr idx mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               if v_2.AuxInt != 0 {
+                       break
+               }
+               mem := v.Args[3]
+               v.reset(OpARM64MOVBstorezeroidx)
                v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
-       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,2) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVWstorezero [min(i,j)] {s} ptr0 mem)
+       // match: (MOVBstoreidx ptr idx (MOVBreg x) mem)
+       // cond:
+       // result: (MOVBstoreidx ptr idx x mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               ptr0 := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVHstorezero {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVBreg {
                        break
                }
-               j := x.AuxInt
-               if x.Aux != s {
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx ptr idx (MOVBUreg x) mem)
+       // cond:
+       // result: (MOVBstoreidx ptr idx x mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVBUreg {
                        break
                }
-               _ = x.Args[1]
-               ptr1 := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 2) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx ptr idx (MOVHreg x) mem)
+       // cond:
+       // result: (MOVBstoreidx ptr idx x mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHreg {
                        break
                }
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = min(i, j)
-               v.Aux = s
-               v.AddArg(ptr0)
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx ptr idx (MOVHUreg x) mem)
+       // cond:
+       // result: (MOVBstoreidx ptr idx x mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHUreg {
+                       break
+               }
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx ptr idx (MOVWreg x) mem)
+       // cond:
+       // result: (MOVBstoreidx ptr idx x mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWreg {
+                       break
+               }
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx ptr idx (MOVWUreg x) mem)
+       // cond:
+       // result: (MOVBstoreidx ptr idx x mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWUreg {
+                       break
+               }
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVQstorezero_0(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVBstorezero_0(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVQstorezero [off1+off2] {sym} ptr mem)
+       // result: (MOVBstorezero [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -8552,16 +8855,16 @@ func rewriteValueARM64_OpARM64MOVQstorezero_0(v *Value) bool {
                if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVQstorezero)
+               v.reset(OpARM64MOVBstorezero)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // match: (MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // result: (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -8577,244 +8880,148 @@ func rewriteValueARM64_OpARM64MOVQstorezero_0(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVQstorezero)
+               v.reset(OpARM64MOVBstorezero)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWUload [off1+off2] {sym} ptr mem)
+       // match: (MOVBstorezero [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVBstorezeroidx ptr idx mem)
        for {
-               off1 := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               off2 := v_0.AuxInt
+               _ = v_0.Args[1]
                ptr := v_0.Args[0]
+               idx := v_0.Args[1]
                mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVWUload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
+               v.reset(OpARM64MOVBstorezeroidx)
                v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // match: (MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
+       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,1) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVHstorezero [min(i,j)] {s} ptr0 mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               i := v.AuxInt
+               s := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               ptr0 := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVBstorezero {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               j := x.AuxInt
+               if x.Aux != s {
                        break
                }
-               v.reset(OpARM64MOVWUload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
+               _ = x.Args[1]
+               ptr1 := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 1) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = min(i, j)
+               v.Aux = s
+               v.AddArg(ptr0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVDconst [0])
+       // match: (MOVBstorezero [1] {s} (ADD ptr0 idx0) x:(MOVBstorezeroidx ptr1 idx1 mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstorezeroidx ptr1 idx1 mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
+               if v.AuxInt != 1 {
+                       break
+               }
+               s := v.Aux
                _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWstorezero {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVBstorezeroidx {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstorezeroidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool {
-       // match: (MOVWUreg x:(MOVBUload _ _))
+func rewriteValueARM64_OpARM64MOVBstorezeroidx_0(v *Value) bool {
+       // match: (MOVBstorezeroidx ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVBstorezero [c] ptr mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUload {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBstorezero)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWUreg x:(MOVHUload _ _))
+       // match: (MOVBstorezeroidx (MOVDconst [c]) idx mem)
        // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUload {
-                       break
-               }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWUreg x:(MOVWUload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVWUload {
-                       break
-               }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWUreg x:(MOVBUreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUreg {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWUreg x:(MOVHUreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUreg {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWUreg x:(MOVWUreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVWUreg {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWUreg (ANDconst [c] x))
-       // cond:
-       // result: (ANDconst [c&(1<<32-1)] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ANDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c & (1<<32 - 1)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWUreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(uint32(c))])
+       // result: (MOVBstorezero [c] idx mem)
        for {
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(uint32(c))
-               return true
-       }
-       // match: (MOVWUreg (SLLconst [sc] x))
-       // cond: isARM64BFMask(sc, 1<<32-1, sc)
-       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
-                       break
-               }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<32-1, sc)) {
-                       break
-               }
-               v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWUreg (SRLconst [sc] x))
-       // cond: isARM64BFMask(sc, 1<<32-1, 0)
-       // result: (UBFX [arm64BFAuxInt(sc, 32)] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
-                       break
-               }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<32-1, 0)) {
-                       break
-               }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BFAuxInt(sc, 32)
-               v.AddArg(x)
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBstorezero)
+               v.AuxInt = c
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVDload_0(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWload [off1+off2] {sym} ptr mem)
+       // result: (MOVDload [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -8829,16 +9036,40 @@ func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool {
                if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVWload)
+               v.reset(OpARM64MOVDload)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // match: (MOVDload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVDloadidx ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVDloadidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -8854,14 +9085,14 @@ func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVWload)
+               v.reset(OpARM64MOVDload)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
+       // match: (MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _))
        // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
        // result: (MOVDconst [0])
        for {
@@ -8870,7 +9101,7 @@ func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool {
                _ = v.Args[1]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWstorezero {
+               if v_1.Op != OpARM64MOVDstorezero {
                        break
                }
                off2 := v_1.AuxInt
@@ -8886,202 +9117,172 @@ func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWreg_0(v *Value) bool {
-       // match: (MOVWreg x:(MOVBload _ _))
+func rewriteValueARM64_OpARM64MOVDloadidx_0(v *Value) bool {
+       // match: (MOVDloadidx ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVDload [c] ptr mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBload {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWreg x:(MOVBUload _ _))
+       // match: (MOVDloadidx (MOVDconst [c]) ptr mem)
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVDload [c] ptr mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUload {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               c := v_0.AuxInt
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWreg x:(MOVHload _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHload {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDstorezeroidx {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHUload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUload {
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MOVWreg x:(MOVWload _ _))
-       // cond:
-       // result: (MOVDreg x)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDreg_0(v *Value) bool {
+       // match: (MOVDreg x)
+       // cond: x.Uses == 1
+       // result: (MOVDnop x)
        for {
                x := v.Args[0]
-               if x.Op != OpARM64MOVWload {
+               if !(x.Uses == 1) {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
+               v.reset(OpARM64MOVDnop)
                v.AddArg(x)
                return true
        }
-       // match: (MOVWreg x:(MOVBreg _))
+       // match: (MOVDreg (MOVDconst [c]))
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVDconst [c])
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBreg {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = c
                return true
        }
-       // match: (MOVWreg x:(MOVBUreg _))
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDstore_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVDstore ptr (FMOVDfpgp val) mem)
        // cond:
-       // result: (MOVDreg x)
+       // result: (FMOVDstore ptr val mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUreg {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHreg {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHreg {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVWreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVWreg {
-                       break
-               }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWreg_10(v *Value) bool {
-       // match: (MOVWreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(int32(c))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64FMOVDfpgp {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(int32(c))
+               val := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64FMOVDstore)
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWreg (SLLconst [lc] x))
-       // cond: lc < 32
-       // result: (SBFIZ [arm64BFAuxInt(lc, 32-lc)] x)
+       // match: (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVDstore [off1+off2] {sym} ptr val mem)
        for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               lc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(lc < 32) {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64SBFIZ)
-               v.AuxInt = arm64BFAuxInt(lc, 32-lc)
-               v.AddArg(x)
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWstore [off1+off2] {sym} ptr val mem)
+       // match: (MOVDstore [off] {sym} (ADD ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVDstoreidx ptr idx val mem)
        for {
-               off1 := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               off2 := v_0.AuxInt
+               _ = v_0.Args[1]
                ptr := v_0.Args[0]
+               idx := v_0.Args[1]
                val := v.Args[1]
                mem := v.Args[2]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
+               v.reset(OpARM64MOVDstoreidx)
                v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
+       // match: (MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       // result: (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -9098,7 +9299,7 @@ func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
+               v.reset(OpARM64MOVDstore)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -9106,9 +9307,9 @@ func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem)
+       // match: (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem)
        // cond:
-       // result: (MOVWstorezero [off] {sym} ptr mem)
+       // result: (MOVDstorezero [off] {sym} ptr mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -9122,158 +9323,269 @@ func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool {
                        break
                }
                mem := v.Args[2]
-               v.reset(OpARM64MOVWstorezero)
+               v.reset(OpARM64MOVDstorezero)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDstoreidx_0(v *Value) bool {
+       // match: (MOVDstoreidx ptr (MOVDconst [c]) val mem)
        // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
+       // result: (MOVDstore [c] ptr val mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWreg {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
+               c := v_1.AuxInt
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = c
                v.AddArg(ptr)
-               v.AddArg(x)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [off] {sym} ptr (MOVWUreg x) mem)
+       // match: (MOVDstoreidx (MOVDconst [c]) idx val mem)
        // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
+       // result: (MOVDstore [c] idx val mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWUreg {
+               _ = v.Args[3]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
+               c := v_0.AuxInt
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = c
+               v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVDstore [i-4] {s} ptr0 w mem)
+       // match: (MOVDstoreidx ptr idx (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVDstorezeroidx ptr idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr0 := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               if v_1.AuxInt != 32 {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVWstore {
+               if v_2.AuxInt != 0 {
                        break
                }
-               if x.AuxInt != i-4 {
+               mem := v.Args[3]
+               v.reset(OpARM64MOVDstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDstorezero_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVDstorezero [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               if x.Aux != s {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] {
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVDstore)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w)
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVDstore [i-4] {s} ptr0 w0 mem)
+       // match: (MOVDstorezero [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVDstorezeroidx ptr idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr0 := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVWstore {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               if x.AuxInt != i-4 {
+               v.reset(OpARM64MOVDstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
+       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,8) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVQstorezero [min(i,j)] {s} ptr0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               ptr0 := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVDstorezero {
                        break
                }
+               j := x.AuxInt
                if x.Aux != s {
                        break
                }
-               _ = x.Args[2]
+               _ = x.Args[1]
                ptr1 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != OpARM64SRLconst {
+               mem := x.Args[1]
+               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 8) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
                        break
                }
-               if w0.AuxInt != j-32 {
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = min(i, j)
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstorezero [8] {s} p0:(ADD ptr0 idx0) x:(MOVDstorezeroidx ptr1 idx1 mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVQstorezero [0] {s} p0 mem)
+       for {
+               if v.AuxInt != 8 {
                        break
                }
-               if w != w0.Args[0] {
+               s := v.Aux
+               _ = v.Args[1]
+               p0 := v.Args[0]
+               if p0.Op != OpARM64ADD {
+                       break
+               }
+               _ = p0.Args[1]
+               ptr0 := p0.Args[0]
+               idx0 := p0.Args[1]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVDstorezeroidx {
                        break
                }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
                mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVDstore)
-               v.AuxInt = i - 4
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = 0
                v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w0)
+               v.AddArg(p0)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWstorezero_0(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVDstorezeroidx_0(v *Value) bool {
+       // match: (MOVDstorezeroidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVDstorezero [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstorezeroidx (MOVDconst [c]) idx mem)
+       // cond:
+       // result: (MOVDstorezero [c] idx mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = c
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHUload_0(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVHUload [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWstorezero [off1+off2] {sym} ptr mem)
+       // result: (MOVHUload [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -9288,16 +9600,40 @@ func rewriteValueARM64_OpARM64MOVWstorezero_0(v *Value) bool {
                if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVWstorezero)
+               v.reset(OpARM64MOVHUload)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // match: (MOVHUload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVHUloadidx ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVHUloadidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // result: (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -9313,1497 +9649,5693 @@ func rewriteValueARM64_OpARM64MOVWstorezero_0(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVWstorezero)
+               v.reset(OpARM64MOVHUload)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
-       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,4) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVDstorezero [min(i,j)] {s} ptr0 mem)
+       // match: (MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
        for {
-               i := v.AuxInt
-               s := v.Aux
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               ptr0 := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVWstorezero {
-                       break
-               }
-               j := x.AuxInt
-               if x.Aux != s {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVHstorezero {
                        break
                }
-               _ = x.Args[1]
-               ptr1 := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 4) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpARM64MOVDstorezero)
-               v.AuxInt = min(i, j)
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MUL_0(v *Value) bool {
-       // match: (MUL (NEG x) y)
+func rewriteValueARM64_OpARM64MOVHUloadidx_0(v *Value) bool {
+       // match: (MOVHUloadidx ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (MNEG x y)
+       // result: (MOVHUload [c] ptr mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64NEG {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
-               v.reset(OpARM64MNEG)
-               v.AddArg(x)
-               v.AddArg(y)
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHUload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL y (NEG x))
+       // match: (MOVHUloadidx (MOVDconst [c]) ptr mem)
        // cond:
-       // result: (MNEG x y)
+       // result: (MOVHUload [c] ptr mem)
        for {
-               _ = v.Args[1]
-               y := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64NEG {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_1.Args[0]
-               v.reset(OpARM64MNEG)
-               v.AddArg(x)
-               v.AddArg(y)
+               c := v_0.AuxInt
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHUload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL x (MOVDconst [-1]))
-       // cond:
-       // result: (NEG x)
+       // match: (MOVHUloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHstorezeroidx {
                        break
                }
-               if v_1.AuxInt != -1 {
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MUL (MOVDconst [-1]) x)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHUreg_0(v *Value) bool {
+       // match: (MOVHUreg x:(MOVBUload _ _))
        // cond:
-       // result: (NEG x)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               if v_0.AuxInt != -1 {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUload {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpARM64NEG)
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MUL _ (MOVDconst [0]))
+       // match: (MOVHUreg x:(MOVHUload _ _))
        // cond:
-       // result: (MOVDconst [0])
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               if v_1.AuxInt != 0 {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUload {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVDconst [0]) _)
+       // match: (MOVHUreg x:(MOVBUloadidx _ _ _))
        // cond:
-       // result: (MOVDconst [0])
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if v_0.AuxInt != 0 {
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHUreg x:(MOVHUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUloadidx {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MUL x (MOVDconst [1]))
+       // match: (MOVHUreg x:(MOVBUreg _))
        // cond:
-       // result: x
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[1]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if x.Op != OpARM64MOVBUreg {
                        break
                }
-               if v_1.AuxInt != 1 {
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHUreg x:(MOVHUreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUreg {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVDconst [1]) x)
+       // match: (MOVHUreg (ANDconst [c] x))
        // cond:
-       // result: x
+       // result: (ANDconst [c&(1<<16-1)] x)
        for {
-               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64ANDconst {
                        break
                }
-               if v_0.AuxInt != 1 {
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c & (1<<16 - 1)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHUreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(uint16(c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(uint16(c))
                return true
        }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
+       // match: (MOVHUreg (SLLconst [sc] x))
+       // cond: isARM64BFMask(sc, 1<<16-1, sc)
+       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))] x)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c)) {
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<16-1, sc)) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c)
+               v.reset(OpARM64UBFIZ)
+               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))
                v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
+       // match: (MOVHUreg (SRLconst [sc] x))
+       // cond: isARM64BFMask(sc, 1<<16-1, 0)
+       // result: (UBFX [arm64BFAuxInt(sc, 16)] x)
        for {
-               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64SRLconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c)) {
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<16-1, 0)) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c)
+               v.reset(OpARM64UBFX)
+               v.AuxInt = arm64BFAuxInt(sc, 16)
                v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MUL_10(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVHload_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (MUL x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c-1) && c >= 3
-       // result: (ADDshiftLL x x [log2(c-1)])
+       config := b.Func.Config
+       _ = config
+       // match: (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVHload [off1+off2] {sym} ptr mem)
        for {
+               off1 := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c-1) && c >= 3) {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c - 1)
-               v.AddArg(x)
-               v.AddArg(x)
+               v.reset(OpARM64MOVHload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c-1) && c >= 3
-       // result: (ADDshiftLL x x [log2(c-1)])
+       // match: (MOVHload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVHloadidx ptr idx mem)
        for {
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c-1) && c >= 3) {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c - 1)
-               v.AddArg(x)
-               v.AddArg(x)
+               v.reset(OpARM64MOVHloadidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c+1) && c >= 7
-       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       // match: (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c+1) && c >= 7) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c + 1)
-               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpARM64MOVHload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c+1) && c >= 7
-       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       // match: (MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
        for {
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVHstorezero {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c+1) && c >= 7) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c + 1)
-               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: c%3 == 0 && isPowerOfTwo(c/3)
-       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHloadidx_0(v *Value) bool {
+       // match: (MOVHloadidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVHload [c] ptr mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[2]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
-                       break
-               }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 1
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: c%3 == 0 && isPowerOfTwo(c/3)
-       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       // match: (MOVHloadidx (MOVDconst [c]) ptr mem)
+       // cond:
+       // result: (MOVHload [c] ptr mem)
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
-                       break
-               }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 1
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: c%5 == 0 && isPowerOfTwo(c/5)
-       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
-                       break
-               }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: c%5 == 0 && isPowerOfTwo(c/5)
-       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       // match: (MOVHloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHstorezeroidx {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: c%7 == 0 && isPowerOfTwo(c/7)
-       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHreg_0(v *Value) bool {
+       // match: (MOVHreg x:(MOVBload _ _))
+       // cond:
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[1]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+               if x.Op != OpARM64MOVBload {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: c%7 == 0 && isPowerOfTwo(c/7)
-       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+       // match: (MOVHreg x:(MOVBUload _ _))
+       // cond:
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUload {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MUL_20(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MUL x (MOVDconst [c]))
-       // cond: c%9 == 0 && isPowerOfTwo(c/9)
-       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       // match: (MOVHreg x:(MOVHload _ _))
+       // cond:
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[1]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+               if x.Op != OpARM64MOVHload {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: c%9 == 0 && isPowerOfTwo(c/9)
-       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       // match: (MOVHreg x:(MOVBloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBloadidx {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVDconst [c]) (MOVDconst [d]))
+       // match: (MOVHreg x:(MOVBUloadidx _ _ _))
        // cond:
-       // result: (MOVDconst [c*d])
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUloadidx {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = c * d
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVDconst [d]) (MOVDconst [c]))
+       // match: (MOVHreg x:(MOVHloadidx _ _ _))
        // cond:
-       // result: (MOVDconst [c*d])
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               d := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHloadidx {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = c * d
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MULW_0(v *Value) bool {
-       // match: (MULW (NEG x) y)
+       // match: (MOVHreg x:(MOVBreg _))
        // cond:
-       // result: (MNEGW x y)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64NEG {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBreg {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
-               v.reset(OpARM64MNEGW)
+               v.reset(OpARM64MOVDreg)
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (MULW y (NEG x))
+       // match: (MOVHreg x:(MOVBUreg _))
        // cond:
-       // result: (MNEGW x y)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[1]
-               y := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64NEG {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUreg {
                        break
                }
-               x := v_1.Args[0]
-               v.reset(OpARM64MNEGW)
+               v.reset(OpARM64MOVDreg)
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: int32(c)==-1
-       // result: (NEG x)
+       // match: (MOVHreg x:(MOVHreg _))
+       // cond:
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[1]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(int32(c) == -1) {
+               if x.Op != OpARM64MOVHreg {
                        break
                }
-               v.reset(OpARM64NEG)
+               v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: int32(c)==-1
-       // result: (NEG x)
+       // match: (MOVHreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(int16(c))])
        for {
-               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               x := v.Args[1]
-               if !(int32(c) == -1) {
-                       break
-               }
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(int16(c))
                return true
        }
-       // match: (MULW _ (MOVDconst [c]))
-       // cond: int32(c)==0
-       // result: (MOVDconst [0])
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHreg_10(v *Value) bool {
+       // match: (MOVHreg (SLLconst [lc] x))
+       // cond: lc < 16
+       // result: (SBFIZ [arm64BFAuxInt(lc, 16-lc)] x)
        for {
-               _ = v.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(int32(c) == 0) {
+               lc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(lc < 16) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               v.reset(OpARM64SBFIZ)
+               v.AuxInt = arm64BFAuxInt(lc, 16-lc)
+               v.AddArg(x)
                return true
        }
-       // match: (MULW (MOVDconst [c]) _)
-       // cond: int32(c)==0
-       // result: (MOVDconst [0])
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHstore_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVHstore [off1+off2] {sym} ptr val mem)
        for {
-               _ = v.Args[1]
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               c := v_0.AuxInt
-               if !(int32(c) == 0) {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: int32(c)==1
-       // result: x
+       // match: (MOVHstore [off] {sym} (ADD ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVHstoreidx ptr idx val mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               c := v_1.AuxInt
-               if !(int32(c) == 1) {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: int32(c)==1
-       // result: x
+       // match: (MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
        for {
-               _ = v.Args[1]
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(int32(c) == 1) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
+       // match: (MOVHstore [off] {sym} ptr (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVHstorezero [off] {sym} ptr mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c)) {
+               if v_1.AuxInt != 0 {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
+       // match: (MOVHstore [off] {sym} ptr (MOVHreg x) mem)
+       // cond:
+       // result: (MOVHstore [off] {sym} ptr x mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c)) {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVHreg {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c)
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MULW_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MULW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
-       // result: (ADDshiftLL x x [log2(c-1)])
+       // match: (MOVHstore [off] {sym} ptr (MOVHUreg x) mem)
+       // cond:
+       // result: (MOVHstore [off] {sym} ptr x mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+               if v_1.Op != OpARM64MOVHUreg {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c - 1)
-               v.AddArg(x)
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
-       // result: (ADDshiftLL x x [log2(c-1)])
+       // match: (MOVHstore [off] {sym} ptr (MOVWreg x) mem)
+       // cond:
+       // result: (MOVHstore [off] {sym} ptr x mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWreg {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c - 1)
-               v.AddArg(x)
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
-       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       // match: (MOVHstore [off] {sym} ptr (MOVWUreg x) mem)
+       // cond:
+       // result: (MOVHstore [off] {sym} ptr x mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
+               if v_1.Op != OpARM64MOVWUreg {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c + 1)
-               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
-       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
-                       break
-               }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c + 1)
-               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
-       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       // match: (MOVHstore [i] {s} ptr0 (SRLconst [16] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVWstore [i-2] {s} ptr0 w mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+               if v_1.AuxInt != 16 {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 1
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
-       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstore {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+               if x.AuxInt != i-2 {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 1
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
-       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if x.Aux != s {
                        break
                }
-               c := v_1.AuxInt
-               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               if w != x.Args[1] {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
-       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVWstoreidx ptr1 idx1 w mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v.AuxInt != 2 {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
-       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
+               if v_1.AuxInt != 16 {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
-       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstoreidx {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MULW_20(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
-       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+func rewriteValueARM64_OpARM64MOVHstore_10(v *Value) bool {
+       // match: (MOVHstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVWstore [i-2] {s} ptr0 w mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64UBFX {
                        break
                }
-               c := v_1.AuxInt
-               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+               if v_1.AuxInt != arm64BFAuxInt(16, 16) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
-       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstore {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+               if x.AuxInt != i-2 {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [int64(int32(c)*int32(d))])
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if x.Aux != s {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               if w != x.Args[1] {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(int32(c) * int32(d))
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULW (MOVDconst [d]) (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(int32(c)*int32(d))])
+       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstoreidx ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVWstoreidx ptr1 idx1 w mem)
        for {
-               _ = v.Args[1]
+               if v.AuxInt != 2 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               d := v_0.AuxInt
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64UBFX {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(int32(c) * int32(d))
+               if v_1.AuxInt != arm64BFAuxInt(16, 16) {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MVN_0(v *Value) bool {
-       // match: (MVN (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [^c])
+       // match: (MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVWstore [i-2] {s} ptr0 w mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = ^c
+               if v_1.AuxInt != 16 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               w := v_1_0.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64NEG_0(v *Value) bool {
-       // match: (NEG (MUL x y))
-       // cond:
-       // result: (MNEG x y)
+       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVWstoreidx ptr1 idx1 w mem)
        for {
+               if v.AuxInt != 2 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MUL {
+               if v_0.Op != OpARM64ADD {
                        break
                }
                _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpARM64MNEG)
-               v.AddArg(x)
-               v.AddArg(y)
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if v_1.AuxInt != 16 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               w := v_1_0.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w)
+               v.AddArg(mem)
                return true
        }
-       // match: (NEG (MULW x y))
-       // cond:
-       // result: (MNEGW x y)
+       // match: (MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVWstore [i-2] {s} ptr0 w0 mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MULW {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpARM64MNEGW)
-               v.AddArg(x)
-               v.AddArg(y)
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               w0 := x.Args[1]
+               if w0.Op != OpARM64SRLconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w0)
+               v.AddArg(mem)
                return true
        }
-       // match: (NEG (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [-c])
+       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx ptr1 idx1 w0:(SRLconst [j-16] w) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVWstoreidx ptr1 idx1 w0 mem)
        for {
+               if v.AuxInt != 2 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -c
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               w0 := x.Args[2]
+               if w0.Op != OpARM64SRLconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w0)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64NotEqual_0(v *Value) bool {
-       // match: (NotEqual (FlagEQ))
+func rewriteValueARM64_OpARM64MOVHstoreidx_0(v *Value) bool {
+       // match: (MOVHstoreidx ptr (MOVDconst [c]) val mem)
        // cond:
-       // result: (MOVDconst [0])
+       // result: (MOVHstore [c] ptr val mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagEQ {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               c := v_1.AuxInt
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (NotEqual (FlagLT_ULT))
+       // match: (MOVHstoreidx (MOVDconst [c]) idx val mem)
        // cond:
-       // result: (MOVDconst [1])
+       // result: (MOVHstore [c] idx val mem)
        for {
+               _ = v.Args[3]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_ULT {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               c := v_0.AuxInt
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = c
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (NotEqual (FlagLT_UGT))
+       // match: (MOVHstoreidx ptr idx (MOVDconst [0]) mem)
        // cond:
-       // result: (MOVDconst [1])
+       // result: (MOVHstorezeroidx ptr idx mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_UGT {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
-               return true
-       }
-       // match: (NotEqual (FlagGT_ULT))
+               if v_2.AuxInt != 0 {
+                       break
+               }
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx ptr idx (MOVHreg x) mem)
        // cond:
-       // result: (MOVDconst [1])
+       // result: (MOVHstoreidx ptr idx x mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_ULT {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHreg {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (NotEqual (FlagGT_UGT))
+       // match: (MOVHstoreidx ptr idx (MOVHUreg x) mem)
        // cond:
-       // result: (MOVDconst [1])
+       // result: (MOVHstoreidx ptr idx x mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_UGT {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHUreg {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (NotEqual (InvertFlags x))
+       // match: (MOVHstoreidx ptr idx (MOVWreg x) mem)
        // cond:
-       // result: (NotEqual x)
+       // result: (MOVHstoreidx ptr idx x mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64InvertFlags {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWreg {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpARM64NotEqual)
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx ptr idx (MOVWUreg x) mem)
+       // cond:
+       // result: (MOVHstoreidx ptr idx x mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWUreg {
+                       break
+               }
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64OR_0(v *Value) bool {
-       // match: (OR x (MOVDconst [c]))
-       // cond:
-       // result: (ORconst [c] x)
+func rewriteValueARM64_OpARM64MOVHstorezero_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVHstorezero [off1+off2] {sym} ptr mem)
        for {
+               off1 := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ORconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (OR (MOVDconst [c]) x)
-       // cond:
-       // result: (ORconst [c] x)
+       // match: (MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               c := v_0.AuxInt
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstorezero [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVHstorezeroidx ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVHstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
+       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,2) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVWstorezero [min(i,j)] {s} ptr0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               ptr0 := v.Args[0]
                x := v.Args[1]
-               v.reset(OpARM64ORconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               if x.Op != OpARM64MOVHstorezero {
+                       break
+               }
+               j := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[1]
+               ptr1 := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 2) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = min(i, j)
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(mem)
                return true
        }
-       // match: (OR x x)
-       // cond:
-       // result: x
+       // match: (MOVHstorezero [2] {s} (ADD ptr0 idx0) x:(MOVHstorezeroidx ptr1 idx1 mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVWstorezeroidx ptr1 idx1 mem)
        for {
+               if v.AuxInt != 2 {
+                       break
+               }
+               s := v.Aux
                _ = v.Args[1]
-               x := v.Args[0]
-               if x != v.Args[1] {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVHstorezeroidx {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstorezeroidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(mem)
                return true
        }
-       // match: (OR x (MVN y))
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHstorezeroidx_0(v *Value) bool {
+       // match: (MOVHstorezeroidx ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (ORN x y)
+       // result: (MOVHstorezero [c] ptr mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[2]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MVN {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpARM64ORN)
-               v.AddArg(x)
-               v.AddArg(y)
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (OR (MVN y) x)
+       // match: (MOVHstorezeroidx (MOVDconst [c]) idx mem)
        // cond:
-       // result: (ORN x y)
+       // result: (MOVHstorezero [c] idx mem)
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MVN {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpARM64ORN)
-               v.AddArg(x)
-               v.AddArg(y)
+               c := v_0.AuxInt
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = c
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (OR x0 x1:(SLLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftLL x0 y [c])
+       return false
+}
+func rewriteValueARM64_OpARM64MOVQstorezero_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVQstorezero [off1+off2] {sym} ptr mem)
        for {
+               off1 := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MOVWUload_0 applies the rewrite rules for
+// MOVWUload (32-bit zero-extending load): fold a constant offset coming
+// from an ADDconst base, select the register-indexed MOVWUloadidx form for
+// an unadorned base+index (ADD) address, merge a MOVDaddr symbol/offset
+// into the load's aux fields, and fold a load of a location just zeroed by
+// MOVWstorezero into the constant 0. Reports whether a rewrite fired.
+func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWUload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               // Folding off SB is disallowed in shared (PIC) mode.
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVWUload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWUload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWUloadidx ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVWUloadidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVWUload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWstorezero {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MOVWUloadidx_0 applies the rewrite rules for the
+// register-indexed MOVWUloadidx: when either the index or the base is a
+// MOVDconst, degrade back to the immediate-offset MOVWUload form, and fold
+// a load of a location just zeroed by MOVWstorezeroidx (with the same
+// base/index pair in either order) into the constant 0.
+func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool {
+       // match: (MOVWUloadidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVWUload [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWUload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWUloadidx (MOVDconst [c]) ptr mem)
+       // cond:
+       // result: (MOVWUload [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWUload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWstorezeroidx {
+                       break
+               }
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MOVWUreg_0 holds the first batch of rewrite rules
+// for MOVWUreg (zero-extend the low 32 bits): the extension is elided (via
+// MOVDreg) when the operand is already a narrower unsigned load, indexed
+// load, or unsigned extension, and a MOVWUreg of an ANDconst is folded into
+// the AND mask.
+func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool {
+       // match: (MOVWUreg x:(MOVBUload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUload {
+                       break
+               }
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVHUload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUload {
+                       break
+               }
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVWUload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWUload {
+                       break
+               }
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVBUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVHUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUloadidx {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVWUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWUloadidx {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVBUreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVHUreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVWUreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWUreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg (ANDconst [c] x))
+       // cond:
+       // result: (ANDconst [c&(1<<32-1)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ANDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c & (1<<32 - 1)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MOVWUreg_10 holds the second batch of MOVWUreg
+// rewrite rules: constant folding (truncate to uint32), and merging the
+// zero-extension with a constant shift into an unsigned bitfield
+// insert/extract (UBFIZ/UBFX) when the shift amount admits a bitfield mask.
+func rewriteValueARM64_OpARM64MOVWUreg_10(v *Value) bool {
+       // match: (MOVWUreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(uint32(c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(uint32(c))
+               return true
+       }
+       // match: (MOVWUreg (SLLconst [sc] x))
+       // cond: isARM64BFMask(sc, 1<<32-1, sc)
+       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
+                       break
+               }
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<32-1, sc)) {
+                       break
+               }
+               v.reset(OpARM64UBFIZ)
+               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg (SRLconst [sc] x))
+       // cond: isARM64BFMask(sc, 1<<32-1, 0)
+       // result: (UBFX [arm64BFAuxInt(sc, 32)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRLconst {
+                       break
+               }
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<32-1, 0)) {
+                       break
+               }
+               v.reset(OpARM64UBFX)
+               v.AuxInt = arm64BFAuxInt(sc, 32)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MOVWload_0 applies the rewrite rules for MOVWload
+// (32-bit sign-extending load): fold a constant offset from an ADDconst
+// base, select the register-indexed MOVWloadidx form for a base+index (ADD)
+// address, merge a MOVDaddr symbol/offset into the load's aux fields, and
+// fold a load of a location just zeroed by MOVWstorezero into the
+// constant 0. Reports whether a rewrite fired.
+func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               // Folding off SB is disallowed in shared (PIC) mode.
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWloadidx ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVWloadidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWstorezero {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MOVWloadidx_0 applies the rewrite rules for the
+// register-indexed MOVWloadidx: when either operand of the address pair is
+// a MOVDconst, degrade back to the immediate-offset MOVWload form, and fold
+// a load of a location just zeroed by MOVWstorezeroidx (same base/index
+// pair in either order) into the constant 0.
+func rewriteValueARM64_OpARM64MOVWloadidx_0(v *Value) bool {
+       // match: (MOVWloadidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVWload [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx (MOVDconst [c]) ptr mem)
+       // cond:
+       // result: (MOVWload [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWstorezeroidx {
+                       break
+               }
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MOVWreg_0 holds the first batch of rewrite rules
+// for MOVWreg (sign-extend the low 32 bits): the extension is elided (via
+// MOVDreg) when the operand is already a narrower load or indexed load —
+// signed or unsigned — whose result fits in 32 bits.
+func rewriteValueARM64_OpARM64MOVWreg_0(v *Value) bool {
+       // match: (MOVWreg x:(MOVBload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBload {
+                       break
+               }
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBUload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUload {
+                       break
+               }
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHload {
+                       break
+               }
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHUload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUload {
+                       break
+               }
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVWload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWload {
+                       break
+               }
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBloadidx {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHloadidx {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUloadidx {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVWloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWloadidx {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWreg_10(v *Value) bool {
+       // match: (MOVWreg x:(MOVBreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBUreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVWreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(int32(c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(int32(c))
+               return true
+       }
+       // match: (MOVWreg (SLLconst [lc] x))
+       // cond: lc < 32
+       // result: (SBFIZ [arm64BFAuxInt(lc, 32-lc)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
+                       break
+               }
+               lc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(lc < 32) {
+                       break
+               }
+               v.reset(OpARM64SBFIZ)
+               v.AuxInt = arm64BFAuxInt(lc, 32-lc)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWstore [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} (ADD ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWstoreidx ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVWstorezero [off] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem)
+       // cond:
+       // result: (MOVWstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVWUreg x) mem)
+       // cond:
+       // result: (MOVWstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWUreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVDstore [i-4] {s} ptr0 w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if v_1.AuxInt != 32 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVDstoreidx ptr1 idx1 w mem)
+       for {
+               if v.AuxInt != 4 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if v_1.AuxInt != 32 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVDstore [i-4] {s} ptr0 w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               w0 := x.Args[1]
+               if w0.Op != OpARM64SRLconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx ptr1 idx1 w0:(SRLconst [j-32] w) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVDstoreidx ptr1 idx1 w0 mem)
+       for {
+               if v.AuxInt != 4 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               w0 := x.Args[2]
+               if w0.Op != OpARM64SRLconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MOVWstoreidx_0 applies peephole rewrites to a
+// MOVWstoreidx value (a 32-bit store whose address is base + index register).
+// A constant base or index is folded back into the immediate offset of a
+// plain MOVWstore, a store of constant zero becomes MOVWstorezeroidx, and a
+// MOVWreg/MOVWUreg extension of the stored value is dropped because a word
+// store only writes the low 32 bits anyway. Rules are tried in order; the
+// function reports whether v was rewritten in place.
+// NOTE(review): this appears to be machine-generated from the ARM64 rewrite
+// rules (gen/ARM64.rules) — change the rules and regenerate rather than
+// hand-editing this function.
+func rewriteValueARM64_OpARM64MOVWstoreidx_0(v *Value) bool {
+       // match: (MOVWstoreidx ptr (MOVDconst [c]) val mem)
+       // cond:
+       // result: (MOVWstore [c] ptr val mem)
+       for {
+               // (generated) blank read forces one bounds check for all 4 args.
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx (MOVDconst [c]) idx val mem)
+       // cond:
+       // result: (MOVWstore [c] idx val mem)
+       for {
+               _ = v.Args[3]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = c
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx ptr idx (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVWstorezeroidx ptr idx mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_2.AuxInt != 0 {
+                       break
+               }
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx ptr idx (MOVWreg x) mem)
+       // cond:
+       // result: (MOVWstoreidx ptr idx x mem)
+       for {
+               // The sign extension is redundant: only the low 32 bits reach memory.
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWreg {
+                       break
+               }
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx ptr idx (MOVWUreg x) mem)
+       // cond:
+       // result: (MOVWstoreidx ptr idx x mem)
+       for {
+               // Likewise, the zero extension is redundant for a 32-bit store.
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWUreg {
+                       break
+               }
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MOVWstorezero_0 applies rewrites to MOVWstorezero
+// (a 32-bit store of zero): it folds ADDconst offsets and MOVDaddr symbols
+// into the value's AuxInt/Aux, converts an (ADD ptr idx) address into the
+// register-indexed MOVWstorezeroidx, and merges two adjacent 4-byte zero
+// stores into a single 8-byte MOVDstorezero (plain or indexed form).
+// It reports whether v was rewritten.
+// NOTE(review): appears machine-generated from gen/ARM64.rules; do not
+// hand-edit — fix the rule file instead.
+func rewriteValueARM64_OpARM64MOVWstorezero_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWstorezero [off1+off2] {sym} ptr mem)
+       for {
+               // The OpSB/Flag_shared guard presumably keeps SB-relative
+               // addresses un-folded under -shared (PIC) builds — confirm
+               // against the rules file.
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstorezero [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWstorezeroidx ptr idx mem)
+       for {
+               // Register+register addressing has no immediate/symbol field,
+               // hence the off == 0 && sym == nil requirement.
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVWstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
+       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,4) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVDstorezero [min(i,j)] {s} ptr0 mem)
+       for {
+               // Two adjacent word zero-stores collapse into one doubleword
+               // zero-store; clobber(x) marks the absorbed store dead.
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               ptr0 := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVWstorezero {
+                       break
+               }
+               j := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[1]
+               ptr1 := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 4) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = min(i, j)
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstorezero [4] {s} (ADD ptr0 idx0) x:(MOVWstorezeroidx ptr1 idx1 mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVDstorezeroidx ptr1 idx1 mem)
+       for {
+               // Indexed variant of the merge above; ADD is commutative, so
+               // the base/index pair may match in either order.
+               if v.AuxInt != 4 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVWstorezeroidx {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstorezeroidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MOVWstorezeroidx_0 rewrites MOVWstorezeroidx (a
+// register-indexed 32-bit zero store): when either the base or the index is a
+// constant, the operation folds back into an immediate-offset MOVWstorezero.
+// It reports whether v was rewritten.
+// NOTE(review): appears machine-generated from gen/ARM64.rules.
+func rewriteValueARM64_OpARM64MOVWstorezeroidx_0(v *Value) bool {
+       // match: (MOVWstorezeroidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVWstorezero [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstorezeroidx (MOVDconst [c]) idx mem)
+       // cond:
+       // result: (MOVWstorezero [c] idx mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = c
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MUL_0 is the first batch of 64-bit MUL rewrite
+// rules: a negated operand fuses into MNEG; multiplication by -1, 0, and 1
+// reduces to NEG, constant 0, and a copy respectively; and multiplication by
+// a power of two becomes a left shift. Each rule appears twice to cover both
+// operand orders of the commutative MUL. Rules are tried in order; the
+// function reports whether v was rewritten (further MUL rules continue in
+// rewriteValueARM64_OpARM64MUL_10).
+// NOTE(review): appears machine-generated from gen/ARM64.rules.
+func rewriteValueARM64_OpARM64MUL_0(v *Value) bool {
+       // match: (MUL (NEG x) y)
+       // cond:
+       // result: (MNEG x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64NEG {
+                       break
+               }
+               x := v_0.Args[0]
+               y := v.Args[1]
+               v.reset(OpARM64MNEG)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (MUL y (NEG x))
+       // cond:
+       // result: (MNEG x y)
+       for {
+               _ = v.Args[1]
+               y := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64NEG {
+                       break
+               }
+               x := v_1.Args[0]
+               v.reset(OpARM64MNEG)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (MUL x (MOVDconst [-1]))
+       // cond:
+       // result: (NEG x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVDconst [-1]) x)
+       // cond:
+       // result: (NEG x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0.AuxInt != -1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL _ (MOVDconst [0]))
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MUL (MOVDconst [0]) _)
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MUL x (MOVDconst [1]))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVDconst [1]) x)
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
+       for {
+               // x * 2^k  ==>  x << k
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MUL_10 continues the MUL strength-reduction rules:
+// multiplication by 2^k+1 becomes a shift-and-add (x + x<<k), by 2^k-1 a
+// shift-and-add with a negated addend (-x + x<<k), and by a multiple of 3, 5,
+// or 7 whose quotient is a power of two becomes a small shift-add combination
+// followed by a constant shift. Each rule appears in both operand orders.
+// Reports whether v was rewritten (remaining rules are in
+// rewriteValueARM64_OpARM64MUL_20).
+// NOTE(review): appears machine-generated from gen/ARM64.rules.
+func rewriteValueARM64_OpARM64MUL_10(v *Value) bool {
+       // (generated) block handle used to allocate the auxiliary values below.
+       b := v.Block
+       _ = b
+       // match: (MUL x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c-1) && c >= 3
+       // result: (ADDshiftLL x x [log2(c-1)])
+       for {
+               // c = 2^k + 1:  x*c  ==>  x + (x << k)
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c-1) && c >= 3) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c - 1)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c-1) && c >= 3
+       // result: (ADDshiftLL x x [log2(c-1)])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c-1) && c >= 3) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c - 1)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c+1) && c >= 7
+       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       for {
+               // c = 2^k - 1:  x*c  ==>  -x + (x << k)
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c+1) && c >= 7) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c+1) && c >= 7
+       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c+1) && c >= 7) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       for {
+               // c = 3 * 2^m:  x*c  ==>  (x + x<<1) << m
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 1
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 1
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       for {
+               // c = 5 * 2^m:  x*c  ==>  (x + x<<2) << m
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo(c/7)
+       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+       for {
+               // c = 7 * 2^m:  x*c  ==>  (-x + x<<3) << m
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%7 == 0 && isPowerOfTwo(c/7)
+       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MUL_20 holds the final MUL rules: multiplication
+// by 9*2^m becomes (x + x<<3) << m, and a product of two constants is folded
+// at compile time. Each rule appears in both operand orders. Reports whether
+// v was rewritten.
+// NOTE(review): appears machine-generated from gen/ARM64.rules.
+func rewriteValueARM64_OpARM64MUL_20(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MUL x (MOVDconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       for {
+               // c = 9 * 2^m:  x*c  ==>  (x + x<<3) << m
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [c*d])
+       for {
+               // Constant folding: evaluate the product at compile time.
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = c * d
+               return true
+       }
+       // match: (MUL (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [c*d])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = c * d
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MULW_0(v *Value) bool {
+       // match: (MULW (NEG x) y)
+       // cond:
+       // result: (MNEGW x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64NEG {
+                       break
+               }
+               x := v_0.Args[0]
+               y := v.Args[1]
+               v.reset(OpARM64MNEGW)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (MULW y (NEG x))
+       // cond:
+       // result: (MNEGW x y)
+       for {
+               _ = v.Args[1]
+               y := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64NEG {
+                       break
+               }
+               x := v_1.Args[0]
+               v.reset(OpARM64MNEGW)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (MULW x (MOVDconst [c]))
+       // cond: int32(c)==-1
+       // result: (NEG x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(int32(c) == -1) {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: int32(c)==-1
+       // result: (NEG x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(int32(c) == -1) {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW _ (MOVDconst [c]))
+       // cond: int32(c)==0
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(int32(c) == 0) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) _)
+       // cond: int32(c)==0
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               if !(int32(c) == 0) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MULW x (MOVDconst [c]))
+       // cond: int32(c)==1
+       // result: x
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(int32(c) == 1) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: int32(c)==1
+       // result: x
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(int32(c) == 1) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MULW_10 tries the second batch of rewrite rules
+// for (MULW x y): strength-reduction of a 32-bit multiply by a constant c of
+// the forms 2^n+1, 2^n-1, 3*2^n, 5*2^n, and 7*2^n into shift/shifted-add
+// sequences (ADDshiftLL / SLLconst / NEG). Each rule is attempted in order
+// and the first match rewrites v in place and returns true; each rule also
+// appears twice to cover both operand orders of the commutative MULW.
+// NOTE(review): this appears to be machine-generated from the ARM64 rewrite
+// rules — prefer editing the rules source rather than this function.
+func rewriteValueARM64_OpARM64MULW_10(v *Value) bool {
+	b := v.Block
+	_ = b
+	// match: (MULW x (MOVDconst [c]))
+	// cond: isPowerOfTwo(c-1) && int32(c) >= 3
+	// result: (ADDshiftLL x x [log2(c-1)])
+	for {
+		_ = v.Args[1]
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+			break
+		}
+		v.reset(OpARM64ADDshiftLL)
+		v.AuxInt = log2(c - 1)
+		v.AddArg(x)
+		v.AddArg(x)
+		return true
+	}
+	// match: (MULW (MOVDconst [c]) x)
+	// cond: isPowerOfTwo(c-1) && int32(c) >= 3
+	// result: (ADDshiftLL x x [log2(c-1)])
+	for {
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+			break
+		}
+		v.reset(OpARM64ADDshiftLL)
+		v.AuxInt = log2(c - 1)
+		v.AddArg(x)
+		v.AddArg(x)
+		return true
+	}
+	// (2^n - 1)*x is computed as (x << n) - x: the ADDshiftLL adds x<<n to NEG x.
+	// match: (MULW x (MOVDconst [c]))
+	// cond: isPowerOfTwo(c+1) && int32(c) >= 7
+	// result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+	for {
+		_ = v.Args[1]
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
+			break
+		}
+		v.reset(OpARM64ADDshiftLL)
+		v.AuxInt = log2(c + 1)
+		v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		v.AddArg(x)
+		return true
+	}
+	// match: (MULW (MOVDconst [c]) x)
+	// cond: isPowerOfTwo(c+1) && int32(c) >= 7
+	// result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+	for {
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
+			break
+		}
+		v.reset(OpARM64ADDshiftLL)
+		v.AuxInt = log2(c + 1)
+		v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		v.AddArg(x)
+		return true
+	}
+	// match: (MULW x (MOVDconst [c]))
+	// cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+	// result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+	for {
+		_ = v.Args[1]
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 3)
+		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 1
+		v0.AddArg(x)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MULW (MOVDconst [c]) x)
+	// cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+	// result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+	for {
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 3)
+		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 1
+		v0.AddArg(x)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MULW x (MOVDconst [c]))
+	// cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+	// result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+	for {
+		_ = v.Args[1]
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 5)
+		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 2
+		v0.AddArg(x)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MULW (MOVDconst [c]) x)
+	// cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+	// result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+	for {
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 5)
+		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 2
+		v0.AddArg(x)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MULW x (MOVDconst [c]))
+	// cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+	// result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+	for {
+		_ = v.Args[1]
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 7)
+		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 3
+		v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+		v1.AddArg(x)
+		v0.AddArg(v1)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MULW (MOVDconst [c]) x)
+	// cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+	// result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+	for {
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 7)
+		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 3
+		v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+		v1.AddArg(x)
+		v0.AddArg(v1)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	return false
+}
+// rewriteValueARM64_OpARM64MULW_20 tries the final batch of rewrite rules
+// for (MULW x y): strength-reduction of multiplication by 9*2^n into a
+// shifted add plus a shift, and full constant folding when both operands
+// are constants (the product is truncated to 32 bits via int32 before being
+// sign-extended back to int64, matching MULW's 32-bit semantics).
+// Rules are tried in order; the first match rewrites v and returns true.
+// NOTE(review): this appears to be machine-generated from the ARM64 rewrite
+// rules — prefer editing the rules source rather than this function.
+func rewriteValueARM64_OpARM64MULW_20(v *Value) bool {
+	b := v.Block
+	_ = b
+	// match: (MULW x (MOVDconst [c]))
+	// cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+	// result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+	for {
+		_ = v.Args[1]
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 9)
+		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 3
+		v0.AddArg(x)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MULW (MOVDconst [c]) x)
+	// cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+	// result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+	for {
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 9)
+		v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 3
+		v0.AddArg(x)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MULW (MOVDconst [c]) (MOVDconst [d]))
+	// cond:
+	// result: (MOVDconst [int64(int32(c)*int32(d))])
+	for {
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		d := v_1.AuxInt
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = int64(int32(c) * int32(d))
+		return true
+	}
+	// match: (MULW (MOVDconst [d]) (MOVDconst [c]))
+	// cond:
+	// result: (MOVDconst [int64(int32(c)*int32(d))])
+	for {
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		d := v_0.AuxInt
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = int64(int32(c) * int32(d))
+		return true
+	}
+	return false
+}
+// rewriteValueARM64_OpARM64MVN_0 constant-folds bitwise NOT:
+// (MVN (MOVDconst [c])) becomes (MOVDconst [^c]).
+// It reports whether a rewrite was applied.
+// NOTE(review): this appears to be machine-generated from the ARM64 rewrite
+// rules — prefer editing the rules source rather than this function.
+func rewriteValueARM64_OpARM64MVN_0(v *Value) bool {
+	// match: (MVN (MOVDconst [c]))
+	// cond:
+	// result: (MOVDconst [^c])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = ^c
+		return true
+	}
+	return false
+}
+// rewriteValueARM64_OpARM64NEG_0 rewrites negation:
+//   NEG (MUL x y)       -> MNEG x y   (combined multiply-negate op)
+//   NEG (MULW x y)      -> MNEGW x y  (32-bit variant)
+//   NEG (MOVDconst [c]) -> MOVDconst [-c]  (constant fold)
+// Rules are tried in order; the first match rewrites v and returns true.
+// NOTE(review): this appears to be machine-generated from the ARM64 rewrite
+// rules — prefer editing the rules source rather than this function.
+func rewriteValueARM64_OpARM64NEG_0(v *Value) bool {
+	// match: (NEG (MUL x y))
+	// cond:
+	// result: (MNEG x y)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MUL {
+			break
+		}
+		_ = v_0.Args[1]
+		x := v_0.Args[0]
+		y := v_0.Args[1]
+		v.reset(OpARM64MNEG)
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (NEG (MULW x y))
+	// cond:
+	// result: (MNEGW x y)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MULW {
+			break
+		}
+		_ = v_0.Args[1]
+		x := v_0.Args[0]
+		y := v_0.Args[1]
+		v.reset(OpARM64MNEGW)
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (NEG (MOVDconst [c]))
+	// cond:
+	// result: (MOVDconst [-c])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = -c
+		return true
+	}
+	return false
+}
+// rewriteValueARM64_OpARM64NotEqual_0 evaluates NotEqual applied to a flag
+// value that is known at compile time: FlagEQ folds to constant 0, and each
+// of the four known not-equal flag states (FlagLT_ULT, FlagLT_UGT,
+// FlagGT_ULT, FlagGT_UGT) folds to constant 1. InvertFlags is stripped,
+// since x != y is unchanged when the comparison operands are swapped.
+// It reports whether a rewrite was applied.
+// NOTE(review): this appears to be machine-generated from the ARM64 rewrite
+// rules — prefer editing the rules source rather than this function.
+func rewriteValueARM64_OpARM64NotEqual_0(v *Value) bool {
+	// match: (NotEqual (FlagEQ))
+	// cond:
+	// result: (MOVDconst [0])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64FlagEQ {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
+		return true
+	}
+	// match: (NotEqual (FlagLT_ULT))
+	// cond:
+	// result: (MOVDconst [1])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64FlagLT_ULT {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 1
+		return true
+	}
+	// match: (NotEqual (FlagLT_UGT))
+	// cond:
+	// result: (MOVDconst [1])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64FlagLT_UGT {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 1
+		return true
+	}
+	// match: (NotEqual (FlagGT_ULT))
+	// cond:
+	// result: (MOVDconst [1])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64FlagGT_ULT {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 1
+		return true
+	}
+	// match: (NotEqual (FlagGT_UGT))
+	// cond:
+	// result: (MOVDconst [1])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64FlagGT_UGT {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 1
+		return true
+	}
+	// match: (NotEqual (InvertFlags x))
+	// cond:
+	// result: (NotEqual x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64InvertFlags {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpARM64NotEqual)
+		v.AddArg(x)
+		return true
+	}
+	return false
+}
+// rewriteValueARM64_OpARM64OR_0 applies the first batch of rewrite rules for
+// OR, trying each in order and returning true on the first match:
+//   - fold a MOVDconst operand into ORconst (both operand orders);
+//   - OR x x -> x (OR is idempotent);
+//   - OR with an MVN operand -> ORN (or-not), both operand orders;
+//   - absorb a constant-shift operand (SLLconst/SRLconst/SRAconst) into the
+//     fused ORshiftLL/ORshiftRL/ORshiftRA forms, guarded by
+//     clobberIfDead(x1) — presumably requiring the shift value to have no
+//     other uses so it can be consumed; confirm against clobberIfDead's doc.
+// NOTE(review): this appears to be machine-generated from the ARM64 rewrite
+// rules — prefer editing the rules source rather than this function.
+func rewriteValueARM64_OpARM64OR_0(v *Value) bool {
+	// match: (OR x (MOVDconst [c]))
+	// cond:
+	// result: (ORconst [c] x)
+	for {
+		_ = v.Args[1]
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		v.reset(OpARM64ORconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (OR (MOVDconst [c]) x)
+	// cond:
+	// result: (ORconst [c] x)
+	for {
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		v.reset(OpARM64ORconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (OR x x)
+	// cond:
+	// result: x
+	for {
+		_ = v.Args[1]
+		x := v.Args[0]
+		if x != v.Args[1] {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
+	// match: (OR x (MVN y))
+	// cond:
+	// result: (ORN x y)
+	for {
+		_ = v.Args[1]
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MVN {
+			break
+		}
+		y := v_1.Args[0]
+		v.reset(OpARM64ORN)
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (OR (MVN y) x)
+	// cond:
+	// result: (ORN x y)
+	for {
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MVN {
+			break
+		}
+		y := v_0.Args[0]
+		x := v.Args[1]
+		v.reset(OpARM64ORN)
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (OR x0 x1:(SLLconst [c] y))
+	// cond: clobberIfDead(x1)
+	// result: (ORshiftLL x0 y [c])
+	for {
+		_ = v.Args[1]
+		x0 := v.Args[0]
+		x1 := v.Args[1]
+		if x1.Op != OpARM64SLLconst {
+			break
+		}
+		c := x1.AuxInt
+		y := x1.Args[0]
+		if !(clobberIfDead(x1)) {
+			break
+		}
+		v.reset(OpARM64ORshiftLL)
+		v.AuxInt = c
+		v.AddArg(x0)
+		v.AddArg(y)
+		return true
+	}
+	// match: (OR x1:(SLLconst [c] y) x0)
+	// cond: clobberIfDead(x1)
+	// result: (ORshiftLL x0 y [c])
+	for {
+		_ = v.Args[1]
+		x1 := v.Args[0]
+		if x1.Op != OpARM64SLLconst {
+			break
+		}
+		c := x1.AuxInt
+		y := x1.Args[0]
+		x0 := v.Args[1]
+		if !(clobberIfDead(x1)) {
+			break
+		}
+		v.reset(OpARM64ORshiftLL)
+		v.AuxInt = c
+		v.AddArg(x0)
+		v.AddArg(y)
+		return true
+	}
+	// match: (OR x0 x1:(SRLconst [c] y))
+	// cond: clobberIfDead(x1)
+	// result: (ORshiftRL x0 y [c])
+	for {
+		_ = v.Args[1]
+		x0 := v.Args[0]
+		x1 := v.Args[1]
+		if x1.Op != OpARM64SRLconst {
+			break
+		}
+		c := x1.AuxInt
+		y := x1.Args[0]
+		if !(clobberIfDead(x1)) {
+			break
+		}
+		v.reset(OpARM64ORshiftRL)
+		v.AuxInt = c
+		v.AddArg(x0)
+		v.AddArg(y)
+		return true
+	}
+	// match: (OR x1:(SRLconst [c] y) x0)
+	// cond: clobberIfDead(x1)
+	// result: (ORshiftRL x0 y [c])
+	for {
+		_ = v.Args[1]
+		x1 := v.Args[0]
+		if x1.Op != OpARM64SRLconst {
+			break
+		}
+		c := x1.AuxInt
+		y := x1.Args[0]
+		x0 := v.Args[1]
+		if !(clobberIfDead(x1)) {
+			break
+		}
+		v.reset(OpARM64ORshiftRL)
+		v.AuxInt = c
+		v.AddArg(x0)
+		v.AddArg(y)
+		return true
+	}
+	// match: (OR x0 x1:(SRAconst [c] y))
+	// cond: clobberIfDead(x1)
+	// result: (ORshiftRA x0 y [c])
+	for {
+		_ = v.Args[1]
+		x0 := v.Args[0]
+		x1 := v.Args[1]
+		if x1.Op != OpARM64SRAconst {
+			break
+		}
+		c := x1.AuxInt
+		y := x1.Args[0]
+		if !(clobberIfDead(x1)) {
+			break
+		}
+		v.reset(OpARM64ORshiftRA)
+		v.AuxInt = c
+		v.AddArg(x0)
+		v.AddArg(y)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (OR x1:(SRAconst [c] y) x0)
+       // cond: clobberIfDead(x1)
+       // result: (ORshiftRA x0 y [c])
+       for {
+               _ = v.Args[1]
+               x1 := v.Args[0]
+               if x1.Op != OpARM64SRAconst {
+                       break
+               }
+               c := x1.AuxInt
+               y := x1.Args[0]
+               x0 := v.Args[1]
+               if !(clobberIfDead(x1)) {
+                       break
+               }
+               v.reset(OpARM64ORshiftRA)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
+               return true
+       }
+       // match: (OR (UBFIZ [bfc] x) (ANDconst [ac] y))
+       // cond: ac == ^((1<<uint(getARM64BFwidth(bfc))-1) << uint(getARM64BFlsb(bfc)))
+       // result: (BFI [bfc] y x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64UBFIZ {
+                       break
+               }
+               bfc := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ANDconst {
+                       break
+               }
+               ac := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(ac == ^((1<<uint(getARM64BFwidth(bfc)) - 1) << uint(getARM64BFlsb(bfc)))) {
+                       break
+               }
+               v.reset(OpARM64BFI)
+               v.AuxInt = bfc
+               v.AddArg(y)
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR (ANDconst [ac] y) (UBFIZ [bfc] x))
+       // cond: ac == ^((1<<uint(getARM64BFwidth(bfc))-1) << uint(getARM64BFlsb(bfc)))
+       // result: (BFI [bfc] y x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ANDconst {
+                       break
+               }
+               ac := v_0.AuxInt
+               y := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64UBFIZ {
+                       break
+               }
+               bfc := v_1.AuxInt
+               x := v_1.Args[0]
+               if !(ac == ^((1<<uint(getARM64BFwidth(bfc)) - 1) << uint(getARM64BFlsb(bfc)))) {
+                       break
+               }
+               v.reset(OpARM64BFI)
+               v.AuxInt = bfc
+               v.AddArg(y)
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR (UBFX [bfc] x) (ANDconst [ac] y))
+       // cond: ac == ^(1<<uint(getARM64BFwidth(bfc))-1)
+       // result: (BFXIL [bfc] y x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64UBFX {
+                       break
+               }
+               bfc := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ANDconst {
+                       break
+               }
+               ac := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(ac == ^(1<<uint(getARM64BFwidth(bfc)) - 1)) {
+                       break
+               }
+               v.reset(OpARM64BFXIL)
+               v.AuxInt = bfc
+               v.AddArg(y)
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR (ANDconst [ac] y) (UBFX [bfc] x))
+       // cond: ac == ^(1<<uint(getARM64BFwidth(bfc))-1)
+       // result: (BFXIL [bfc] y x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ANDconst {
+                       break
+               }
+               ac := v_0.AuxInt
+               y := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64UBFX {
+                       break
+               }
+               bfc := v_1.AuxInt
+               x := v_1.Args[0]
+               if !(ac == ^(1<<uint(getARM64BFwidth(bfc)) - 1)) {
+                       break
+               }
+               v.reset(OpARM64BFXIL)
+               v.AuxInt = bfc
+               v.AddArg(y)
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)))
+       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               _ = o1.Args[1]
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 24 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               i3 := x0.AuxInt
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               i1 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := v.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i0 := x3.AuxInt
+               if x3.Aux != s {
+                       break
+               }
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.Aux = s
+               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v1.AuxInt = i0
+               v1.AddArg(p)
+               v0.AddArg(v1)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem))))
+       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               y3 := v.Args[0]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i0 := x3.AuxInt
+               s := x3.Aux
+               _ = x3.Args[1]
+               p := x3.Args[0]
+               mem := x3.Args[1]
+               o0 := v.Args[1]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               _ = o1.Args[1]
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 24 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               i3 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               _ = x0.Args[1]
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               i2 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               i1 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.Aux = s
+               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v1.AuxInt = i0
+               v1.AddArg(p)
+               v0.AddArg(v1)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr0 idx0 mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr0 idx0 mem)
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               _ = o1.Args[1]
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 24 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x0.AuxInt != 3 {
+                       break
+               }
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x1.AuxInt != 2 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x2.AuxInt != 1 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               p1 := x2.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := v.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x3.Args[2]
+               ptr0 := x3.Args[0]
+               idx0 := x3.Args[1]
+               if mem != x3.Args[2] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr0)
+               v0.AddArg(idx0)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> y3:(MOVDnop x3:(MOVBUloadidx ptr0 idx0 mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr0 idx0 mem)
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               y3 := v.Args[0]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x3.Args[2]
+               ptr0 := x3.Args[0]
+               idx0 := x3.Args[1]
+               mem := x3.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               _ = o1.Args[1]
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 24 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x0.AuxInt != 3 {
+                       break
+               }
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               if mem != x0.Args[1] {
+                       break
+               }
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x1.AuxInt != 2 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x2.AuxInt != 1 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               p1 := x2.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x2.Args[1] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr0)
+               v0.AddArg(idx0)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)))
+       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               _ = o1.Args[1]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 24 {
+                       break
+               }
+               _ = o2.Args[1]
+               o3 := o2.Args[0]
+               if o3.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o3.AuxInt != 32 {
+                       break
+               }
+               _ = o3.Args[1]
+               o4 := o3.Args[0]
+               if o4.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o4.AuxInt != 40 {
+                       break
+               }
+               _ = o4.Args[1]
+               o5 := o4.Args[0]
+               if o5.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o5.AuxInt != 48 {
+                       break
+               }
+               _ = o5.Args[1]
+               s0 := o5.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 56 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               i7 := x0.AuxInt
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o5.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               i6 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o4.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               i5 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := o3.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i4 := x3.AuxInt
+               if x3.Aux != s {
+                       break
+               }
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
+                       break
+               }
+               y4 := o2.Args[1]
+               if y4.Op != OpARM64MOVDnop {
+                       break
+               }
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
+                       break
+               }
+               i3 := x4.AuxInt
+               if x4.Aux != s {
+                       break
+               }
+               _ = x4.Args[1]
+               if p != x4.Args[0] {
+                       break
+               }
+               if mem != x4.Args[1] {
+                       break
+               }
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
+                       break
+               }
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUload {
+                       break
+               }
+               i2 := x5.AuxInt
+               if x5.Aux != s {
+                       break
+               }
+               _ = x5.Args[1]
+               if p != x5.Args[0] {
+                       break
+               }
+               if mem != x5.Args[1] {
+                       break
+               }
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
+                       break
+               }
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUload {
+                       break
+               }
+               i1 := x6.AuxInt
+               if x6.Aux != s {
+                       break
+               }
+               _ = x6.Args[1]
+               if p != x6.Args[0] {
+                       break
+               }
+               if mem != x6.Args[1] {
+                       break
+               }
+               y7 := v.Args[1]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUload {
+                       break
+               }
+               i0 := x7.AuxInt
+               if x7.Aux != s {
+                       break
+               }
+               _ = x7.Args[1]
+               if p != x7.Args[0] {
+                       break
+               }
+               if mem != x7.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.Aux = s
+               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v1.AuxInt = i0
+               v1.AddArg(p)
+               v0.AddArg(v1)
+               v0.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem))))
+       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               y7 := v.Args[0]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUload {
+                       break
+               }
+               i0 := x7.AuxInt
+               s := x7.Aux
+               _ = x7.Args[1]
+               p := x7.Args[0]
+               mem := x7.Args[1]
+               o0 := v.Args[1]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               _ = o1.Args[1]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 24 {
+                       break
+               }
+               _ = o2.Args[1]
+               o3 := o2.Args[0]
+               if o3.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o3.AuxInt != 32 {
+                       break
+               }
+               _ = o3.Args[1]
+               o4 := o3.Args[0]
+               if o4.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o4.AuxInt != 40 {
+                       break
+               }
+               _ = o4.Args[1]
+               o5 := o4.Args[0]
+               if o5.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o5.AuxInt != 48 {
+                       break
+               }
+               _ = o5.Args[1]
+               s0 := o5.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 56 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               i7 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               _ = x0.Args[1]
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               y1 := o5.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               i6 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o4.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               i5 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := o3.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i4 := x3.AuxInt
+               if x3.Aux != s {
+                       break
+               }
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
+                       break
+               }
+               y4 := o2.Args[1]
+               if y4.Op != OpARM64MOVDnop {
+                       break
+               }
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
+                       break
+               }
+               i3 := x4.AuxInt
+               if x4.Aux != s {
+                       break
+               }
+               _ = x4.Args[1]
+               if p != x4.Args[0] {
+                       break
+               }
+               if mem != x4.Args[1] {
+                       break
+               }
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
+                       break
+               }
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUload {
+                       break
+               }
+               i2 := x5.AuxInt
+               if x5.Aux != s {
+                       break
+               }
+               _ = x5.Args[1]
+               if p != x5.Args[0] {
+                       break
+               }
+               if mem != x5.Args[1] {
+                       break
+               }
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
+                       break
+               }
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUload {
+                       break
+               }
+               i1 := x6.AuxInt
+               if x6.Aux != s {
+                       break
+               }
+               _ = x6.Args[1]
+               if p != x6.Args[0] {
+                       break
+               }
+               if mem != x6.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.Aux = s
+               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v1.AuxInt = i0
+               v1.AddArg(p)
+               v0.AddArg(v1)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr0 idx0 mem)
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               _ = o1.Args[1]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 24 {
+                       break
+               }
+               _ = o2.Args[1]
+               o3 := o2.Args[0]
+               if o3.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o3.AuxInt != 32 {
+                       break
+               }
+               _ = o3.Args[1]
+               o4 := o3.Args[0]
+               if o4.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o4.AuxInt != 40 {
+                       break
+               }
+               _ = o4.Args[1]
+               o5 := o4.Args[0]
+               if o5.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o5.AuxInt != 48 {
+                       break
+               }
+               _ = o5.Args[1]
+               s0 := o5.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 56 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x0.AuxInt != 7 {
+                       break
+               }
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o5.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x1.AuxInt != 6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o4.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x2.AuxInt != 5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := o3.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x3.AuxInt != 4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
+                       break
+               }
+               y4 := o2.Args[1]
+               if y4.Op != OpARM64MOVDnop {
+                       break
+               }
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x4.AuxInt != 3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               _ = x4.Args[1]
+               if p != x4.Args[0] {
+                       break
+               }
+               if mem != x4.Args[1] {
+                       break
+               }
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
+                       break
+               }
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x5.AuxInt != 2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               _ = x5.Args[1]
+               if p != x5.Args[0] {
+                       break
+               }
+               if mem != x5.Args[1] {
+                       break
+               }
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
+                       break
+               }
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x6.AuxInt != 1 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               _ = x6.Args[1]
+               p1 := x6.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x6.Args[1] {
+                       break
+               }
+               y7 := v.Args[1]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x7.Args[2]
+               ptr0 := x7.Args[0]
+               idx0 := x7.Args[1]
+               if mem != x7.Args[2] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr0)
+               v0.AddArg(idx0)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr0 idx0 mem)
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               y7 := v.Args[0]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x7.Args[2]
+               ptr0 := x7.Args[0]
+               idx0 := x7.Args[1]
+               mem := x7.Args[2]
+               o0 := v.Args[1]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               _ = o1.Args[1]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 24 {
+                       break
+               }
+               _ = o2.Args[1]
+               o3 := o2.Args[0]
+               if o3.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o3.AuxInt != 32 {
+                       break
+               }
+               _ = o3.Args[1]
+               o4 := o3.Args[0]
+               if o4.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o4.AuxInt != 40 {
+                       break
+               }
+               _ = o4.Args[1]
+               o5 := o4.Args[0]
+               if o5.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o5.AuxInt != 48 {
+                       break
+               }
+               _ = o5.Args[1]
+               s0 := o5.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 56 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x0.AuxInt != 7 {
+                       break
+               }
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               if mem != x0.Args[1] {
+                       break
+               }
+               y1 := o5.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x1.AuxInt != 6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o4.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x2.AuxInt != 5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               if mem != x2.Args[1] {
                        break
                }
-               v.reset(OpARM64ORshiftLL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR x1:(SLLconst [c] y) x0)
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftLL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x1 := v.Args[0]
-               if x1.Op != OpARM64SLLconst {
+               y3 := o3.Args[1]
+               if y3.Op != OpARM64MOVDnop {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               x0 := v.Args[1]
-               if !(clobberIfDead(x1)) {
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
                        break
                }
-               v.reset(OpARM64ORshiftLL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR x0 x1:(SRLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftRL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRLconst {
+               if x3.AuxInt != 4 {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               if x3.Aux != s {
                        break
                }
-               v.reset(OpARM64ORshiftRL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR x1:(SRLconst [c] y) x0)
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftRL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x1 := v.Args[0]
-               if x1.Op != OpARM64SRLconst {
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               x0 := v.Args[1]
-               if !(clobberIfDead(x1)) {
+               if mem != x3.Args[1] {
                        break
                }
-               v.reset(OpARM64ORshiftRL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR x0 x1:(SRAconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftRA x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRAconst {
+               y4 := o2.Args[1]
+               if y4.Op != OpARM64MOVDnop {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
                        break
                }
-               v.reset(OpARM64ORshiftRA)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (OR x1:(SRAconst [c] y) x0)
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftRA x0 y [c])
-       for {
-               _ = v.Args[1]
-               x1 := v.Args[0]
-               if x1.Op != OpARM64SRAconst {
+               if x4.AuxInt != 3 {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               x0 := v.Args[1]
-               if !(clobberIfDead(x1)) {
+               if x4.Aux != s {
                        break
                }
-               v.reset(OpARM64ORshiftRA)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR (UBFIZ [bfc] x) (ANDconst [ac] y))
-       // cond: ac == ^((1<<uint(getARM64BFwidth(bfc))-1) << uint(getARM64BFlsb(bfc)))
-       // result: (BFI [bfc] y x)
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64UBFIZ {
+               _ = x4.Args[1]
+               if p != x4.Args[0] {
                        break
                }
-               bfc := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64ANDconst {
+               if mem != x4.Args[1] {
                        break
                }
-               ac := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(ac == ^((1<<uint(getARM64BFwidth(bfc)) - 1) << uint(getARM64BFlsb(bfc)))) {
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
                        break
                }
-               v.reset(OpARM64BFI)
-               v.AuxInt = bfc
-               v.AddArg(y)
-               v.AddArg(x)
-               return true
-       }
-       // match: (OR (ANDconst [ac] y) (UBFIZ [bfc] x))
-       // cond: ac == ^((1<<uint(getARM64BFwidth(bfc))-1) << uint(getARM64BFlsb(bfc)))
-       // result: (BFI [bfc] y x)
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ANDconst {
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUload {
                        break
                }
-               ac := v_0.AuxInt
-               y := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64UBFIZ {
+               if x5.AuxInt != 2 {
                        break
                }
-               bfc := v_1.AuxInt
-               x := v_1.Args[0]
-               if !(ac == ^((1<<uint(getARM64BFwidth(bfc)) - 1) << uint(getARM64BFlsb(bfc)))) {
+               if x5.Aux != s {
                        break
                }
-               v.reset(OpARM64BFI)
-               v.AuxInt = bfc
-               v.AddArg(y)
-               v.AddArg(x)
-               return true
-       }
-       // match: (OR (UBFX [bfc] x) (ANDconst [ac] y))
-       // cond: ac == ^(1<<uint(getARM64BFwidth(bfc))-1)
-       // result: (BFXIL [bfc] y x)
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64UBFX {
+               _ = x5.Args[1]
+               if p != x5.Args[0] {
                        break
                }
-               bfc := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64ANDconst {
+               if mem != x5.Args[1] {
                        break
                }
-               ac := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(ac == ^(1<<uint(getARM64BFwidth(bfc)) - 1)) {
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
                        break
                }
-               v.reset(OpARM64BFXIL)
-               v.AuxInt = bfc
-               v.AddArg(y)
-               v.AddArg(x)
-               return true
-       }
-       // match: (OR (ANDconst [ac] y) (UBFX [bfc] x))
-       // cond: ac == ^(1<<uint(getARM64BFwidth(bfc))-1)
-       // result: (BFXIL [bfc] y x)
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ANDconst {
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUload {
                        break
                }
-               ac := v_0.AuxInt
-               y := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64UBFX {
+               if x6.AuxInt != 1 {
                        break
                }
-               bfc := v_1.AuxInt
-               x := v_1.Args[0]
-               if !(ac == ^(1<<uint(getARM64BFwidth(bfc)) - 1)) {
+               if x6.Aux != s {
                        break
                }
-               v.reset(OpARM64BFXIL)
-               v.AuxInt = bfc
-               v.AddArg(y)
-               v.AddArg(x)
+               _ = x6.Args[1]
+               p1 := x6.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x6.Args[1] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr0)
+               v0.AddArg(idx0)
+               v0.AddArg(mem)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)))
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
        // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
                _ = v.Args[1]
@@ -10838,7 +15370,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x0.Op != OpARM64MOVBUload {
                        break
                }
-               i3 := x0.AuxInt
+               i0 := x0.AuxInt
                s := x0.Aux
                _ = x0.Args[1]
                p := x0.Args[0]
@@ -10851,7 +15383,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               i2 := x1.AuxInt
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -10870,7 +15402,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               i1 := x2.AuxInt
+               i2 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
@@ -10889,7 +15421,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               i0 := x3.AuxInt
+               i3 := x3.AuxInt
                if x3.Aux != s {
                        break
                }
@@ -10904,20 +15436,22 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.Aux = s
-               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i0
-               v1.AddArg(p)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v1.Aux = s
+               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v2.AuxInt = i0
+               v2.AddArg(p)
+               v1.AddArg(v2)
+               v1.AddArg(mem)
                v0.AddArg(v1)
-               v0.AddArg(mem)
                return true
        }
-       // match: (OR <t> y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem))))
+       // match: (OR <t> y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))))
        // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
                _ = v.Args[1]
@@ -10929,7 +15463,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               i0 := x3.AuxInt
+               i3 := x3.AuxInt
                s := x3.Aux
                _ = x3.Args[1]
                p := x3.Args[0]
@@ -10965,7 +15499,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x0.Op != OpARM64MOVBUload {
                        break
                }
-               i3 := x0.AuxInt
+               i0 := x0.AuxInt
                if x0.Aux != s {
                        break
                }
@@ -10984,7 +15518,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               i2 := x1.AuxInt
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -11003,7 +15537,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               i1 := x2.AuxInt
+               i2 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
@@ -11018,20 +15552,22 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.Aux = s
-               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i0
-               v1.AddArg(p)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v1.Aux = s
+               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v2.AuxInt = i0
+               v2.AddArg(p)
+               v1.AddArg(v2)
+               v1.AddArg(mem)
                v0.AddArg(v1)
-               v0.AddArg(mem)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
        for {
                t := v.Type
                _ = v.Args[1]
@@ -11051,43 +15587,11 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                        break
                }
                _ = o1.Args[1]
-               o2 := o1.Args[0]
-               if o2.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o2.AuxInt != 24 {
-                       break
-               }
-               _ = o2.Args[1]
-               o3 := o2.Args[0]
-               if o3.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o3.AuxInt != 32 {
-                       break
-               }
-               _ = o3.Args[1]
-               o4 := o3.Args[0]
-               if o4.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o4.AuxInt != 40 {
-                       break
-               }
-               _ = o4.Args[1]
-               o5 := o4.Args[0]
-               if o5.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o5.AuxInt != 48 {
-                       break
-               }
-               _ = o5.Args[1]
-               s0 := o5.Args[0]
+               s0 := o1.Args[0]
                if s0.Op != OpARM64SLLconst {
                        break
                }
-               if s0.AuxInt != 56 {
+               if s0.AuxInt != 24 {
                        break
                }
                y0 := s0.Args[0]
@@ -11095,15 +15599,14 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                        break
                }
                x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               i7 := x0.AuxInt
-               s := x0.Aux
-               _ = x0.Args[1]
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               y1 := o5.Args[1]
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               mem := x0.Args[2]
+               y1 := o1.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
@@ -11111,18 +15614,22 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               i6 := x1.AuxInt
-               if x1.Aux != s {
+               if x1.AuxInt != 1 {
                        break
                }
+               s := x1.Aux
                _ = x1.Args[1]
-               if p != x1.Args[0] {
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
                if mem != x1.Args[1] {
                        break
                }
-               y2 := o4.Args[1]
+               y2 := o0.Args[1]
                if y2.Op != OpARM64MOVDnop {
                        break
                }
@@ -11130,18 +15637,18 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               i5 := x2.AuxInt
-               if x2.Aux != s {
+               if x2.AuxInt != 2 {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               if x2.Aux != s {
                        break
                }
+               _ = x2.Args[1]
+               p := x2.Args[0]
                if mem != x2.Args[1] {
                        break
                }
-               y3 := o3.Args[1]
+               y3 := v.Args[1]
                if y3.Op != OpARM64MOVDnop {
                        break
                }
@@ -11149,7 +15656,9 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               i4 := x3.AuxInt
+               if x3.AuxInt != 3 {
+                       break
+               }
                if x3.Aux != s {
                        break
                }
@@ -11160,119 +15669,145 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if mem != x3.Args[1] {
                        break
                }
-               y4 := o2.Args[1]
-               if y4.Op != OpARM64MOVDnop {
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
                        break
                }
-               x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUload {
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v1.AddArg(ptr0)
+               v1.AddArg(idx0)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR <t> y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               y3 := v.Args[0]
+               if y3.Op != OpARM64MOVDnop {
                        break
                }
-               i3 := x4.AuxInt
-               if x4.Aux != s {
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
                        break
                }
-               _ = x4.Args[1]
-               if p != x4.Args[0] {
+               if x3.AuxInt != 3 {
                        break
                }
-               if mem != x4.Args[1] {
+               s := x3.Aux
+               _ = x3.Args[1]
+               p := x3.Args[0]
+               mem := x3.Args[1]
+               o0 := v.Args[1]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               y5 := o1.Args[1]
-               if y5.Op != OpARM64MOVDnop {
+               if o0.AuxInt != 8 {
                        break
                }
-               x5 := y5.Args[0]
-               if x5.Op != OpARM64MOVBUload {
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
                        break
                }
-               i2 := x5.AuxInt
-               if x5.Aux != s {
+               if o1.AuxInt != 16 {
                        break
                }
-               _ = x5.Args[1]
-               if p != x5.Args[0] {
+               _ = o1.Args[1]
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
                        break
                }
-               if mem != x5.Args[1] {
+               if s0.AuxInt != 24 {
                        break
                }
-               y6 := o0.Args[1]
-               if y6.Op != OpARM64MOVDnop {
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               x6 := y6.Args[0]
-               if x6.Op != OpARM64MOVBUload {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               i1 := x6.AuxInt
-               if x6.Aux != s {
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               if mem != x0.Args[2] {
                        break
                }
-               _ = x6.Args[1]
-               if p != x6.Args[0] {
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               if mem != x6.Args[1] {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               y7 := v.Args[1]
-               if y7.Op != OpARM64MOVDnop {
+               if x1.AuxInt != 1 {
                        break
                }
-               x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUload {
+               if x1.Aux != s {
                        break
                }
-               i0 := x7.AuxInt
-               if x7.Aux != s {
+               _ = x1.Args[1]
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
-               _ = x7.Args[1]
-               if p != x7.Args[0] {
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x1.Args[1] {
                        break
                }
-               if mem != x7.Args[1] {
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
+               if x2.AuxInt != 2 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
-               v1.Aux = s
-               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i0
-               v2.AddArg(p)
-               v1.AddArg(v2)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v1.AddArg(ptr0)
+               v1.AddArg(idx0)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem))))
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)))
        // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
        // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
                _ = v.Args[1]
-               y7 := v.Args[0]
-               if y7.Op != OpARM64MOVDnop {
-                       break
-               }
-               x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUload {
-                       break
-               }
-               i0 := x7.AuxInt
-               s := x7.Aux
-               _ = x7.Args[1]
-               p := x7.Args[0]
-               mem := x7.Args[1]
-               o0 := v.Args[1]
+               o0 := v.Args[0]
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
@@ -11335,17 +15870,11 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x0.Op != OpARM64MOVBUload {
                        break
                }
-               i7 := x0.AuxInt
-               if x0.Aux != s {
-                       break
-               }
+               i0 := x0.AuxInt
+               s := x0.Aux
                _ = x0.Args[1]
-               if p != x0.Args[0] {
-                       break
-               }
-               if mem != x0.Args[1] {
-                       break
-               }
+               p := x0.Args[0]
+               mem := x0.Args[1]
                y1 := o5.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
@@ -11354,7 +15883,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               i6 := x1.AuxInt
+               i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
@@ -11373,7 +15902,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               i5 := x2.AuxInt
+               i2 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
@@ -11392,7 +15921,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               i4 := x3.AuxInt
+               i3 := x3.AuxInt
                if x3.Aux != s {
                        break
                }
@@ -11411,7 +15940,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x4.Op != OpARM64MOVBUload {
                        break
                }
-               i3 := x4.AuxInt
+               i4 := x4.AuxInt
                if x4.Aux != s {
                        break
                }
@@ -11430,7 +15959,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x5.Op != OpARM64MOVBUload {
                        break
                }
-               i2 := x5.AuxInt
+               i5 := x5.AuxInt
                if x5.Aux != s {
                        break
                }
@@ -11449,7 +15978,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if x6.Op != OpARM64MOVBUload {
                        break
                }
-               i1 := x6.AuxInt
+               i6 := x6.AuxInt
                if x6.Aux != s {
                        break
                }
@@ -11460,6 +15989,25 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if mem != x6.Args[1] {
                        break
                }
+               y7 := v.Args[1]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUload {
+                       break
+               }
+               i7 := x7.AuxInt
+               if x7.Aux != s {
+                       break
+               }
+               _ = x7.Args[1]
+               if p != x7.Args[0] {
+                       break
+               }
+               if mem != x7.Args[1] {
+                       break
+               }
                if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
@@ -11477,13 +16025,26 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem))))
+       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
                _ = v.Args[1]
-               o0 := v.Args[0]
+               y7 := v.Args[0]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUload {
+                       break
+               }
+               i7 := x7.AuxInt
+               s := x7.Aux
+               _ = x7.Args[1]
+               p := x7.Args[0]
+               mem := x7.Args[1]
+               o0 := v.Args[1]
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
@@ -11499,11 +16060,43 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                        break
                }
                _ = o1.Args[1]
-               s0 := o1.Args[0]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 24 {
+                       break
+               }
+               _ = o2.Args[1]
+               o3 := o2.Args[0]
+               if o3.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o3.AuxInt != 32 {
+                       break
+               }
+               _ = o3.Args[1]
+               o4 := o3.Args[0]
+               if o4.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o4.AuxInt != 40 {
+                       break
+               }
+               _ = o4.Args[1]
+               o5 := o4.Args[0]
+               if o5.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o5.AuxInt != 48 {
+                       break
+               }
+               _ = o5.Args[1]
+               s0 := o5.Args[0]
                if s0.Op != OpARM64SLLconst {
                        break
                }
-               if s0.AuxInt != 24 {
+               if s0.AuxInt != 56 {
                        break
                }
                y0 := s0.Args[0]
@@ -11515,11 +16108,17 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                        break
                }
                i0 := x0.AuxInt
-               s := x0.Aux
+               if x0.Aux != s {
+                       break
+               }
                _ = x0.Args[1]
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               y1 := o1.Args[1]
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               y1 := o5.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
@@ -11538,7 +16137,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if mem != x1.Args[1] {
                        break
                }
-               y2 := o0.Args[1]
+               y2 := o4.Args[1]
                if y2.Op != OpARM64MOVDnop {
                        break
                }
@@ -11557,7 +16156,7 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if mem != x2.Args[1] {
                        break
                }
-               y3 := v.Args[1]
+               y3 := o3.Args[1]
                if y3.Op != OpARM64MOVDnop {
                        break
                }
@@ -11576,135 +16175,71 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                if mem != x3.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
-                       break
-               }
-               b = mergePoint(b, x0, x1, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
-               v1.Aux = s
-               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i0
-               v2.AddArg(p)
-               v1.AddArg(v2)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (OR <t> y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
-       for {
-               t := v.Type
-               _ = v.Args[1]
-               y3 := v.Args[0]
-               if y3.Op != OpARM64MOVDnop {
-                       break
-               }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
-                       break
-               }
-               i3 := x3.AuxInt
-               s := x3.Aux
-               _ = x3.Args[1]
-               p := x3.Args[0]
-               mem := x3.Args[1]
-               o0 := v.Args[1]
-               if o0.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o0.AuxInt != 8 {
-                       break
-               }
-               _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o1.AuxInt != 16 {
-                       break
-               }
-               _ = o1.Args[1]
-               s0 := o1.Args[0]
-               if s0.Op != OpARM64SLLconst {
-                       break
-               }
-               if s0.AuxInt != 24 {
-                       break
-               }
-               y0 := s0.Args[0]
-               if y0.Op != OpARM64MOVDnop {
+               y4 := o2.Args[1]
+               if y4.Op != OpARM64MOVDnop {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               i4 := x4.AuxInt
+               if x4.Aux != s {
                        break
                }
-               _ = x0.Args[1]
-               if p != x0.Args[0] {
+               _ = x4.Args[1]
+               if p != x4.Args[0] {
                        break
                }
-               if mem != x0.Args[1] {
+               if mem != x4.Args[1] {
                        break
                }
-               y1 := o1.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUload {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               i5 := x5.AuxInt
+               if x5.Aux != s {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] {
+               _ = x5.Args[1]
+               if p != x5.Args[0] {
                        break
                }
-               if mem != x1.Args[1] {
+               if mem != x5.Args[1] {
                        break
                }
-               y2 := o0.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUload {
                        break
                }
-               i2 := x2.AuxInt
-               if x2.Aux != s {
+               i6 := x6.AuxInt
+               if x6.Aux != s {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               _ = x6.Args[1]
+               if p != x6.Args[0] {
                        break
                }
-               if mem != x2.Args[1] {
+               if mem != x6.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
                v1.Aux = s
                v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
                v2.AuxInt = i0
@@ -11714,9 +16249,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [6] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [7] {s} p mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
        for {
                t := v.Type
                _ = v.Args[1]
@@ -11780,14 +16315,13 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               _ = x0.Args[1]
-               p := x0.Args[0]
-               mem := x0.Args[1]
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               mem := x0.Args[2]
                y1 := o5.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
@@ -11796,14 +16330,18 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if x1.AuxInt != 1 {
                        break
                }
+               s := x1.Aux
                _ = x1.Args[1]
-               if p != x1.Args[0] {
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
                if mem != x1.Args[1] {
                        break
                }
@@ -11815,14 +16353,14 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               i2 := x2.AuxInt
-               if x2.Aux != s {
+               if x2.AuxInt != 2 {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               if x2.Aux != s {
                        break
                }
+               _ = x2.Args[1]
+               p := x2.Args[0]
                if mem != x2.Args[1] {
                        break
                }
@@ -11834,7 +16372,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               i3 := x3.AuxInt
+               if x3.AuxInt != 3 {
+                       break
+               }
                if x3.Aux != s {
                        break
                }
@@ -11853,7 +16393,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x4.Op != OpARM64MOVBUload {
                        break
                }
-               i4 := x4.AuxInt
+               if x4.AuxInt != 4 {
+                       break
+               }
                if x4.Aux != s {
                        break
                }
@@ -11872,7 +16414,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x5.Op != OpARM64MOVBUload {
                        break
                }
-               i5 := x5.AuxInt
+               if x5.AuxInt != 5 {
+                       break
+               }
                if x5.Aux != s {
                        break
                }
@@ -11891,7 +16435,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x6.Op != OpARM64MOVBUload {
                        break
                }
-               i6 := x6.AuxInt
+               if x6.AuxInt != 6 {
+                       break
+               }
                if x6.Aux != s {
                        break
                }
@@ -11910,7 +16456,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x7.Op != OpARM64MOVBUload {
                        break
                }
-               i7 := x7.AuxInt
+               if x7.AuxInt != 7 {
+                       break
+               }
                if x7.Aux != s {
                        break
                }
@@ -11921,26 +16469,28 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if mem != x7.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
                v0 := b.NewValue0(v.Pos, OpARM64REV, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
-               v1.Aux = s
-               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i0
-               v2.AddArg(p)
-               v1.AddArg(v2)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v1.AddArg(ptr0)
+               v1.AddArg(idx0)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem))))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+       return false
+}
+func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [7] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [6] {s} p mem))))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
        for {
                t := v.Type
                _ = v.Args[1]
@@ -11952,7 +16502,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x7.Op != OpARM64MOVBUload {
                        break
                }
-               i7 := x7.AuxInt
+               if x7.AuxInt != 7 {
+                       break
+               }
                s := x7.Aux
                _ = x7.Args[1]
                p := x7.Args[0]
@@ -12017,18 +16569,13 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
-                       break
-               }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
-                       break
-               }
-               _ = x0.Args[1]
-               if p != x0.Args[0] {
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if mem != x0.Args[1] {
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               if mem != x0.Args[2] {
                        break
                }
                y1 := o5.Args[1]
@@ -12039,14 +16586,20 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               i1 := x1.AuxInt
+               if x1.AuxInt != 1 {
+                       break
+               }
                if x1.Aux != s {
                        break
                }
                _ = x1.Args[1]
-               if p != x1.Args[0] {
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
                if mem != x1.Args[1] {
                        break
                }
@@ -12058,7 +16611,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               i2 := x2.AuxInt
+               if x2.AuxInt != 2 {
+                       break
+               }
                if x2.Aux != s {
                        break
                }
@@ -12077,7 +16632,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               i3 := x3.AuxInt
+               if x3.AuxInt != 3 {
+                       break
+               }
                if x3.Aux != s {
                        break
                }
@@ -12096,7 +16653,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x4.Op != OpARM64MOVBUload {
                        break
                }
-               i4 := x4.AuxInt
+               if x4.AuxInt != 4 {
+                       break
+               }
                if x4.Aux != s {
                        break
                }
@@ -12115,7 +16674,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x5.Op != OpARM64MOVBUload {
                        break
                }
-               i5 := x5.AuxInt
+               if x5.AuxInt != 5 {
+                       break
+               }
                if x5.Aux != s {
                        break
                }
@@ -12134,7 +16695,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x6.Op != OpARM64MOVBUload {
                        break
                }
-               i6 := x6.AuxInt
+               if x6.AuxInt != 6 {
+                       break
+               }
                if x6.Aux != s {
                        break
                }
@@ -12145,19 +16708,16 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if mem != x6.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
                v0 := b.NewValue0(v.Pos, OpARM64REV, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
-               v1.Aux = s
-               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i0
-               v2.AddArg(p)
-               v1.AddArg(v2)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v1.AddArg(ptr0)
+               v1.AddArg(idx0)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
@@ -12680,6 +17240,67 @@ func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool {
                v0.AddArg(mem)
                return true
        }
+       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
+       // result: @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr0 idx0 mem)
+       for {
+               t := v.Type
+               if v.AuxInt != 8 {
+                       break
+               }
+               _ = v.Args[1]
+               y0 := v.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               mem := x0.Args[2]
+               y1 := v.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x1.AuxInt != 1 {
+                       break
+               }
+               s := x1.Aux
+               _ = x1.Args[1]
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVHUloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr0)
+               v0.AddArg(idx0)
+               v0.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
+       b := v.Block
+       _ = b
        // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUload [i0] {s} p mem) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i3] {s} p mem)))
        // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)
        // result: @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
@@ -12759,11 +17380,85 @@ func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool {
                v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
-       b := v.Block
-       _ = b
+       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUloadidx ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 idx0 mem)
+       for {
+               t := v.Type
+               if v.AuxInt != 24 {
+                       break
+               }
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 16 {
+                       break
+               }
+               _ = o0.Args[1]
+               x0 := o0.Args[0]
+               if x0.Op != OpARM64MOVHUloadidx {
+                       break
+               }
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               mem := x0.Args[2]
+               y1 := o0.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x1.AuxInt != 2 {
+                       break
+               }
+               s := x1.Aux
+               _ = x1.Args[1]
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := v.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x2.AuxInt != 3 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               p := x2.Args[0]
+               if mem != x2.Args[1] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr0)
+               v0.AddArg(idx0)
+               v0.AddArg(mem)
+               return true
+       }
        // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUload [i0] {s} p mem) y1:(MOVDnop x1:(MOVBUload [i4] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i7] {s} p mem)))
        // cond: i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
        // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
@@ -12814,14 +17509,150 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               i4 := x1.AuxInt
-               if x1.Aux != s {
+               i4 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o1.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               i5 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := o0.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i6 := x3.AuxInt
+               if x3.Aux != s {
+                       break
+               }
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
+                       break
+               }
+               y4 := v.Args[1]
+               if y4.Op != OpARM64MOVDnop {
+                       break
+               }
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
+                       break
+               }
+               i7 := x4.AuxInt
+               if x4.Aux != s {
+                       break
+               }
+               _ = x4.Args[1]
+               if p != x4.Args[0] {
+                       break
+               }
+               if mem != x4.Args[1] {
+                       break
+               }
+               if !(i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.Aux = s
+               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v1.AuxInt = i0
+               v1.AddArg(p)
+               v0.AddArg(v1)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUloadidx ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 idx0 mem)
+       for {
+               t := v.Type
+               if v.AuxInt != 56 {
+                       break
+               }
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 48 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 40 {
+                       break
+               }
+               _ = o1.Args[1]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 32 {
+                       break
+               }
+               _ = o2.Args[1]
+               x0 := o2.Args[0]
+               if x0.Op != OpARM64MOVWUloadidx {
+                       break
+               }
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               mem := x0.Args[2]
+               y1 := o2.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x1.AuxInt != 4 {
                        break
                }
+               s := x1.Aux
                _ = x1.Args[1]
-               if p != x1.Args[0] {
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
                if mem != x1.Args[1] {
                        break
                }
@@ -12833,14 +17664,14 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               i5 := x2.AuxInt
-               if x2.Aux != s {
+               if x2.AuxInt != 5 {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               if x2.Aux != s {
                        break
                }
+               _ = x2.Args[1]
+               p := x2.Args[0]
                if mem != x2.Args[1] {
                        break
                }
@@ -12852,7 +17683,9 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               i6 := x3.AuxInt
+               if x3.AuxInt != 6 {
+                       break
+               }
                if x3.Aux != s {
                        break
                }
@@ -12871,7 +17704,9 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x4.Op != OpARM64MOVBUload {
                        break
                }
-               i7 := x4.AuxInt
+               if x4.AuxInt != 7 {
+                       break
+               }
                if x4.Aux != s {
                        break
                }
@@ -12882,18 +17717,15 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if mem != x4.Args[1] {
                        break
                }
-               if !(i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.Aux = s
-               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i0
-               v1.AddArg(p)
-               v0.AddArg(v1)
+               v0.AddArg(ptr0)
+               v0.AddArg(idx0)
                v0.AddArg(mem)
                return true
        }
@@ -12953,6 +17785,64 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                v0.AddArg(v1)
                return true
        }
+       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr0 idx0 mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
+       // result: @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr0 idx0 mem))
+       for {
+               t := v.Type
+               if v.AuxInt != 8 {
+                       break
+               }
+               _ = v.Args[1]
+               y0 := v.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x0.AuxInt != 1 {
+                       break
+               }
+               s := x0.Aux
+               _ = x0.Args[1]
+               p1 := x0.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               mem := x0.Args[1]
+               y1 := v.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x1.Args[2]
+               ptr0 := x1.Args[0]
+               idx0 := x1.Args[1]
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVHUloadidx, t)
+               v1.AddArg(ptr0)
+               v1.AddArg(idx0)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
        // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] y0:(REV16W x0:(MOVHUload [i2] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i0] {s} p mem)))
        // cond: i1 == i0+1 && i2 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)
        // result: @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
@@ -13038,6 +17928,91 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                v0.AddArg(v1)
                return true
        }
+       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] y0:(REV16W x0:(MOVHUload [2] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr0 idx0 mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
+       for {
+               t := v.Type
+               if v.AuxInt != 24 {
+                       break
+               }
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 16 {
+                       break
+               }
+               _ = o0.Args[1]
+               y0 := o0.Args[0]
+               if y0.Op != OpARM64REV16W {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVHUload {
+                       break
+               }
+               if x0.AuxInt != 2 {
+                       break
+               }
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o0.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x1.AuxInt != 1 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := v.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x2.Args[2]
+               ptr0 := x2.Args[0]
+               idx0 := x2.Args[1]
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v1.AddArg(ptr0)
+               v1.AddArg(idx0)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
        // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] y0:(REVW x0:(MOVWUload [i4] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i3] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i1] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i0] {s} p mem)))
        // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
        // result: @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
@@ -13177,6 +18152,149 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                v0.AddArg(v1)
                return true
        }
+       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] y0:(REVW x0:(MOVWUload [4] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [3] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr0 idx0 mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
+       for {
+               t := v.Type
+               if v.AuxInt != 56 {
+                       break
+               }
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 48 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 40 {
+                       break
+               }
+               _ = o1.Args[1]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 32 {
+                       break
+               }
+               _ = o2.Args[1]
+               y0 := o2.Args[0]
+               if y0.Op != OpARM64REVW {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVWUload {
+                       break
+               }
+               if x0.AuxInt != 4 {
+                       break
+               }
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o2.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x1.AuxInt != 3 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o1.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x2.AuxInt != 2 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := o0.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x3.AuxInt != 1 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               _ = x3.Args[1]
+               p1 := x3.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x3.Args[1] {
+                       break
+               }
+               y4 := v.Args[1]
+               if y4.Op != OpARM64MOVDnop {
+                       break
+               }
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x4.Args[2]
+               ptr0 := x4.Args[0]
+               idx0 := x4.Args[1]
+               if mem != x4.Args[2] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v1.AddArg(ptr0)
+               v1.AddArg(idx0)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64ORshiftRA_0(v *Value) bool {
index 17323bd2ab2c381bcd3b4c676420257301313340..631c8e6879d47d8932876cdd9cf9d430eb5b17b6 100644 (file)
@@ -22,7 +22,7 @@ var sink16 uint16
 func load_le64(b []byte) {
        // amd64:`MOVQ\s\(.*\),`
        // s390x:`MOVDBR\s\(.*\),`
-       // arm64:`MOVD\s\(R[0-9]+\),`
+       // arm64:`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`
        // ppc64le:`MOVD\s`,-`MOV[BHW]Z`
        sink64 = binary.LittleEndian.Uint64(b)
 }
@@ -30,7 +30,7 @@ func load_le64(b []byte) {
 func load_le64_idx(b []byte, idx int) {
        // amd64:`MOVQ\s\(.*\)\(.*\*1\),`
        // s390x:`MOVDBR\s\(.*\)\(.*\*1\),`
-       // arm64:`MOVD\s\(R[0-9]+\),`
+       // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BHW]`
        // ppc64le:`MOVD\s`,-`MOV[BHW]Z\s`
        sink64 = binary.LittleEndian.Uint64(b[idx:])
 }
@@ -38,7 +38,7 @@ func load_le64_idx(b []byte, idx int) {
 func load_le32(b []byte) {
        // amd64:`MOVL\s\(.*\),`           386:`MOVL\s\(.*\),`
        // s390x:`MOVWBR\s\(.*\),`
-       // arm64:`MOVWU\s\(R[0-9]+\),`
+       // arm64:`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`
        // ppc64le:`MOVWZ\s`
        sink32 = binary.LittleEndian.Uint32(b)
 }
@@ -46,7 +46,7 @@ func load_le32(b []byte) {
 func load_le32_idx(b []byte, idx int) {
        // amd64:`MOVL\s\(.*\)\(.*\*1\),`  386:`MOVL\s\(.*\)\(.*\*1\),`
        // s390x:`MOVWBR\s\(.*\)\(.*\*1\),`
-       // arm64:`MOVWU\s\(R[0-9]+\),`
+       // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BH]`
        // ppc64le:`MOVWZ\s`
        sink32 = binary.LittleEndian.Uint32(b[idx:])
 }
@@ -54,50 +54,54 @@ func load_le32_idx(b []byte, idx int) {
 func load_le16(b []byte) {
        // amd64:`MOVWLZX\s\(.*\),`
        // ppc64le:`MOVHZ\s`
+       // arm64:`MOVHU\s\(R[0-9]+\),`,-`MOVB`
        sink16 = binary.LittleEndian.Uint16(b)
 }
 
 func load_le16_idx(b []byte, idx int) {
        // amd64:`MOVWLZX\s\(.*\),`
        // ppc64le:`MOVHZ\s`
+       // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
        sink16 = binary.LittleEndian.Uint16(b[idx:])
 }
 
 func load_be64(b []byte) {
        // amd64:`BSWAPQ`
        // s390x:`MOVD\s\(.*\),`
-       // arm64:`REV`
+       // arm64:`REV`,`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`,-`REVW`,-`REV16W`
        sink64 = binary.BigEndian.Uint64(b)
 }
 
 func load_be64_idx(b []byte, idx int) {
        // amd64:`BSWAPQ`
        // s390x:`MOVD\s\(.*\)\(.*\*1\),`
-       // arm64:`REV`
+       // arm64:`REV`,`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[WHB]`,-`REVW`,-`REV16W`
        sink64 = binary.BigEndian.Uint64(b[idx:])
 }
 
 func load_be32(b []byte) {
        // amd64:`BSWAPL`
        // s390x:`MOVWZ\s\(.*\),`
-       // arm64:`REVW`
+       // arm64:`REVW`,`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`,-`REV16W`
        sink32 = binary.BigEndian.Uint32(b)
 }
 
 func load_be32_idx(b []byte, idx int) {
        // amd64:`BSWAPL`
        // s390x:`MOVWZ\s\(.*\)\(.*\*1\),`
-       // arm64:`REVW`
+       // arm64:`REVW`,`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[HB]`,-`REV16W`
        sink32 = binary.BigEndian.Uint32(b[idx:])
 }
 
 func load_be16(b []byte) {
        // amd64:`ROLW\s\$8`
+       // arm64:`REV16W`,`MOVHU\s\(R[0-9]+\),`,-`MOVB`
        sink16 = binary.BigEndian.Uint16(b)
 }
 
 func load_be16_idx(b []byte, idx int) {
        // amd64:`ROLW\s\$8`
+       // arm64:`REV16W`,`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
        sink16 = binary.BigEndian.Uint16(b[idx:])
 }
 
@@ -162,7 +166,7 @@ func store_le64(b []byte) {
 
 func store_le64_idx(b []byte, idx int) {
        // amd64:`MOVQ\s.*\(.*\)\(.*\*1\)$`,-`SHR.`
-       // arm64:`MOVD`,-`MOV[WBH]`
+       // arm64:`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`
        // ppc64le:`MOVD\s`,-`MOV[BHW]\s`
        binary.LittleEndian.PutUint64(b[idx:], sink64)
 }
@@ -176,7 +180,7 @@ func store_le32(b []byte) {
 
 func store_le32_idx(b []byte, idx int) {
        // amd64:`MOVL\s`
-       // arm64:`MOVW`,-`MOV[BH]`
+       // arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`
        // ppc64le:`MOVW\s`
        binary.LittleEndian.PutUint32(b[idx:], sink32)
 }
@@ -190,32 +194,32 @@ func store_le16(b []byte) {
 
 func store_le16_idx(b []byte, idx int) {
        // amd64:`MOVW\s`
-       // arm64:`MOVH`,-`MOVB`
+       // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
        // ppc64le(DISABLED):`MOVH\s`
        binary.LittleEndian.PutUint16(b[idx:], sink16)
 }
 
 func store_be64(b []byte) {
        // amd64:`BSWAPQ`,-`SHR.`
-       // arm64:`MOVD`,`REV`,-`MOV[WBH]`
+       // arm64:`MOVD`,`REV`,-`MOV[WBH]`,-`REVW`,-`REV16W`
        binary.BigEndian.PutUint64(b, sink64)
 }
 
 func store_be64_idx(b []byte, idx int) {
        // amd64:`BSWAPQ`,-`SHR.`
-       // arm64:`MOVD`,`REV`,-`MOV[WBH]`
+       // arm64:`REV`,`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`,-`REV16W`,-`REVW`
        binary.BigEndian.PutUint64(b[idx:], sink64)
 }
 
 func store_be32(b []byte) {
        // amd64:`BSWAPL`,-`SHR.`
-       // arm64:`MOVW`,`REVW`,-`MOV[BH]`
+       // arm64:`MOVW`,`REVW`,-`MOV[BH]`,-`REV16W`
        binary.BigEndian.PutUint32(b, sink32)
 }
 
 func store_be32_idx(b []byte, idx int) {
        // amd64:`BSWAPL`,-`SHR.`
-       // arm64:`MOVW`,`REVW`,-`MOV[BH]`
+       // arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`,-`REV16W`
        binary.BigEndian.PutUint32(b[idx:], sink32)
 }
 
@@ -227,7 +231,7 @@ func store_be16(b []byte) {
 
 func store_be16_idx(b []byte, idx int) {
        // amd64:`ROLW\s\$8`,-`SHR.`
-       // arm64:`MOVH`,`REV16W`,-`MOVB`
+       // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`MOVB`
        binary.BigEndian.PutUint16(b[idx:], sink16)
 }