Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: optimize arm64's MADD and MSUB
author: Ben Shi <powerman1st@163.com>
Thu, 27 Sep 2018 13:21:03 +0000 (13:21 +0000)
committer: Cherry Zhang <cherryyz@google.com>
Fri, 28 Sep 2018 15:03:17 +0000 (15:03 +0000)
This CL implements constant folding for MADD/MSUB on arm64.

1. The total size of pkg/android_arm64/ decreases about 4KB,
   excluding cmd/compile/ .

2. There is no regression in the go1 benchmark, excluding noise.
name                     old time/op    new time/op    delta
BinaryTree17-4              16.4s ± 1%     16.5s ± 1%  +0.24%  (p=0.008 n=29+29)
Fannkuch11-4                8.73s ± 0%     8.71s ± 0%  -0.15%  (p=0.000 n=29+29)
FmtFprintfEmpty-4           174ns ± 0%     174ns ± 0%    ~     (all equal)
FmtFprintfString-4          370ns ± 0%     372ns ± 2%  +0.53%  (p=0.007 n=24+30)
FmtFprintfInt-4             419ns ± 0%     419ns ± 0%    ~     (all equal)
FmtFprintfIntInt-4          673ns ± 1%     661ns ± 1%  -1.81%  (p=0.000 n=30+27)
FmtFprintfPrefixedInt-4     806ns ± 0%     805ns ± 0%    ~     (p=0.957 n=28+27)
FmtFprintfFloat-4          1.09µs ± 0%    1.09µs ± 0%  -0.04%  (p=0.001 n=22+30)
FmtManyArgs-4              2.67µs ± 0%    2.68µs ± 0%  +0.03%  (p=0.045 n=29+28)
GobDecode-4                33.2ms ± 1%    32.5ms ± 1%  -2.11%  (p=0.000 n=29+29)
GobEncode-4                29.5ms ± 0%    29.2ms ± 0%  -1.04%  (p=0.000 n=28+28)
Gzip-4                      1.39s ± 2%     1.38s ± 1%  -0.48%  (p=0.023 n=30+30)
Gunzip-4                    139ms ± 0%     139ms ± 0%    ~     (p=0.616 n=30+28)
HTTPClientServer-4          766µs ± 4%     758µs ± 3%  -1.03%  (p=0.013 n=28+29)
JSONEncode-4               49.7ms ± 0%    49.6ms ± 0%  -0.24%  (p=0.000 n=30+30)
JSONDecode-4                266ms ± 0%     268ms ± 1%  +1.07%  (p=0.000 n=29+30)
Mandelbrot200-4            16.6ms ± 0%    16.6ms ± 0%    ~     (p=0.248 n=30+29)
GoParse-4                  15.9ms ± 0%    16.0ms ± 0%  +0.76%  (p=0.000 n=29+29)
RegexpMatchEasy0_32-4       381ns ± 0%     380ns ± 0%  -0.14%  (p=0.000 n=30+30)
RegexpMatchEasy0_1K-4      1.18µs ± 0%    1.19µs ± 1%  +0.30%  (p=0.000 n=29+30)
RegexpMatchEasy1_32-4       357ns ± 0%     357ns ± 0%    ~     (all equal)
RegexpMatchEasy1_1K-4      2.04µs ± 0%    2.05µs ± 0%  +0.50%  (p=0.000 n=26+28)
RegexpMatchMedium_32-4      590ns ± 0%     589ns ± 0%  -0.12%  (p=0.000 n=30+23)
RegexpMatchMedium_1K-4      162µs ± 0%     162µs ± 0%    ~     (p=0.318 n=28+25)
RegexpMatchHard_32-4       9.56µs ± 0%    9.56µs ± 0%    ~     (p=0.072 n=30+29)
RegexpMatchHard_1K-4        287µs ± 0%     287µs ± 0%  -0.02%  (p=0.005 n=28+28)
Revcomp-4                   2.50s ± 0%     2.51s ± 0%    ~     (p=0.246 n=29+29)
Template-4                  312ms ± 1%     313ms ± 1%  +0.46%  (p=0.002 n=30+30)
TimeParse-4                1.68µs ± 0%    1.67µs ± 0%  -0.31%  (p=0.000 n=27+29)
TimeFormat-4               1.66µs ± 0%    1.64µs ± 0%  -0.92%  (p=0.000 n=29+26)
[Geo mean]                  247µs          246µs       -0.15%

name                     old speed      new speed      delta
GobDecode-4              23.1MB/s ± 1%  23.6MB/s ± 0%  +2.17%  (p=0.000 n=29+28)
GobEncode-4              26.0MB/s ± 0%  26.3MB/s ± 0%  +1.05%  (p=0.000 n=28+28)
Gzip-4                   14.0MB/s ± 2%  14.1MB/s ± 1%  +0.47%  (p=0.026 n=30+30)
Gunzip-4                  139MB/s ± 0%   139MB/s ± 0%    ~     (p=0.624 n=30+28)
JSONEncode-4             39.1MB/s ± 0%  39.2MB/s ± 0%  +0.24%  (p=0.000 n=30+30)
JSONDecode-4             7.31MB/s ± 0%  7.23MB/s ± 1%  -1.07%  (p=0.000 n=28+30)
GoParse-4                3.65MB/s ± 0%  3.62MB/s ± 0%  -0.77%  (p=0.000 n=29+29)
RegexpMatchEasy0_32-4    84.0MB/s ± 0%  84.1MB/s ± 0%  +0.18%  (p=0.000 n=28+30)
RegexpMatchEasy0_1K-4     864MB/s ± 0%   861MB/s ± 1%  -0.29%  (p=0.000 n=29+30)
RegexpMatchEasy1_32-4    89.5MB/s ± 0%  89.5MB/s ± 0%    ~     (p=0.841 n=28+28)
RegexpMatchEasy1_1K-4     502MB/s ± 0%   500MB/s ± 0%  -0.51%  (p=0.000 n=29+29)
RegexpMatchMedium_32-4   1.69MB/s ± 0%  1.70MB/s ± 0%  +0.41%  (p=0.000 n=26+30)
RegexpMatchMedium_1K-4   6.31MB/s ± 0%  6.30MB/s ± 0%    ~     (p=0.129 n=30+25)
RegexpMatchHard_32-4     3.35MB/s ± 0%  3.35MB/s ± 0%    ~     (p=0.657 n=30+29)
RegexpMatchHard_1K-4     3.57MB/s ± 0%  3.57MB/s ± 0%    ~     (all equal)
Revcomp-4                 102MB/s ± 0%   101MB/s ± 0%    ~     (p=0.213 n=29+29)
Template-4               6.22MB/s ± 1%  6.19MB/s ± 1%  -0.42%  (p=0.005 n=30+29)
[Geo mean]               24.1MB/s       24.2MB/s       +0.08%

Change-Id: I6c02d3c9975f6bd8bc215cb1fc14d29602b45649
Reviewed-on: https://go-review.googlesource.com/138095
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/rewriteARM64.go
test/codegen/arithmetic.go

index 8fb39538c2a1cd74e804d13d0ae47d81a52dddd3..3fce018d45ef520a75e36fba8304010187bc892c 100644 (file)
 (MULW (NEG x) y) -> (MNEGW x y)
 
 // madd/msub
-(ADD a l:(MUL  x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADD a x y)
-(SUB a l:(MUL  x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUB a x y)
-(ADD a l:(MNEG x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUB a x y)
-(SUB a l:(MNEG x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADD a x y)
+(ADD a l:(MUL  x y)) && l.Uses==1 && clobber(l) -> (MADD a x y)
+(SUB a l:(MUL  x y)) && l.Uses==1 && clobber(l) -> (MSUB a x y)
+(ADD a l:(MNEG x y)) && l.Uses==1 && clobber(l) -> (MSUB a x y)
+(SUB a l:(MNEG x y)) && l.Uses==1 && clobber(l) -> (MADD a x y)
 
-(ADD a l:(MULW  x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADDW a x y)
-(SUB a l:(MULW  x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUBW a x y)
-(ADD a l:(MNEGW x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUBW a x y)
-(SUB a l:(MNEGW x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADDW a x y)
+(ADD a l:(MULW  x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MADDW a x y)
+(SUB a l:(MULW  x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MSUBW a x y)
+(ADD a l:(MNEGW x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MSUBW a x y)
+(SUB a l:(MNEGW x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MADDW a x y)
 
 // mul by constant
 (MUL x (MOVDconst [-1])) -> (NEG x)
 (MNEGW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
 (MNEGW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
 
+(MADD a x (MOVDconst [-1])) -> (SUB a x)
+(MADD a _ (MOVDconst [0])) -> a
+(MADD a x (MOVDconst [1])) -> (ADD a x)
+(MADD a x (MOVDconst [c])) && isPowerOfTwo(c) -> (ADDshiftLL a x [log2(c)])
+(MADD a x (MOVDconst [c])) && isPowerOfTwo(c-1) && c>=3 -> (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MADD a x (MOVDconst [c])) && isPowerOfTwo(c+1) && c>=7 -> (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MADD a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MADD a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MADD a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MADD a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+
+(MADD a (MOVDconst [-1]) x) -> (SUB a x)
+(MADD a (MOVDconst [0]) _) -> a
+(MADD a (MOVDconst [1]) x) -> (ADD a x)
+(MADD a (MOVDconst [c]) x) && isPowerOfTwo(c) -> (ADDshiftLL a x [log2(c)])
+(MADD a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c>=3 -> (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MADD a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c>=7 -> (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MADD a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MADD a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MADD a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MADD a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+
+(MADDW a x (MOVDconst [c])) && int32(c)==-1 -> (SUB a x)
+(MADDW a _ (MOVDconst [c])) && int32(c)==0 -> a
+(MADDW a x (MOVDconst [c])) && int32(c)==1 -> (ADD a x)
+(MADDW a x (MOVDconst [c])) && isPowerOfTwo(c) -> (ADDshiftLL a x [log2(c)])
+(MADDW a x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c)>=3 -> (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MADDW a x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c)>=7 -> (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MADDW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MADDW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MADDW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MADDW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+
+(MADDW a (MOVDconst [c]) x) && int32(c)==-1 -> (SUB a x)
+(MADDW a (MOVDconst [c]) _) && int32(c)==0 -> a
+(MADDW a (MOVDconst [c]) x) && int32(c)==1 -> (ADD a x)
+(MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c) -> (ADDshiftLL a x [log2(c)])
+(MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c)>=3 -> (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c)>=7 -> (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MADDW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MADDW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MADDW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MADDW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+
+(MSUB a x (MOVDconst [-1])) -> (ADD a x)
+(MSUB a _ (MOVDconst [0])) -> a
+(MSUB a x (MOVDconst [1])) -> (SUB a x)
+(MSUB a x (MOVDconst [c])) && isPowerOfTwo(c) -> (SUBshiftLL a x [log2(c)])
+(MSUB a x (MOVDconst [c])) && isPowerOfTwo(c-1) && c>=3 -> (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MSUB a x (MOVDconst [c])) && isPowerOfTwo(c+1) && c>=7 -> (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MSUB a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MSUB a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MSUB a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MSUB a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+
+(MSUB a (MOVDconst [-1]) x) -> (ADD a x)
+(MSUB a (MOVDconst [0]) _) -> a
+(MSUB a (MOVDconst [1]) x) -> (SUB a x)
+(MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SUBshiftLL a x [log2(c)])
+(MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c>=3 -> (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c>=7 -> (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MSUB a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MSUB a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MSUB a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MSUB a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+
+(MSUBW a x (MOVDconst [c])) && int32(c)==-1 -> (ADD a x)
+(MSUBW a _ (MOVDconst [c])) && int32(c)==0 -> a
+(MSUBW a x (MOVDconst [c])) && int32(c)==1 -> (SUB a x)
+(MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c) -> (SUBshiftLL a x [log2(c)])
+(MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c)>=3 -> (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c)>=7 -> (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MSUBW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MSUBW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MSUBW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MSUBW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+
+(MSUBW a (MOVDconst [c]) x) && int32(c)==-1 -> (ADD a x)
+(MSUBW a (MOVDconst [c]) _) && int32(c)==0 -> a
+(MSUBW a (MOVDconst [c]) x) && int32(c)==1 -> (SUB a x)
+(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SUBshiftLL a x [log2(c)])
+(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c)>=3 -> (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c)>=7 -> (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MSUBW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MSUBW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MSUBW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MSUBW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+
 // div by constant
 (UDIV x (MOVDconst [1])) -> x
 (UDIV x (MOVDconst [c])) && isPowerOfTwo(c) -> (SRLconst [log2(c)] x)
 (MULW  (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [int64(int32(c)*int32(d))])
 (MNEG  (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [-c*d])
 (MNEGW (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [-int64(int32(c)*int32(d))])
+(MADD  (MOVDconst [c]) x y) -> (ADDconst [c] (MUL   <x.Type> x y))
+(MADDW (MOVDconst [c]) x y) -> (ADDconst [c] (MULW  <x.Type> x y))
+(MSUB  (MOVDconst [c]) x y) -> (ADDconst [c] (MNEG  <x.Type> x y))
+(MSUBW (MOVDconst [c]) x y) -> (ADDconst [c] (MNEGW <x.Type> x y))
+(MADD  a (MOVDconst [c]) (MOVDconst [d])) -> (ADDconst [c*d] a)
+(MADDW a (MOVDconst [c]) (MOVDconst [d])) -> (ADDconst [int64(int32(c)*int32(d))] a)
+(MSUB  a (MOVDconst [c]) (MOVDconst [d])) -> (SUBconst [c*d] a)
+(MSUBW a (MOVDconst [c]) (MOVDconst [d])) -> (SUBconst [int64(int32(c)*int32(d))] a)
 (DIV   (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c/d])
 (UDIV  (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [int64(uint64(c)/uint64(d))])
 (DIVW  (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [int64(int32(c)/int32(d))])
index 5bf165df48f6d9f727410949c0d698249471424c..f07ab4209087fa4ecfc6e0030dd8887994c868fa 100644 (file)
@@ -139,6 +139,10 @@ func rewriteValueARM64(v *Value) bool {
                return rewriteValueARM64_OpARM64LessThan_0(v)
        case OpARM64LessThanU:
                return rewriteValueARM64_OpARM64LessThanU_0(v)
+       case OpARM64MADD:
+               return rewriteValueARM64_OpARM64MADD_0(v) || rewriteValueARM64_OpARM64MADD_10(v) || rewriteValueARM64_OpARM64MADD_20(v)
+       case OpARM64MADDW:
+               return rewriteValueARM64_OpARM64MADDW_0(v) || rewriteValueARM64_OpARM64MADDW_10(v) || rewriteValueARM64_OpARM64MADDW_20(v)
        case OpARM64MNEG:
                return rewriteValueARM64_OpARM64MNEG_0(v) || rewriteValueARM64_OpARM64MNEG_10(v) || rewriteValueARM64_OpARM64MNEG_20(v)
        case OpARM64MNEGW:
@@ -245,6 +249,10 @@ func rewriteValueARM64(v *Value) bool {
                return rewriteValueARM64_OpARM64MOVWstorezeroidx_0(v)
        case OpARM64MOVWstorezeroidx4:
                return rewriteValueARM64_OpARM64MOVWstorezeroidx4_0(v)
+       case OpARM64MSUB:
+               return rewriteValueARM64_OpARM64MSUB_0(v) || rewriteValueARM64_OpARM64MSUB_10(v) || rewriteValueARM64_OpARM64MSUB_20(v)
+       case OpARM64MSUBW:
+               return rewriteValueARM64_OpARM64MSUBW_0(v) || rewriteValueARM64_OpARM64MSUBW_10(v) || rewriteValueARM64_OpARM64MSUBW_20(v)
        case OpARM64MUL:
                return rewriteValueARM64_OpARM64MUL_0(v) || rewriteValueARM64_OpARM64MUL_10(v) || rewriteValueARM64_OpARM64MUL_20(v)
        case OpARM64MULW:
@@ -924,7 +932,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool {
                return true
        }
        // match: (ADD a l:(MUL x y))
-       // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // cond: l.Uses==1 && clobber(l)
        // result: (MADD a x y)
        for {
                _ = v.Args[1]
@@ -936,7 +944,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool {
                _ = l.Args[1]
                x := l.Args[0]
                y := l.Args[1]
-               if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+               if !(l.Uses == 1 && clobber(l)) {
                        break
                }
                v.reset(OpARM64MADD)
@@ -946,7 +954,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool {
                return true
        }
        // match: (ADD l:(MUL x y) a)
-       // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // cond: l.Uses==1 && clobber(l)
        // result: (MADD a x y)
        for {
                _ = v.Args[1]
@@ -958,7 +966,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool {
                x := l.Args[0]
                y := l.Args[1]
                a := v.Args[1]
-               if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+               if !(l.Uses == 1 && clobber(l)) {
                        break
                }
                v.reset(OpARM64MADD)
@@ -968,7 +976,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool {
                return true
        }
        // match: (ADD a l:(MNEG x y))
-       // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // cond: l.Uses==1 && clobber(l)
        // result: (MSUB a x y)
        for {
                _ = v.Args[1]
@@ -980,7 +988,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool {
                _ = l.Args[1]
                x := l.Args[0]
                y := l.Args[1]
-               if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+               if !(l.Uses == 1 && clobber(l)) {
                        break
                }
                v.reset(OpARM64MSUB)
@@ -990,7 +998,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool {
                return true
        }
        // match: (ADD l:(MNEG x y) a)
-       // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // cond: l.Uses==1 && clobber(l)
        // result: (MSUB a x y)
        for {
                _ = v.Args[1]
@@ -1002,7 +1010,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool {
                x := l.Args[0]
                y := l.Args[1]
                a := v.Args[1]
-               if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+               if !(l.Uses == 1 && clobber(l)) {
                        break
                }
                v.reset(OpARM64MSUB)
@@ -1012,7 +1020,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool {
                return true
        }
        // match: (ADD a l:(MULW x y))
-       // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // cond: a.Type.Size() != 8 && l.Uses==1 && clobber(l)
        // result: (MADDW a x y)
        for {
                _ = v.Args[1]
@@ -1024,7 +1032,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool {
                _ = l.Args[1]
                x := l.Args[0]
                y := l.Args[1]
-               if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+               if !(a.Type.Size() != 8 && l.Uses == 1 && clobber(l)) {
                        break
                }
                v.reset(OpARM64MADDW)
@@ -1034,7 +1042,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool {
                return true
        }
        // match: (ADD l:(MULW x y) a)
-       // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // cond: a.Type.Size() != 8 && l.Uses==1 && clobber(l)
        // result: (MADDW a x y)
        for {
                _ = v.Args[1]
@@ -1046,7 +1054,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool {
                x := l.Args[0]
                y := l.Args[1]
                a := v.Args[1]
-               if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+               if !(a.Type.Size() != 8 && l.Uses == 1 && clobber(l)) {
                        break
                }
                v.reset(OpARM64MADDW)
@@ -1056,7 +1064,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool {
                return true
        }
        // match: (ADD a l:(MNEGW x y))
-       // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // cond: a.Type.Size() != 8 && l.Uses==1 && clobber(l)
        // result: (MSUBW a x y)
        for {
                _ = v.Args[1]
@@ -1068,7 +1076,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool {
                _ = l.Args[1]
                x := l.Args[0]
                y := l.Args[1]
-               if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+               if !(a.Type.Size() != 8 && l.Uses == 1 && clobber(l)) {
                        break
                }
                v.reset(OpARM64MSUBW)
@@ -1078,7 +1086,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool {
                return true
        }
        // match: (ADD l:(MNEGW x y) a)
-       // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // cond: a.Type.Size() != 8 && l.Uses==1 && clobber(l)
        // result: (MSUBW a x y)
        for {
                _ = v.Args[1]
@@ -1090,7 +1098,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool {
                x := l.Args[0]
                y := l.Args[1]
                a := v.Args[1]
-               if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+               if !(a.Type.Size() != 8 && l.Uses == 1 && clobber(l)) {
                        break
                }
                v.reset(OpARM64MSUBW)
@@ -6515,192 +6523,229 @@ func rewriteValueARM64_OpARM64LessThanU_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MNEG_0(v *Value) bool {
+func rewriteValueARM64_OpARM64MADD_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (MNEG x (MOVDconst [-1]))
+       // match: (MADD a x (MOVDconst [-1]))
        // cond:
-       // result: x
+       // result: (SUB a x)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != -1 {
+               if v_2.AuxInt != -1 {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v.reset(OpARM64SUB)
+               v.AddArg(a)
                v.AddArg(x)
                return true
        }
-       // match: (MNEG (MOVDconst [-1]) x)
+       // match: (MADD a _ (MOVDconst [0]))
        // cond:
-       // result: x
+       // result: a
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != -1 {
+               if v_2.AuxInt != 0 {
                        break
                }
-               x := v.Args[1]
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = a.Type
+               v.AddArg(a)
                return true
        }
-       // match: (MNEG _ (MOVDconst [0]))
+       // match: (MADD a x (MOVDconst [1]))
        // cond:
-       // result: (MOVDconst [0])
+       // result: (ADD a x)
        for {
-               _ = v.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != 0 {
+               if v_2.AuxInt != 1 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               v.reset(OpARM64ADD)
+               v.AddArg(a)
+               v.AddArg(x)
                return true
        }
-       // match: (MNEG (MOVDconst [0]) _)
-       // cond:
-       // result: (MOVDconst [0])
+       // match: (MADD a x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (ADDshiftLL a x [log2(c)])
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != 0 {
+               c := v_2.AuxInt
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c)
+               v.AddArg(a)
+               v.AddArg(x)
                return true
        }
-       // match: (MNEG x (MOVDconst [1]))
-       // cond:
-       // result: (NEG x)
+       // match: (MADD a x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c-1) && c>=3
+       // result: (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != 1 {
+               c := v_2.AuxInt
+               if !(isPowerOfTwo(c-1) && c >= 3) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
+               v.reset(OpARM64ADD)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MNEG (MOVDconst [1]) x)
-       // cond:
-       // result: (NEG x)
+       // match: (MADD a x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c+1) && c>=7
+       // result: (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != 1 {
+               c := v_2.AuxInt
+               if !(isPowerOfTwo(c+1) && c >= 7) {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
+               v.reset(OpARM64SUB)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = log2(c + 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MNEG x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (NEG (SLLconst <x.Type> [log2(c)] x))
+       // match: (MADD a x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c)) {
+               c := v_2.AuxInt
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = log2(c / 3)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
                v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (MNEG (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c)
-       // result: (NEG (SLLconst <x.Type> [log2(c)] x))
+       // match: (MADD a x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c)) {
+               c := v_2.AuxInt
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c / 5)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
                v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (MNEG x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c-1) && c >= 3
-       // result: (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
+       // match: (MADD a x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo(c/7)
+       // result: (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c-1) && c >= 3) {
+               c := v_2.AuxInt
+               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = log2(c - 1)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = log2(c / 7)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 3
                v0.AddArg(x)
                v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (MNEG (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c-1) && c >= 3
-       // result: (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
+       // match: (MADD a x (MOVDconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c-1) && c >= 3) {
+               c := v_2.AuxInt
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
                        break
                }
-               v.reset(OpARM64NEG)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c / 9)
+               v.AddArg(a)
                v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = log2(c - 1)
+               v0.AuxInt = 3
                v0.AddArg(x)
                v0.AddArg(x)
                v.AddArg(v0)
@@ -6708,488 +6753,508 @@ func rewriteValueARM64_OpARM64MNEG_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MNEG_10(v *Value) bool {
+func rewriteValueARM64_OpARM64MADD_10(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (MNEG x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c+1) && c >= 7
-       // result: (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
+       // match: (MADD a (MOVDconst [-1]) x)
+       // cond:
+       // result: (SUB a x)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[2]
+               a := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c+1) && c >= 7) {
+               if v_1.AuxInt != -1 {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = log2(c + 1)
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               x := v.Args[2]
+               v.reset(OpARM64SUB)
+               v.AddArg(a)
+               v.AddArg(x)
                return true
        }
-       // match: (MNEG (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c+1) && c >= 7
-       // result: (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
+       // match: (MADD a (MOVDconst [0]) _)
+       // cond:
+       // result: a
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c+1) && c >= 7) {
+               if v_1.AuxInt != 0 {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = log2(c + 1)
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpCopy)
+               v.Type = a.Type
+               v.AddArg(a)
                return true
        }
-       // match: (MNEG x (MOVDconst [c]))
-       // cond: c%3 == 0 && isPowerOfTwo(c/3)
-       // result: (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
+       // match: (MADD a (MOVDconst [1]) x)
+       // cond:
+       // result: (ADD a x)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[2]
+               a := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+               if v_1.AuxInt != 1 {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.Type = x.Type
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               x := v.Args[2]
+               v.reset(OpARM64ADD)
+               v.AddArg(a)
+               v.AddArg(x)
                return true
        }
-       // match: (MNEG (MOVDconst [c]) x)
-       // cond: c%3 == 0 && isPowerOfTwo(c/3)
-       // result: (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
+       // match: (MADD a (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c)
+       // result: (ADDshiftLL a x [log2(c)])
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.Type = x.Type
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c)
+               v.AddArg(a)
+               v.AddArg(x)
                return true
        }
-       // match: (MNEG x (MOVDconst [c]))
-       // cond: c%5 == 0 && isPowerOfTwo(c/5)
-       // result: (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
+       // match: (MADD a (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c-1) && c>=3
+       // result: (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[2]
+               a := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+               x := v.Args[2]
+               if !(isPowerOfTwo(c-1) && c >= 3) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c / 5)
-               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v1.AuxInt = 2
-               v1.AddArg(x)
-               v1.AddArg(x)
-               v0.AddArg(v1)
+               v.reset(OpARM64ADD)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (MNEG (MOVDconst [c]) x)
-       // cond: c%5 == 0 && isPowerOfTwo(c/5)
-       // result: (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
+       // match: (MADD a (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c+1) && c>=7
+       // result: (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(isPowerOfTwo(c+1) && c >= 7) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c / 5)
-               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v1.AuxInt = 2
-               v1.AddArg(x)
-               v1.AddArg(x)
-               v0.AddArg(v1)
+               v.reset(OpARM64SUB)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = log2(c + 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (MNEG x (MOVDconst [c]))
-       // cond: c%7 == 0 && isPowerOfTwo(c/7)
-       // result: (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
+       // match: (MADD a (MOVDconst [c]) x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[2]
+               a := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+               x := v.Args[2]
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.Type = x.Type
-               v.AuxInt = log2(c / 7)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = log2(c / 3)
+               v.AddArg(a)
                v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = 3
+               v0.AuxInt = 2
                v0.AddArg(x)
                v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (MNEG (MOVDconst [c]) x)
-       // cond: c%7 == 0 && isPowerOfTwo(c/7)
-       // result: (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
+       // match: (MADD a (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.Type = x.Type
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = 3
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c / 5)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
                v0.AddArg(x)
                v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (MNEG x (MOVDconst [c]))
-       // cond: c%9 == 0 && isPowerOfTwo(c/9)
-       // result: (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
+       // match: (MADD a (MOVDconst [c]) x)
+       // cond: c%7 == 0 && isPowerOfTwo(c/7)
+       // result: (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[2]
+               a := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+               x := v.Args[2]
+               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c / 9)
-               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v1.AuxInt = 3
-               v1.AddArg(x)
-               v1.AddArg(x)
-               v0.AddArg(v1)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = log2(c / 7)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (MNEG (MOVDconst [c]) x)
+       // match: (MADD a (MOVDconst [c]) x)
        // cond: c%9 == 0 && isPowerOfTwo(c/9)
-       // result: (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
+       // result: (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
+               x := v.Args[2]
                if !(c%9 == 0 && isPowerOfTwo(c/9)) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c / 9)
-               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v1.AuxInt = 3
-               v1.AddArg(x)
-               v1.AddArg(x)
-               v0.AddArg(v1)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c / 9)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MNEG_20(v *Value) bool {
-       // match: (MNEG (MOVDconst [c]) (MOVDconst [d]))
+func rewriteValueARM64_OpARM64MADD_20(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MADD (MOVDconst [c]) x y)
        // cond:
-       // result: (MOVDconst [-c*d])
+       // result: (ADDconst [c] (MUL <x.Type> x y))
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -c * d
+               x := v.Args[1]
+               y := v.Args[2]
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (MNEG (MOVDconst [d]) (MOVDconst [c]))
+       // match: (MADD a (MOVDconst [c]) (MOVDconst [d]))
        // cond:
-       // result: (MOVDconst [-c*d])
+       // result: (ADDconst [c*d] a)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               d := v_0.AuxInt
+               _ = v.Args[2]
+               a := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -c * d
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
+                       break
+               }
+               d := v_2.AuxInt
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = c * d
+               v.AddArg(a)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MNEGW_0(v *Value) bool {
+func rewriteValueARM64_OpARM64MADDW_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (MNEGW x (MOVDconst [c]))
+       // match: (MADDW a x (MOVDconst [c]))
        // cond: int32(c)==-1
-       // result: x
+       // result: (SUB a x)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
+                       break
                }
-               c := v_1.AuxInt
+               c := v_2.AuxInt
                if !(int32(c) == -1) {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v.reset(OpARM64SUB)
+               v.AddArg(a)
                v.AddArg(x)
                return true
        }
-       // match: (MNEGW (MOVDconst [c]) x)
-       // cond: int32(c)==-1
-       // result: x
+       // match: (MADDW a _ (MOVDconst [c]))
+       // cond: int32(c)==0
+       // result: a
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(int32(c) == -1) {
+               c := v_2.AuxInt
+               if !(int32(c) == 0) {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = a.Type
+               v.AddArg(a)
                return true
        }
-       // match: (MNEGW _ (MOVDconst [c]))
-       // cond: int32(c)==0
-       // result: (MOVDconst [0])
+       // match: (MADDW a x (MOVDconst [c]))
+       // cond: int32(c)==1
+       // result: (ADD a x)
        for {
-               _ = v.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(int32(c) == 0) {
+               c := v_2.AuxInt
+               if !(int32(c) == 1) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               v.reset(OpARM64ADD)
+               v.AddArg(a)
+               v.AddArg(x)
                return true
        }
-       // match: (MNEGW (MOVDconst [c]) _)
-       // cond: int32(c)==0
-       // result: (MOVDconst [0])
+       // match: (MADDW a x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (ADDshiftLL a x [log2(c)])
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               if !(int32(c) == 0) {
+               c := v_2.AuxInt
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c)
+               v.AddArg(a)
+               v.AddArg(x)
                return true
        }
-       // match: (MNEGW x (MOVDconst [c]))
-       // cond: int32(c)==1
-       // result: (NEG x)
+       // match: (MADDW a x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c-1) && int32(c)>=3
+       // result: (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(int32(c) == 1) {
+               c := v_2.AuxInt
+               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
+               v.reset(OpARM64ADD)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MNEGW (MOVDconst [c]) x)
-       // cond: int32(c)==1
-       // result: (NEG x)
+       // match: (MADDW a x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c+1) && int32(c)>=7
+       // result: (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(int32(c) == 1) {
+               c := v_2.AuxInt
+               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
+               v.reset(OpARM64SUB)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = log2(c + 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MNEGW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (NEG (SLLconst <x.Type> [log2(c)] x))
+       // match: (MADDW a x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+       // result: (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c)) {
+               c := v_2.AuxInt
+               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = log2(c / 3)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
                v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (MNEGW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c)
-       // result: (NEG (SLLconst <x.Type> [log2(c)] x))
+       // match: (MADDW a x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+       // result: (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c)) {
+               c := v_2.AuxInt
+               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c / 5)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
                v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (MNEGW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
-       // result: (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
+       // match: (MADDW a x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+       // result: (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+               c := v_2.AuxInt
+               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = log2(c - 1)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = log2(c / 7)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 3
                v0.AddArg(x)
                v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (MNEGW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
-       // result: (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
+       // match: (MADDW a x (MOVDconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+       // result: (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+               c := v_2.AuxInt
+               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64NEG)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c / 9)
+               v.AddArg(a)
                v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = log2(c - 1)
+               v0.AuxInt = 3
                v0.AddArg(x)
                v0.AddArg(x)
                v.AddArg(v0)
@@ -7197,196 +7262,205 @@ func rewriteValueARM64_OpARM64MNEGW_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MNEGW_10(v *Value) bool {
+func rewriteValueARM64_OpARM64MADDW_10(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (MNEGW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
-       // result: (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
+       // match: (MADDW a (MOVDconst [c]) x)
+       // cond: int32(c)==-1
+       // result: (SUB a x)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[2]
+               a := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
+               x := v.Args[2]
+               if !(int32(c) == -1) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = log2(c + 1)
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpARM64SUB)
+               v.AddArg(a)
+               v.AddArg(x)
                return true
        }
-       // match: (MNEGW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
-       // result: (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
+       // match: (MADDW a (MOVDconst [c]) _)
+       // cond: int32(c)==0
+       // result: a
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
+               c := v_1.AuxInt
+               if !(int32(c) == 0) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = log2(c + 1)
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpCopy)
+               v.Type = a.Type
+               v.AddArg(a)
                return true
        }
-       // match: (MNEGW x (MOVDconst [c]))
-       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
-       // result: (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
+       // match: (MADDW a (MOVDconst [c]) x)
+       // cond: int32(c)==1
+       // result: (ADD a x)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[2]
+               a := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+               x := v.Args[2]
+               if !(int32(c) == 1) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.Type = x.Type
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpARM64ADD)
+               v.AddArg(a)
+               v.AddArg(x)
                return true
        }
-       // match: (MNEGW (MOVDconst [c]) x)
-       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
-       // result: (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
+       // match: (MADDW a (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c)
+       // result: (ADDshiftLL a x [log2(c)])
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.Type = x.Type
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = 2
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c)
+               v.AddArg(a)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MADDW a (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c-1) && int32(c)>=3
+       // result: (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+       for {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+                       break
+               }
+               v.reset(OpARM64ADD)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c - 1)
                v0.AddArg(x)
                v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (MNEGW x (MOVDconst [c]))
-       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
-       // result: (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
+       // match: (MADDW a (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c+1) && int32(c)>=7
+       // result: (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[2]
+               a := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
+               x := v.Args[2]
+               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c / 5)
-               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v1.AuxInt = 2
-               v1.AddArg(x)
-               v1.AddArg(x)
-               v0.AddArg(v1)
+               v.reset(OpARM64SUB)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = log2(c + 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (MNEGW (MOVDconst [c]) x)
-       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
-       // result: (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
+       // match: (MADDW a (MOVDconst [c]) x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+       // result: (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c / 5)
-               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v1.AuxInt = 2
-               v1.AddArg(x)
-               v1.AddArg(x)
-               v0.AddArg(v1)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = log2(c / 3)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (MNEGW x (MOVDconst [c]))
-       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
-       // result: (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
+       // match: (MADDW a (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+       // result: (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[2]
+               a := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
+               x := v.Args[2]
+               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.Type = x.Type
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = 3
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c / 5)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
                v0.AddArg(x)
                v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (MNEGW (MOVDconst [c]) x)
+       // match: (MADDW a (MOVDconst [c]) x)
        // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
-       // result: (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
+       // result: (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
+               x := v.Args[2]
                if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.Type = x.Type
+               v.reset(OpARM64SUBshiftLL)
                v.AuxInt = log2(c / 7)
+               v.AddArg(a)
                v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
                v0.AuxInt = 3
                v0.AddArg(x)
@@ -7394,880 +7468,1832 @@ func rewriteValueARM64_OpARM64MNEGW_10(v *Value) bool {
                v.AddArg(v0)
                return true
        }
-       // match: (MNEGW x (MOVDconst [c]))
+       // match: (MADDW a (MOVDconst [c]) x)
        // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
-       // result: (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
+       // result: (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[2]
+               a := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
+               x := v.Args[2]
                if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c / 9)
-               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v1.AuxInt = 3
-               v1.AddArg(x)
-               v1.AddArg(x)
-               v0.AddArg(v1)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c / 9)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (MNEGW (MOVDconst [c]) x)
-       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
-       // result: (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
+       return false
+}
+func rewriteValueARM64_OpARM64MADDW_20(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MADDW (MOVDconst [c]) x y)
+       // cond:
+       // result: (ADDconst [c] (MULW <x.Type> x y))
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c / 9)
-               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v1.AuxInt = 3
-               v1.AddArg(x)
-               v1.AddArg(x)
-               v0.AddArg(v1)
+               y := v.Args[2]
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+               v0.AddArg(x)
+               v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MNEGW_20(v *Value) bool {
-       // match: (MNEGW (MOVDconst [c]) (MOVDconst [d]))
+       // match: (MADDW a (MOVDconst [c]) (MOVDconst [d]))
        // cond:
-       // result: (MOVDconst [-int64(int32(c)*int32(d))])
+       // result: (ADDconst [int64(int32(c)*int32(d))] a)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
+               _ = v.Args[2]
+               a := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -int64(int32(c) * int32(d))
+               c := v_1.AuxInt
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
+                       break
+               }
+               d := v_2.AuxInt
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = int64(int32(c) * int32(d))
+               v.AddArg(a)
                return true
        }
-       // match: (MNEGW (MOVDconst [d]) (MOVDconst [c]))
+       return false
+}
+func rewriteValueARM64_OpARM64MNEG_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MNEG x (MOVDconst [-1]))
        // cond:
-       // result: (MOVDconst [-int64(int32(c)*int32(d))])
+       // result: x
        for {
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               d := v_0.AuxInt
+               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -int64(int32(c) * int32(d))
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOD_0(v *Value) bool {
-       // match: (MOD (MOVDconst [c]) (MOVDconst [d]))
+       // match: (MNEG (MOVDconst [-1]) x)
        // cond:
-       // result: (MOVDconst [c%d])
+       // result: x
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
+               if v_0.AuxInt != -1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MNEG _ (MOVDconst [0]))
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[1]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               d := v_1.AuxInt
+               if v_1.AuxInt != 0 {
+                       break
+               }
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = c % d
+               v.AuxInt = 0
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MODW_0(v *Value) bool {
-       // match: (MODW (MOVDconst [c]) (MOVDconst [d]))
+       // match: (MNEG (MOVDconst [0]) _)
        // cond:
-       // result: (MOVDconst [int64(int32(c)%int32(d))])
+       // result: (MOVDconst [0])
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_0.AuxInt != 0 {
                        break
                }
-               d := v_1.AuxInt
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(int32(c) % int32(d))
+               v.AuxInt = 0
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVBUload_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVBUload [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVBUload [off1+off2] {sym} ptr mem)
+       // match: (MNEG x (MOVDconst [1]))
+       // cond:
+       // result: (NEG x)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if v_1.AuxInt != 1 {
                        break
                }
-               v.reset(OpARM64MOVBUload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBUload [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVBUloadidx ptr idx mem)
+       // match: (MNEG (MOVDconst [1]) x)
+       // cond:
+       // result: (NEG x)
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               if v_0.AuxInt != 1 {
                        break
                }
-               v.reset(OpARM64MOVBUloadidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               x := v.Args[1]
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // match: (MNEG x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (NEG (SLLconst <x.Type> [log2(c)] x))
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               v.reset(OpARM64MOVBUload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVDconst [0])
+       // match: (MNEG (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c)
+       // result: (NEG (SLLconst <x.Type> [log2(c)] x))
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVBstorezero {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVBUloadidx_0(v *Value) bool {
-       // match: (MOVBUloadidx ptr (MOVDconst [c]) mem)
-       // cond:
-       // result: (MOVBUload [c] ptr mem)
+       // match: (MNEG x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c-1) && c >= 3
+       // result: (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
        for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBUload)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               if !(isPowerOfTwo(c-1) && c >= 3) {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBUloadidx (MOVDconst [c]) ptr mem)
-       // cond:
-       // result: (MOVBUload [c] ptr mem)
+       // match: (MNEG (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c-1) && c >= 3
+       // result: (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
        for {
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBUload)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVBUloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
-       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
-       // result: (MOVDconst [0])
-       for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVBstorezeroidx {
-                       break
-               }
-               _ = v_2.Args[2]
-               ptr2 := v_2.Args[0]
-               idx2 := v_2.Args[1]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+               x := v.Args[1]
+               if !(isPowerOfTwo(c-1) && c >= 3) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVBUreg_0(v *Value) bool {
-       // match: (MOVBUreg x:(MOVBUload _ _))
-       // cond:
-       // result: (MOVDreg x)
+func rewriteValueARM64_OpARM64MNEG_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MNEG x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c+1) && c >= 7
+       // result: (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
        for {
+               _ = v.Args[1]
                x := v.Args[0]
-               if x.Op != OpARM64MOVBUload {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c+1) && c >= 7) {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c + 1)
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBUreg x:(MOVBUloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MNEG (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c+1) && c >= 7
+       // result: (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUloadidx {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c+1) && c >= 7) {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c + 1)
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBUreg x:(MOVBUreg _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MNEG x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
        for {
+               _ = v.Args[1]
                x := v.Args[0]
-               if x.Op != OpARM64MOVBUreg {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVBUreg (ANDconst [c] x))
-       // cond:
-       // result: (ANDconst [c&(1<<8-1)] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ANDconst {
+               c := v_1.AuxInt
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c & (1<<8 - 1)
-               v.AddArg(x)
+               v.reset(OpARM64SLLconst)
+               v.Type = x.Type
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBUreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(uint8(c))])
+       // match: (MNEG (MOVDconst [c]) x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
        for {
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(uint8(c))
+               x := v.Args[1]
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.Type = x.Type
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBUreg x)
-       // cond: x.Type.IsBoolean()
-       // result: (MOVDreg x)
+       // match: (MNEG x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
        for {
+               _ = v.Args[1]
                x := v.Args[0]
-               if !(x.Type.IsBoolean()) {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               c := v_1.AuxInt
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c / 5)
+               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v1.AuxInt = 2
+               v1.AddArg(x)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBUreg (SLLconst [sc] x))
-       // cond: isARM64BFMask(sc, 1<<8-1, sc)
-       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<8-1, sc))] x)
+       // match: (MNEG (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
        for {
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<8-1, sc)) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
                        break
                }
-               v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<8-1, sc))
-               v.AddArg(x)
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c / 5)
+               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v1.AuxInt = 2
+               v1.AddArg(x)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBUreg (SRLconst [sc] x))
-       // cond: isARM64BFMask(sc, 1<<8-1, 0)
-       // result: (UBFX [arm64BFAuxInt(sc, 8)] x)
+       // match: (MNEG x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo(c/7)
+       // result: (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<8-1, 0)) {
+               c := v_1.AuxInt
+               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
                        break
                }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BFAuxInt(sc, 8)
-               v.AddArg(x)
+               v.reset(OpARM64SLLconst)
+               v.Type = x.Type
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVBload_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVBload [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVBload [off1+off2] {sym} ptr mem)
+       // match: (MNEG (MOVDconst [c]) x)
+       // cond: c%7 == 0 && isPowerOfTwo(c/7)
+       // result: (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
                        break
                }
-               v.reset(OpARM64MOVBload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpARM64SLLconst)
+               v.Type = x.Type
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBload [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVBloadidx ptr idx mem)
+       // match: (MNEG x (MOVDconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               c := v_1.AuxInt
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
                        break
                }
-               v.reset(OpARM64MOVBloadidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c / 9)
+               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v1.AuxInt = 3
+               v1.AddArg(x)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // match: (MNEG (MOVDconst [c]) x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
                        break
                }
-               v.reset(OpARM64MOVBload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c / 9)
+               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v1.AuxInt = 3
+               v1.AddArg(x)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVDconst [0])
+       return false
+}
+func rewriteValueARM64_OpARM64MNEG_20(v *Value) bool {
+       // match: (MNEG (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [-c*d])
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[1]
-               ptr := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVBstorezero {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               d := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -c * d
+               return true
+       }
+       // match: (MNEG (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [-c*d])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
+               c := v_1.AuxInt
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               v.AuxInt = -c * d
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVBloadidx_0(v *Value) bool {
-       // match: (MOVBloadidx ptr (MOVDconst [c]) mem)
-       // cond:
-       // result: (MOVBload [c] ptr mem)
+func rewriteValueARM64_OpARM64MNEGW_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MNEGW x (MOVDconst [c]))
+       // cond: int32(c)==-1
+       // result: x
        for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBload)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               if !(int32(c) == -1) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBloadidx (MOVDconst [c]) ptr mem)
-       // cond:
-       // result: (MOVBload [c] ptr mem)
+       // match: (MNEGW (MOVDconst [c]) x)
+       // cond: int32(c)==-1
+       // result: x
        for {
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBload)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               x := v.Args[1]
+               if !(int32(c) == -1) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
-       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // match: (MNEGW _ (MOVDconst [c]))
+       // cond: int32(c)==0
        // result: (MOVDconst [0])
        for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVBstorezeroidx {
+               _ = v.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_2.Args[2]
-               ptr2 := v_2.Args[0]
-               idx2 := v_2.Args[1]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+               c := v_1.AuxInt
+               if !(int32(c) == 0) {
                        break
                }
                v.reset(OpARM64MOVDconst)
                v.AuxInt = 0
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVBreg_0(v *Value) bool {
-       // match: (MOVBreg x:(MOVBload _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MNEGW (MOVDconst [c]) _)
+       // cond: int32(c)==0
+       // result: (MOVDconst [0])
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBload {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVBreg x:(MOVBloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBloadidx {
+               c := v_0.AuxInt
+               if !(int32(c) == 0) {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MOVBreg x:(MOVBreg _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MNEGW x (MOVDconst [c]))
+       // cond: int32(c)==1
+       // result: (NEG x)
        for {
+               _ = v.Args[1]
                x := v.Args[0]
-               if x.Op != OpARM64MOVBreg {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDreg)
+               c := v_1.AuxInt
+               if !(int32(c) == 1) {
+                       break
+               }
+               v.reset(OpARM64NEG)
                v.AddArg(x)
                return true
        }
-       // match: (MOVBreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(int8(c))])
+       // match: (MNEGW (MOVDconst [c]) x)
+       // cond: int32(c)==1
+       // result: (NEG x)
        for {
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(int8(c))
+               x := v.Args[1]
+               if !(int32(c) == 1) {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBreg (SLLconst [lc] x))
-       // cond: lc < 8
-       // result: (SBFIZ [arm64BFAuxInt(lc, 8-lc)] x)
+       // match: (MNEGW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (NEG (SLLconst <x.Type> [log2(c)] x))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               lc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(lc < 8) {
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               v.reset(OpARM64SBFIZ)
-               v.AuxInt = arm64BFAuxInt(lc, 8-lc)
-               v.AddArg(x)
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVBstore_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVBstore [off1+off2] {sym} ptr val mem)
+       // match: (MNEGW (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c)
+       // result: (NEG (SLLconst <x.Type> [log2(c)] x))
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               v.reset(OpARM64MOVBstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBstore [off] {sym} (ADD ptr idx) val mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVBstoreidx ptr idx val mem)
+       // match: (MNEGW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
+       // result: (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(off == 0 && sym == nil) {
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
                        break
                }
-               v.reset(OpARM64MOVBstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       // match: (MNEGW (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
+       // result: (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
                        break
                }
-               v.reset(OpARM64MOVBstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr (MOVDconst [0]) mem)
-       // cond:
-       // result: (MOVBstorezero [off] {sym} ptr mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MNEGW_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MNEGW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
+       // result: (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != 0 {
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBstorezero)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c + 1)
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr (MOVBreg x) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr x mem)
+       // match: (MNEGW (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
+       // result: (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVBreg {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVBstore [off] {sym} ptr (MOVBUreg x) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVBUreg {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c + 1)
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr (MOVHreg x) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr x mem)
+       // match: (MNEGW x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+       // result: (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVHreg {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
+               c := v_1.AuxInt
+               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.Type = x.Type
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr (MOVHUreg x) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr x mem)
+       // match: (MNEGW (MOVDconst [c]) x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+       // result: (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVHUreg {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.Type = x.Type
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr (MOVWreg x) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr x mem)
+       // match: (MNEGW x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+       // result: (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWreg {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
+               c := v_1.AuxInt
+               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c / 5)
+               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v1.AuxInt = 2
+               v1.AddArg(x)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr (MOVWUreg x) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr x mem)
+       // match: (MNEGW (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+       // result: (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
        for {
-               off := v.AuxInt
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c / 5)
+               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v1.AuxInt = 2
+               v1.AddArg(x)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEGW x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+       // result: (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.Type = x.Type
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEGW (MOVDconst [c]) x)
+       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+       // result: (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.Type = x.Type
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEGW x (MOVDconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+       // result: (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c / 9)
+               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v1.AuxInt = 3
+               v1.AddArg(x)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEGW (MOVDconst [c]) x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+       // result: (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c / 9)
+               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v1.AuxInt = 3
+               v1.AddArg(x)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MNEGW_20 applies the final rewrite rules for the
+// ARM64 MNEGW (negated 32-bit multiply) op: constant folding when both
+// operands are MOVDconst.  The result is computed in 32-bit arithmetic and
+// sign-extended, matching MNEGW's word semantics.  This file appears to be
+// generated by the ssa rulegen tool -- do not edit by hand.
+func rewriteValueARM64_OpARM64MNEGW_20(v *Value) bool {
+       // match: (MNEGW (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [-int64(int32(c)*int32(d))])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -int64(int32(c) * int32(d))
+               return true
+       }
+       // match: (MNEGW (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [-int64(int32(c)*int32(d))])
+       // NOTE(review): this second case is the commuted duplicate the rule
+       // generator emits for commutative ops; it is textually redundant when
+       // both args are constants but harmless.
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -int64(int32(c) * int32(d))
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MOD_0 constant-folds the 64-bit signed modulo op
+// when both operands are MOVDconst.
+// NOTE(review): c % d would panic here if d == 0; presumably constant
+// division by zero is rejected by the front end before SSA -- confirm.
+func rewriteValueARM64_OpARM64MOD_0(v *Value) bool {
+       // match: (MOD (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [c%d])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = c % d
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MODW_0 constant-folds the 32-bit signed modulo op
+// when both operands are MOVDconst; the remainder is computed on the
+// truncated int32 values and then sign-extended back to 64 bits.
+func rewriteValueARM64_OpARM64MODW_0(v *Value) bool {
+       // match: (MODW (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [int64(int32(c)%int32(d))])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(int32(c) % int32(d))
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MOVBUload_0 optimizes unsigned byte loads:
+// it folds ADDconst address arithmetic into the load offset, converts a
+// register+register address into the indexed load form, merges MOVDaddr
+// symbol/offset pairs, and replaces a load from freshly zeroed memory with
+// a constant 0.
+func rewriteValueARM64_OpARM64MOVBUload_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVBUload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVBUload [off1+off2] {sym} ptr mem)
+       // NOTE(review): the OpSB/Flag_shared guard presumably avoids folding
+       // SB-relative offsets under PIC/shared builds -- confirm.
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVBUload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBUload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVBUloadidx ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVBUloadidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVBUload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
+       // Loading a byte that was just zero-stored at the same address yields 0.
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVBstorezero {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MOVBUloadidx_0 optimizes indexed unsigned byte
+// loads: when either address operand is a constant it degrades to the
+// offset-addressed MOVBUload form, and a load from memory just zeroed by an
+// indexed zero-store of the same address folds to constant 0.
+func rewriteValueARM64_OpARM64MOVBUloadidx_0(v *Value) bool {
+       // match: (MOVBUloadidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVBUload [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBUload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBUloadidx (MOVDconst [c]) ptr mem)
+       // cond:
+       // result: (MOVBUload [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBUload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBUloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
+       // Both operand orders are accepted since ptr+idx == idx+ptr.
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVBstorezeroidx {
+                       break
+               }
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MOVBUreg_0 simplifies zero-extension of a byte:
+// it drops the extension when the source is already zero-extended (byte
+// loads, a previous MOVBUreg, or a boolean), folds it into ANDconst or a
+// constant, and converts shifted sources into UBFIZ/UBFX bitfield ops.
+func rewriteValueARM64_OpARM64MOVBUreg_0(v *Value) bool {
+       // match: (MOVBUreg x:(MOVBUload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUload {
+                       break
+               }
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVBUreg x:(MOVBUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVBUreg x:(MOVBUreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVBUreg (ANDconst [c] x))
+       // cond:
+       // result: (ANDconst [c&(1<<8-1)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ANDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c & (1<<8 - 1)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVBUreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(uint8(c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(uint8(c))
+               return true
+       }
+       // match: (MOVBUreg x)
+       // cond: x.Type.IsBoolean()
+       // result: (MOVDreg x)
+       // Booleans are already 0 or 1, so the zero-extension is a no-op.
+       for {
+               x := v.Args[0]
+               if !(x.Type.IsBoolean()) {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVBUreg (SLLconst [sc] x))
+       // cond: isARM64BFMask(sc, 1<<8-1, sc)
+       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<8-1, sc))] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
+                       break
+               }
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<8-1, sc)) {
+                       break
+               }
+               v.reset(OpARM64UBFIZ)
+               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<8-1, sc))
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVBUreg (SRLconst [sc] x))
+       // cond: isARM64BFMask(sc, 1<<8-1, 0)
+       // result: (UBFX [arm64BFAuxInt(sc, 8)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRLconst {
+                       break
+               }
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<8-1, 0)) {
+                       break
+               }
+               v.reset(OpARM64UBFX)
+               v.AuxInt = arm64BFAuxInt(sc, 8)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MOVBload_0 optimizes signed byte loads; the rules
+// mirror those for MOVBUload: fold ADDconst into the offset, use the indexed
+// form for register+register addresses, merge MOVDaddr symbols, and fold a
+// load of freshly zeroed memory to constant 0.
+func rewriteValueARM64_OpARM64MOVBload_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVBload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVBload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVBload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVBloadidx ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVBloadidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVBload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVBstorezero {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MOVBloadidx_0 optimizes indexed signed byte
+// loads, mirroring MOVBUloadidx: constant address operands degrade to the
+// offset-addressed MOVBload form, and a load from memory just zeroed by an
+// indexed zero-store of the same address folds to constant 0.
+func rewriteValueARM64_OpARM64MOVBloadidx_0(v *Value) bool {
+       // match: (MOVBloadidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVBload [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBloadidx (MOVDconst [c]) ptr mem)
+       // cond:
+       // result: (MOVBload [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVBstorezeroidx {
+                       break
+               }
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MOVBreg_0 simplifies sign-extension of a byte:
+// it drops the extension when the source is already sign-extended (signed
+// byte loads or a previous MOVBreg), folds constants, and turns a left
+// shift followed by the extension into an SBFIZ bitfield insert.
+func rewriteValueARM64_OpARM64MOVBreg_0(v *Value) bool {
+       // match: (MOVBreg x:(MOVBload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBload {
+                       break
+               }
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVBreg x:(MOVBloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBloadidx {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVBreg x:(MOVBreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVBreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(int8(c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(int8(c))
+               return true
+       }
+       // match: (MOVBreg (SLLconst [lc] x))
+       // cond: lc < 8
+       // result: (SBFIZ [arm64BFAuxInt(lc, 8-lc)] x)
+       // For lc < 8 some original low bits survive in the byte, so the
+       // shift+extend pair is exactly a signed bitfield insert-in-zero.
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
+                       break
+               }
+               lc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(lc < 8) {
+                       break
+               }
+               v.reset(OpARM64SBFIZ)
+               v.AuxInt = arm64BFAuxInt(lc, 8-lc)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVBstore_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVBstore [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} (ADD ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVBstoreidx ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVBstorezero [off] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBstorezero)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr (MOVBreg x) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVBreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr (MOVBUreg x) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVBUreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr (MOVHreg x) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVHreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr (MOVHUreg x) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVHUreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr (MOVWreg x) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr (MOVWUreg x) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
@@ -14508,9 +15534,403 @@ func rewriteValueARM64_OpARM64MOVHstorezeroidx_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstorezeroidx (MOVDconst [c]) idx mem)
+       // match: (MOVHstorezeroidx (MOVDconst [c]) idx mem)
+       // cond:
+       // result: (MOVHstorezero [c] idx mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = c
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstorezeroidx ptr (SLLconst [1] idx) mem)
+       // cond:
+       // result: (MOVHstorezeroidx2 ptr idx mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezeroidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstorezeroidx ptr (ADD idx idx) mem)
+       // cond:
+       // result: (MOVHstorezeroidx2 ptr idx mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_1.Args[1]
+               idx := v_1.Args[0]
+               if idx != v_1.Args[1] {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezeroidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstorezeroidx (SLLconst [1] idx) ptr mem)
+       // cond:
+       // result: (MOVHstorezeroidx2 ptr idx mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezeroidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstorezeroidx (ADD idx idx) ptr mem)
+       // cond:
+       // result: (MOVHstorezeroidx2 ptr idx mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               idx := v_0.Args[0]
+               if idx != v_0.Args[1] {
+                       break
+               }
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezeroidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstorezeroidx ptr (ADDconst [2] idx) x:(MOVHstorezeroidx ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstorezeroidx ptr idx mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if v_1.AuxInt != 2 {
+                       break
+               }
+               idx := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstorezeroidx {
+                       break
+               }
+               _ = x.Args[2]
+               if ptr != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHstorezeroidx2_0(v *Value) bool {
+       // match: (MOVHstorezeroidx2 ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVHstorezero [c<<1] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = c << 1
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVQstorezero_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVQstorezero [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVWUload [off] {sym} ptr (FMOVSstore [off] {sym} ptr val _))
+       // cond:
+       // result: (FMOVSfpgp val)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64FMOVSstore {
+                       break
+               }
+               if v_1.AuxInt != off {
+                       break
+               }
+               if v_1.Aux != sym {
+                       break
+               }
+               _ = v_1.Args[2]
+               if ptr != v_1.Args[0] {
+                       break
+               }
+               val := v_1.Args[1]
+               v.reset(OpARM64FMOVSfpgp)
+               v.AddArg(val)
+               return true
+       }
+       // match: (MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWUload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVWUload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWUload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWUloadidx ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVWUloadidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWUload [off] {sym} (ADDshiftLL [2] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWUloadidx4 ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
+                       break
+               }
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVWUloadidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVWUload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWstorezero {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool {
+       // match: (MOVWUloadidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVWUload [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWUload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWUloadidx (MOVDconst [c]) ptr mem)
        // cond:
-       // result: (MOVHstorezero [c] idx mem)
+       // result: (MOVWUload [c] ptr mem)
        for {
                _ = v.Args[2]
                v_0 := v.Args[0]
@@ -14518,17 +15938,17 @@ func rewriteValueARM64_OpARM64MOVHstorezeroidx_0(v *Value) bool {
                        break
                }
                c := v_0.AuxInt
-               idx := v.Args[1]
+               ptr := v.Args[1]
                mem := v.Args[2]
-               v.reset(OpARM64MOVHstorezero)
+               v.reset(OpARM64MOVWUload)
                v.AuxInt = c
-               v.AddArg(idx)
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstorezeroidx ptr (SLLconst [1] idx) mem)
+       // match: (MOVWUloadidx ptr (SLLconst [2] idx) mem)
        // cond:
-       // result: (MOVHstorezeroidx2 ptr idx mem)
+       // result: (MOVWUloadidx4 ptr idx mem)
        for {
                _ = v.Args[2]
                ptr := v.Args[0]
@@ -14536,231 +15956,321 @@ func rewriteValueARM64_OpARM64MOVHstorezeroidx_0(v *Value) bool {
                if v_1.Op != OpARM64SLLconst {
                        break
                }
-               if v_1.AuxInt != 1 {
+               if v_1.AuxInt != 2 {
                        break
                }
                idx := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpARM64MOVHstorezeroidx2)
+               v.reset(OpARM64MOVWUloadidx4)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstorezeroidx ptr (ADD idx idx) mem)
+       // match: (MOVWUloadidx (SLLconst [2] idx) ptr mem)
        // cond:
-       // result: (MOVHstorezeroidx2 ptr idx mem)
+       // result: (MOVWUloadidx4 ptr idx mem)
        for {
                _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64ADD {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               _ = v_1.Args[1]
-               idx := v_1.Args[0]
-               if idx != v_1.Args[1] {
+               if v_0.AuxInt != 2 {
                        break
                }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
                mem := v.Args[2]
-               v.reset(OpARM64MOVHstorezeroidx2)
+               v.reset(OpARM64MOVWUloadidx4)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstorezeroidx (SLLconst [1] idx) ptr mem)
-       // cond:
-       // result: (MOVHstorezeroidx2 ptr idx mem)
+       // match: (MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
        for {
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWstorezeroidx {
                        break
                }
-               if v_0.AuxInt != 1 {
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
                        break
                }
-               idx := v_0.Args[0]
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHstorezeroidx2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MOVHstorezeroidx (ADD idx idx) ptr mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWUloadidx4_0(v *Value) bool {
+       // match: (MOVWUloadidx4 ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (MOVHstorezeroidx2 ptr idx mem)
+       // result: (MOVWUload [c<<2] ptr mem)
        for {
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               idx := v_0.Args[0]
-               if idx != v_0.Args[1] {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               ptr := v.Args[1]
+               c := v_1.AuxInt
                mem := v.Args[2]
-               v.reset(OpARM64MOVHstorezeroidx2)
+               v.reset(OpARM64MOVWUload)
+               v.AuxInt = c << 2
                v.AddArg(ptr)
-               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstorezeroidx ptr (ADDconst [2] idx) x:(MOVHstorezeroidx ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstorezeroidx ptr idx mem)
+       // match: (MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _))
+       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
+       // result: (MOVDconst [0])
        for {
                _ = v.Args[2]
                ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64ADDconst {
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWstorezeroidx4 {
                        break
                }
-               if v_1.AuxInt != 2 {
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
                        break
                }
-               idx := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstorezeroidx {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool {
+       // match: (MOVWUreg x:(MOVBUload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUload {
                        break
                }
-               _ = x.Args[2]
-               if ptr != x.Args[0] {
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVHUload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUload {
                        break
                }
-               if idx != x.Args[1] {
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVWUload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWUload {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVBUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUloadidx {
                        break
                }
-               v.reset(OpARM64MOVWstorezeroidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHstorezeroidx2_0(v *Value) bool {
-       // match: (MOVHstorezeroidx2 ptr (MOVDconst [c]) mem)
+       // match: (MOVWUreg x:(MOVHUloadidx _ _ _))
        // cond:
-       // result: (MOVHstorezero [c<<1] ptr mem)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUloadidx {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = c << 1
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVQstorezero_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVQstorezero [off1+off2] {sym} ptr mem)
+       // match: (MOVWUreg x:(MOVWUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWUloadidx {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVHUloadidx2 _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUloadidx2 {
                        break
                }
-               v.reset(OpARM64MOVQstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // match: (MOVWUreg x:(MOVWUloadidx4 _ _ _))
+       // cond:
+       // result: (MOVDreg x)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWUloadidx4 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVBUreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUreg {
                        break
                }
-               v.reset(OpARM64MOVQstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVHUreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVWUload [off] {sym} ptr (FMOVSstore [off] {sym} ptr val _))
+func rewriteValueARM64_OpARM64MOVWUreg_10(v *Value) bool {
+       // match: (MOVWUreg x:(MOVWUreg _))
        // cond:
-       // result: (FMOVSfpgp val)
+       // result: (MOVDreg x)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64FMOVSstore {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWUreg {
                        break
                }
-               if v_1.AuxInt != off {
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg (ANDconst [c] x))
+       // cond:
+       // result: (ANDconst [c&(1<<32-1)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ANDconst {
                        break
                }
-               if v_1.Aux != sym {
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c & (1<<32 - 1)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(uint32(c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_1.Args[2]
-               if ptr != v_1.Args[0] {
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(uint32(c))
+               return true
+       }
+       // match: (MOVWUreg (SLLconst [sc] x))
+       // cond: isARM64BFMask(sc, 1<<32-1, sc)
+       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               val := v_1.Args[1]
-               v.reset(OpARM64FMOVSfpgp)
-               v.AddArg(val)
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<32-1, sc)) {
+                       break
+               }
+               v.reset(OpARM64UBFIZ)
+               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVWUreg (SRLconst [sc] x))
+       // cond: isARM64BFMask(sc, 1<<32-1, 0)
+       // result: (UBFX [arm64BFAuxInt(sc, 32)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRLconst {
+                       break
+               }
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<32-1, 0)) {
+                       break
+               }
+               v.reset(OpARM64UBFX)
+               v.AuxInt = arm64BFAuxInt(sc, 32)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWUload [off1+off2] {sym} ptr mem)
+       // result: (MOVWload [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -14775,16 +16285,16 @@ func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
                if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVWUload)
+               v.reset(OpARM64MOVWload)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUload [off] {sym} (ADD ptr idx) mem)
+       // match: (MOVWload [off] {sym} (ADD ptr idx) mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVWUloadidx ptr idx mem)
+       // result: (MOVWloadidx ptr idx mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -14800,15 +16310,15 @@ func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVWUloadidx)
+               v.reset(OpARM64MOVWloadidx)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUload [off] {sym} (ADDshiftLL [2] ptr idx) mem)
+       // match: (MOVWload [off] {sym} (ADDshiftLL [2] ptr idx) mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVWUloadidx4 ptr idx mem)
+       // result: (MOVWloadidx4 ptr idx mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -14827,15 +16337,15 @@ func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVWUloadidx4)
+               v.reset(OpARM64MOVWloadidx4)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // match: (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -14851,14 +16361,14 @@ func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVWUload)
+               v.reset(OpARM64MOVWload)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
+       // match: (MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
        // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
        // result: (MOVDconst [0])
        for {
@@ -14883,10 +16393,10 @@ func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool {
-       // match: (MOVWUloadidx ptr (MOVDconst [c]) mem)
+func rewriteValueARM64_OpARM64MOVWloadidx_0(v *Value) bool {
+       // match: (MOVWloadidx ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (MOVWUload [c] ptr mem)
+       // result: (MOVWload [c] ptr mem)
        for {
                _ = v.Args[2]
                ptr := v.Args[0]
@@ -14896,15 +16406,15 @@ func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool {
                }
                c := v_1.AuxInt
                mem := v.Args[2]
-               v.reset(OpARM64MOVWUload)
+               v.reset(OpARM64MOVWload)
                v.AuxInt = c
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUloadidx (MOVDconst [c]) ptr mem)
+       // match: (MOVWloadidx (MOVDconst [c]) ptr mem)
        // cond:
-       // result: (MOVWUload [c] ptr mem)
+       // result: (MOVWload [c] ptr mem)
        for {
                _ = v.Args[2]
                v_0 := v.Args[0]
@@ -14914,15 +16424,15 @@ func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool {
                c := v_0.AuxInt
                ptr := v.Args[1]
                mem := v.Args[2]
-               v.reset(OpARM64MOVWUload)
+               v.reset(OpARM64MOVWload)
                v.AuxInt = c
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUloadidx ptr (SLLconst [2] idx) mem)
+       // match: (MOVWloadidx ptr (SLLconst [2] idx) mem)
        // cond:
-       // result: (MOVWUloadidx4 ptr idx mem)
+       // result: (MOVWloadidx4 ptr idx mem)
        for {
                _ = v.Args[2]
                ptr := v.Args[0]
@@ -14935,15 +16445,15 @@ func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool {
                }
                idx := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpARM64MOVWUloadidx4)
+               v.reset(OpARM64MOVWloadidx4)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUloadidx (SLLconst [2] idx) ptr mem)
+       // match: (MOVWloadidx (SLLconst [2] idx) ptr mem)
        // cond:
-       // result: (MOVWUloadidx4 ptr idx mem)
+       // result: (MOVWloadidx4 ptr idx mem)
        for {
                _ = v.Args[2]
                v_0 := v.Args[0]
@@ -14956,13 +16466,13 @@ func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool {
                idx := v_0.Args[0]
                ptr := v.Args[1]
                mem := v.Args[2]
-               v.reset(OpARM64MOVWUloadidx4)
+               v.reset(OpARM64MOVWloadidx4)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
+       // match: (MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
        // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
        // result: (MOVDconst [0])
        for {
@@ -14985,10 +16495,10 @@ func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWUloadidx4_0(v *Value) bool {
-       // match: (MOVWUloadidx4 ptr (MOVDconst [c]) mem)
+func rewriteValueARM64_OpARM64MOVWloadidx4_0(v *Value) bool {
+       // match: (MOVWloadidx4 ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (MOVWUload [c<<2] ptr mem)
+       // result: (MOVWload [c<<2] ptr mem)
        for {
                _ = v.Args[2]
                ptr := v.Args[0]
@@ -14998,13 +16508,13 @@ func rewriteValueARM64_OpARM64MOVWUloadidx4_0(v *Value) bool {
                }
                c := v_1.AuxInt
                mem := v.Args[2]
-               v.reset(OpARM64MOVWUload)
+               v.reset(OpARM64MOVWload)
                v.AuxInt = c << 2
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _))
+       // match: (MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _))
        // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
        // result: (MOVDconst [0])
        for {
@@ -15027,8 +16537,21 @@ func rewriteValueARM64_OpARM64MOVWUloadidx4_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool {
-       // match: (MOVWUreg x:(MOVBUload _ _))
+func rewriteValueARM64_OpARM64MOVWreg_0(v *Value) bool {
+       // match: (MOVWreg x:(MOVBload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBload {
+                       break
+               }
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBUload _ _))
        // cond:
        // result: (MOVDreg x)
        for {
@@ -15036,43 +16559,108 @@ func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool {
                if x.Op != OpARM64MOVBUload {
                        break
                }
-               _ = x.Args[1]
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHload {
+                       break
+               }
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHUload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUload {
+                       break
+               }
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVWload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWload {
+                       break
+               }
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBloadidx {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x.Args[2]
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVWUreg x:(MOVHUload _ _))
+       // match: (MOVWreg x:(MOVHloadidx _ _ _))
        // cond:
        // result: (MOVDreg x)
        for {
                x := v.Args[0]
-               if x.Op != OpARM64MOVHUload {
+               if x.Op != OpARM64MOVHloadidx {
                        break
                }
-               _ = x.Args[1]
+               _ = x.Args[2]
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVWUreg x:(MOVWUload _ _))
+       // match: (MOVWreg x:(MOVHUloadidx _ _ _))
        // cond:
        // result: (MOVDreg x)
        for {
                x := v.Args[0]
-               if x.Op != OpARM64MOVWUload {
+               if x.Op != OpARM64MOVHUloadidx {
                        break
                }
-               _ = x.Args[1]
+               _ = x.Args[2]
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVWUreg x:(MOVBUloadidx _ _ _))
+       // match: (MOVWreg x:(MOVWloadidx _ _ _))
        // cond:
        // result: (MOVDreg x)
        for {
                x := v.Args[0]
-               if x.Op != OpARM64MOVBUloadidx {
+               if x.Op != OpARM64MOVWloadidx {
                        break
                }
                _ = x.Args[2]
@@ -15080,12 +16668,15 @@ func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVWUreg x:(MOVHUloadidx _ _ _))
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWreg_10(v *Value) bool {
+       // match: (MOVWreg x:(MOVHloadidx2 _ _ _))
        // cond:
        // result: (MOVDreg x)
        for {
                x := v.Args[0]
-               if x.Op != OpARM64MOVHUloadidx {
+               if x.Op != OpARM64MOVHloadidx2 {
                        break
                }
                _ = x.Args[2]
@@ -15093,12 +16684,12 @@ func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVWUreg x:(MOVWUloadidx _ _ _))
+       // match: (MOVWreg x:(MOVHUloadidx2 _ _ _))
        // cond:
        // result: (MOVDreg x)
        for {
                x := v.Args[0]
-               if x.Op != OpARM64MOVWUloadidx {
+               if x.Op != OpARM64MOVHUloadidx2 {
                        break
                }
                _ = x.Args[2]
@@ -15106,12 +16697,12 @@ func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVWUreg x:(MOVHUloadidx2 _ _ _))
+       // match: (MOVWreg x:(MOVWloadidx4 _ _ _))
        // cond:
        // result: (MOVDreg x)
        for {
                x := v.Args[0]
-               if x.Op != OpARM64MOVHUloadidx2 {
+               if x.Op != OpARM64MOVWloadidx4 {
                        break
                }
                _ = x.Args[2]
@@ -15119,20 +16710,19 @@ func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVWUreg x:(MOVWUloadidx4 _ _ _))
+       // match: (MOVWreg x:(MOVBreg _))
        // cond:
        // result: (MOVDreg x)
        for {
                x := v.Args[0]
-               if x.Op != OpARM64MOVWUloadidx4 {
+               if x.Op != OpARM64MOVBreg {
                        break
                }
-               _ = x.Args[2]
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVWUreg x:(MOVBUreg _))
+       // match: (MOVWreg x:(MOVBUreg _))
        // cond:
        // result: (MOVDreg x)
        for {
@@ -15144,51 +16734,45 @@ func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MOVWUreg x:(MOVHUreg _))
+       // match: (MOVWreg x:(MOVHreg _))
        // cond:
        // result: (MOVDreg x)
        for {
                x := v.Args[0]
-               if x.Op != OpARM64MOVHUreg {
+               if x.Op != OpARM64MOVHreg {
                        break
                }
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWUreg_10(v *Value) bool {
-       // match: (MOVWUreg x:(MOVWUreg _))
+       // match: (MOVWreg x:(MOVHreg _))
        // cond:
        // result: (MOVDreg x)
        for {
                x := v.Args[0]
-               if x.Op != OpARM64MOVWUreg {
+               if x.Op != OpARM64MOVHreg {
                        break
                }
                v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVWUreg (ANDconst [c] x))
+       // match: (MOVWreg x:(MOVWreg _))
        // cond:
-       // result: (ANDconst [c&(1<<32-1)] x)
+       // result: (MOVDreg x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ANDconst {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWreg {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c & (1<<32 - 1)
+               v.reset(OpARM64MOVDreg)
                v.AddArg(x)
                return true
        }
-       // match: (MOVWUreg (MOVDconst [c]))
+       // match: (MOVWreg (MOVDconst [c]))
        // cond:
-       // result: (MOVDconst [int64(uint32(c))])
+       // result: (MOVDconst [int64(int32(c))])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
@@ -15196,83 +16780,89 @@ func rewriteValueARM64_OpARM64MOVWUreg_10(v *Value) bool {
                }
                c := v_0.AuxInt
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(uint32(c))
+               v.AuxInt = int64(int32(c))
                return true
        }
-       // match: (MOVWUreg (SLLconst [sc] x))
-       // cond: isARM64BFMask(sc, 1<<32-1, sc)
-       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x)
+       // match: (MOVWreg (SLLconst [lc] x))
+       // cond: lc < 32
+       // result: (SBFIZ [arm64BFAuxInt(lc, 32-lc)] x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpARM64SLLconst {
                        break
                }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<32-1, sc)) {
-                       break
-               }
-               v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWUreg (SRLconst [sc] x))
-       // cond: isARM64BFMask(sc, 1<<32-1, 0)
-       // result: (UBFX [arm64BFAuxInt(sc, 32)] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
-                       break
-               }
-               sc := v_0.AuxInt
+               lc := v_0.AuxInt
                x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<32-1, 0)) {
+               if !(lc < 32) {
                        break
                }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BFAuxInt(sc, 32)
+               v.reset(OpARM64SBFIZ)
+               v.AuxInt = arm64BFAuxInt(lc, 32-lc)
                v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVWstore [off] {sym} ptr (FMOVSfpgp val) mem)
+       // cond:
+       // result: (FMOVSstore [off] {sym} ptr val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64FMOVSfpgp {
+                       break
+               }
+               val := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64FMOVSstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWload [off1+off2] {sym} ptr mem)
+       // result: (MOVWstore [off1+off2] {sym} ptr val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADDconst {
                        break
                }
                off2 := v_0.AuxInt
                ptr := v_0.Args[0]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVWload)
+               v.reset(OpARM64MOVWstore)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWload [off] {sym} (ADD ptr idx) mem)
+       // match: (MOVWstore [off] {sym} (ADD ptr idx) val mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVWloadidx ptr idx mem)
+       // result: (MOVWstoreidx ptr idx val mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADD {
                        break
@@ -15280,23 +16870,392 @@ func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool {
                _ = v_0.Args[1]
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWstoreidx4 ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
+                       break
+               }
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVWloadidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               v.reset(OpARM64MOVWstoreidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVWstorezero [off] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem)
+       // cond:
+       // result: (MOVWstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVWUreg x) mem)
+       // cond:
+       // result: (MOVWstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWUreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVDstore [i-4] {s} ptr0 w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if v_1.AuxInt != 32 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVDstoreidx ptr1 idx1 w mem)
+       for {
+               if v.AuxInt != 4 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if v_1.AuxInt != 32 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWstore_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx4 ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
+       // result: (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w mem)
+       for {
+               if v.AuxInt != 4 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
+                       break
+               }
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if v_1.AuxInt != 32 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstoreidx4 {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstoreidx)
+               v.AddArg(ptr1)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
+               v0.AuxInt = 2
+               v0.AddArg(idx1)
+               v.AddArg(v0)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVDstore [i-4] {s} ptr0 w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               w0 := x.Args[1]
+               if w0.Op != OpARM64SRLconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx ptr1 idx1 w0:(SRLconst [j-32] w) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVDstoreidx ptr1 idx1 w0 mem)
+       for {
+               if v.AuxInt != 4 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               w0 := x.Args[2]
+               if w0.Op != OpARM64SRLconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWload [off] {sym} (ADDshiftLL [2] ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWloadidx4 ptr idx mem)
+       // match: (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx4 ptr1 idx1 w0:(SRLconst [j-32] w) mem))
+       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
+       // result: (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w0 mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
+               if v.AuxInt != 4 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADDshiftLL {
                        break
@@ -15305,110 +17264,93 @@ func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool {
                        break
                }
                _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               v.reset(OpARM64MOVWloadidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstoreidx4 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               w0 := x.Args[2]
+               if w0.Op != OpARM64SRLconst {
                        break
                }
-               v.reset(OpARM64MOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVDconst [0])
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWstorezero {
+               if w0.AuxInt != j-32 {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               if w != w0.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstoreidx)
+               v.AddArg(ptr1)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
+               v0.AuxInt = 2
+               v0.AddArg(idx1)
+               v.AddArg(v0)
+               v.AddArg(w0)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWloadidx_0(v *Value) bool {
-       // match: (MOVWloadidx ptr (MOVDconst [c]) mem)
+func rewriteValueARM64_OpARM64MOVWstoreidx_0(v *Value) bool {
+       // match: (MOVWstoreidx ptr (MOVDconst [c]) val mem)
        // cond:
-       // result: (MOVWload [c] ptr mem)
+       // result: (MOVWstore [c] ptr val mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWload)
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstore)
                v.AuxInt = c
                v.AddArg(ptr)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWloadidx (MOVDconst [c]) ptr mem)
+       // match: (MOVWstoreidx (MOVDconst [c]) idx val mem)
        // cond:
-       // result: (MOVWload [c] ptr mem)
+       // result: (MOVWstore [c] idx val mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWload)
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstore)
                v.AuxInt = c
-               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWloadidx ptr (SLLconst [2] idx) mem)
+       // match: (MOVWstoreidx ptr (SLLconst [2] idx) val mem)
        // cond:
-       // result: (MOVWloadidx4 ptr idx mem)
+       // result: (MOVWstoreidx4 ptr idx val mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64SLLconst {
@@ -15418,18 +17360,20 @@ func rewriteValueARM64_OpARM64MOVWloadidx_0(v *Value) bool {
                        break
                }
                idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWloadidx4)
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstoreidx4)
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWloadidx (SLLconst [2] idx) ptr mem)
+       // match: (MOVWstoreidx (SLLconst [2] idx) ptr val mem)
        // cond:
-       // result: (MOVWloadidx4 ptr idx mem)
+       // result: (MOVWstoreidx4 ptr idx val mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64SLLconst {
                        break
@@ -15439,459 +17383,245 @@ func rewriteValueARM64_OpARM64MOVWloadidx_0(v *Value) bool {
                }
                idx := v_0.Args[0]
                ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWloadidx4)
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstoreidx4)
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
-       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
-       // result: (MOVDconst [0])
+       // match: (MOVWstoreidx ptr idx (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVWstorezeroidx ptr idx mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
                idx := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWstorezeroidx {
-                       break
-               }
-               _ = v_2.Args[2]
-               ptr2 := v_2.Args[0]
-               idx2 := v_2.Args[1]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWloadidx4_0(v *Value) bool {
-       // match: (MOVWloadidx4 ptr (MOVDconst [c]) mem)
-       // cond:
-       // result: (MOVWload [c<<2] ptr mem)
-       for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_2.AuxInt != 0 {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWload)
-               v.AuxInt = c << 2
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstorezeroidx)
                v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _))
-       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
-       // result: (MOVDconst [0])
+       // match: (MOVWstoreidx ptr idx (MOVWreg x) mem)
+       // cond:
+       // result: (MOVWstoreidx ptr idx x mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
                idx := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWstorezeroidx4 {
-                       break
-               }
-               _ = v_2.Args[2]
-               ptr2 := v_2.Args[0]
-               idx2 := v_2.Args[1]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
-                       break
-               }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWreg_0(v *Value) bool {
-       // match: (MOVWreg x:(MOVBload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBload {
-                       break
-               }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVBUload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUload {
-                       break
-               }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHload {
-                       break
-               }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHUload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUload {
-                       break
-               }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVWload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVWload {
-                       break
-               }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVBloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBloadidx {
-                       break
-               }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVBUloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHloadidx {
-                       break
-               }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHUloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUloadidx {
-                       break
-               }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVWloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVWloadidx {
-                       break
-               }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWreg_10(v *Value) bool {
-       // match: (MOVWreg x:(MOVHloadidx2 _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHloadidx2 {
+               if v_2.Op != OpARM64MOVWreg {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWreg x:(MOVHUloadidx2 _ _ _))
+       // match: (MOVWstoreidx ptr idx (MOVWUreg x) mem)
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVWstoreidx ptr idx x mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUloadidx2 {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWUreg {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWreg x:(MOVWloadidx4 _ _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MOVWstoreidx ptr (ADDconst [4] idx) (SRLconst [32] w) x:(MOVWstoreidx ptr idx w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVDstoreidx ptr idx w mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVWloadidx4 {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVBreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBreg {
+               if v_1.AuxInt != 4 {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVBUreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUreg {
+               idx := v_1.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64SRLconst {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHreg {
+               if v_2.AuxInt != 32 {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHreg {
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpARM64MOVWstoreidx {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVWreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVWreg {
+               _ = x.Args[3]
+               if ptr != x.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(int32(c))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if idx != x.Args[1] {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(int32(c))
-               return true
-       }
-       // match: (MOVWreg (SLLconst [lc] x))
-       // cond: lc < 32
-       // result: (SBFIZ [arm64BFAuxInt(lc, 32-lc)] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               if w != x.Args[2] {
                        break
                }
-               lc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(lc < 32) {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpARM64SBFIZ)
-               v.AuxInt = arm64BFAuxInt(lc, 32-lc)
-               v.AddArg(x)
+               v.reset(OpARM64MOVDstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVWstore [off] {sym} ptr (FMOVSfpgp val) mem)
+func rewriteValueARM64_OpARM64MOVWstoreidx4_0(v *Value) bool {
+       // match: (MOVWstoreidx4 ptr (MOVDconst [c]) val mem)
        // cond:
-       // result: (FMOVSstore [off] {sym} ptr val mem)
+       // result: (MOVWstore [c<<2] ptr val mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64FMOVSfpgp {
-                       break
-               }
-               val := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64FMOVSstore)
-               v.AuxInt = off
-               v.Aux = sym
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = c << 2
                v.AddArg(ptr)
                v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWstore [off1+off2] {sym} ptr val mem)
+       // match: (MOVWstoreidx4 ptr idx (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVWstorezeroidx4 ptr idx mem)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if v_2.AuxInt != 0 {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstorezeroidx4)
                v.AddArg(ptr)
-               v.AddArg(val)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [off] {sym} (ADD ptr idx) val mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWstoreidx ptr idx val mem)
+       // match: (MOVWstoreidx4 ptr idx (MOVWreg x) mem)
+       // cond:
+       // result: (MOVWstoreidx4 ptr idx x mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWreg {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(off == 0 && sym == nil) {
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstoreidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx4 ptr idx (MOVWUreg x) mem)
+       // cond:
+       // result: (MOVWstoreidx4 ptr idx x mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWUreg {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstoreidx4)
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWstoreidx4 ptr idx val mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWstorezero_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWstorezero [off1+off2] {sym} ptr mem)
        for {
-               off := v.AuxInt
+               off1 := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDshiftLL {
-                       break
-               }
-               if v_0.AuxInt != 2 {
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               _ = v_0.Args[1]
+               off2 := v_0.AuxInt
                ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(off == 0 && sym == nil) {
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx4)
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
+       // match: (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       // result: (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDaddr {
                        break
@@ -15899,138 +17629,106 @@ func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool {
                off2 := v_0.AuxInt
                sym2 := v_0.Aux
                ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
+               mem := v.Args[1]
                if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
+               v.reset(OpARM64MOVWstorezero)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem)
-       // cond:
-       // result: (MOVWstorezero [off] {sym} ptr mem)
+       // match: (MOVWstorezero [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWstorezeroidx ptr idx mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               if v_1.AuxInt != 0 {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = off
-               v.Aux = sym
+               v.reset(OpARM64MOVWstorezeroidx)
                v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem)
-       // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
+       // match: (MOVWstorezero [off] {sym} (ADDshiftLL [2] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWstorezeroidx4 ptr idx mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWreg {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} ptr (MOVWUreg x) mem)
-       // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWUreg {
+               if v_0.AuxInt != 2 {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVWstorezeroidx4)
                v.AddArg(ptr)
-               v.AddArg(x)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVDstore [i-4] {s} ptr0 w mem)
+       // match: (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
+       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,4) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVDstorezero [min(i,j)] {s} ptr0 mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                ptr0 := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               if v_1.AuxInt != 32 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVWstore {
-                       break
-               }
-               if x.AuxInt != i-4 {
+               x := v.Args[1]
+               if x.Op != OpARM64MOVWstorezero {
                        break
                }
+               j := x.AuxInt
                if x.Aux != s {
                        break
                }
-               _ = x.Args[2]
+               _ = x.Args[1]
                ptr1 := x.Args[0]
-               if w != x.Args[1] {
-                       break
-               }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               mem := x.Args[1]
+               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 4) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVDstore)
-               v.AuxInt = i - 4
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = min(i, j)
                v.Aux = s
                v.AddArg(ptr0)
-               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx ptr1 idx1 w mem))
+       // match: (MOVWstorezero [4] {s} (ADD ptr0 idx0) x:(MOVWstorezeroidx ptr1 idx1 mem))
        // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVDstoreidx ptr1 idx1 w mem)
+       // result: (MOVDstorezeroidx ptr1 idx1 mem)
        for {
                if v.AuxInt != 4 {
                        break
                }
                s := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADD {
                        break
@@ -16038,49 +17736,32 @@ func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool {
                _ = v_0.Args[1]
                ptr0 := v_0.Args[0]
                idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               if v_1.AuxInt != 32 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVWstoreidx {
+               x := v.Args[1]
+               if x.Op != OpARM64MOVWstorezeroidx {
                        break
                }
-               _ = x.Args[3]
+               _ = x.Args[2]
                ptr1 := x.Args[0]
                idx1 := x.Args[1]
-               if w != x.Args[2] {
-                       break
-               }
-               mem := x.Args[3]
+               mem := x.Args[2]
                if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVDstoreidx)
+               v.reset(OpARM64MOVDstorezeroidx)
                v.AddArg(ptr1)
                v.AddArg(idx1)
-               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWstore_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx4 ptr1 idx1 w mem))
+       // match: (MOVWstorezero [4] {s} (ADDshiftLL [2] ptr0 idx0) x:(MOVWstorezeroidx4 ptr1 idx1 mem))
        // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w mem)
+       // result: (MOVDstorezeroidx ptr1 (SLLconst <idx1.Type> [2] idx1) mem)
        for {
                if v.AuxInt != 4 {
                        break
                }
                s := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADDshiftLL {
                        break
@@ -16091,816 +17772,1179 @@ func rewriteValueARM64_OpARM64MOVWstore_10(v *Value) bool {
                _ = v_0.Args[1]
                ptr0 := v_0.Args[0]
                idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               if v_1.AuxInt != 32 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVWstoreidx4 {
+               x := v.Args[1]
+               if x.Op != OpARM64MOVWstorezeroidx4 {
                        break
                }
-               _ = x.Args[3]
+               _ = x.Args[2]
                ptr1 := x.Args[0]
                idx1 := x.Args[1]
-               if w != x.Args[2] {
-                       break
-               }
-               mem := x.Args[3]
+               mem := x.Args[2]
                if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVDstoreidx)
+               v.reset(OpARM64MOVDstorezeroidx)
                v.AddArg(ptr1)
                v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
                v0.AuxInt = 2
                v0.AddArg(idx1)
                v.AddArg(v0)
-               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVDstore [i-4] {s} ptr0 w0 mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWstorezeroidx_0(v *Value) bool {
+       // match: (MOVWstorezeroidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVWstorezero [c] ptr mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
                _ = v.Args[2]
-               ptr0 := v.Args[0]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVWstore {
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstorezeroidx (MOVDconst [c]) idx mem)
+       // cond:
+       // result: (MOVWstorezero [c] idx mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if x.AuxInt != i-4 {
+               c := v_0.AuxInt
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = c
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstorezeroidx ptr (SLLconst [2] idx) mem)
+       // cond:
+       // result: (MOVWstorezeroidx4 ptr idx mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               if x.Aux != s {
+               if v_1.AuxInt != 2 {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != OpARM64SRLconst {
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstorezeroidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstorezeroidx (SLLconst [2] idx) ptr mem)
+       // cond:
+       // result: (MOVWstorezeroidx4 ptr idx mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               if w0.AuxInt != j-32 {
+               if v_0.AuxInt != 2 {
                        break
                }
-               if w != w0.Args[0] {
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstorezeroidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstorezeroidx ptr (ADDconst [4] idx) x:(MOVWstorezeroidx ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVDstorezeroidx ptr idx mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if v_1.AuxInt != 4 {
+                       break
+               }
+               idx := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstorezeroidx {
+                       break
+               }
+               _ = x.Args[2]
+               if ptr != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
                        break
                }
                mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVDstore)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w0)
+               v.reset(OpARM64MOVDstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx ptr1 idx1 w0:(SRLconst [j-32] w) mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVDstoreidx ptr1 idx1 w0 mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWstorezeroidx4_0(v *Value) bool {
+       // match: (MOVWstorezeroidx4 ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVWstorezero [c<<2] ptr mem)
        for {
-               if v.AuxInt != 4 {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               s := v.Aux
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = c << 2
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MSUB_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MSUB a x (MOVDconst [-1]))
+       // cond:
+       // result: (ADD a x)
+       for {
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               if v_2.AuxInt != -1 {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVWstoreidx {
+               v.reset(OpARM64ADD)
+               v.AddArg(a)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MSUB a _ (MOVDconst [0]))
+       // cond:
+       // result: a
+       for {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               w0 := x.Args[2]
-               if w0.Op != OpARM64SRLconst {
+               if v_2.AuxInt != 0 {
                        break
                }
-               if w0.AuxInt != j-32 {
+               v.reset(OpCopy)
+               v.Type = a.Type
+               v.AddArg(a)
+               return true
+       }
+       // match: (MSUB a x (MOVDconst [1]))
+       // cond:
+       // result: (SUB a x)
+       for {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_2.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpARM64SUB)
+               v.AddArg(a)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MSUB a x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (SUBshiftLL a x [log2(c)])
+       for {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_2.AuxInt
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               if w != w0.Args[0] {
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = log2(c)
+               v.AddArg(a)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MSUB a x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c-1) && c>=3
+       // result: (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+       for {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               c := v_2.AuxInt
+               if !(isPowerOfTwo(c-1) && c >= 3) {
                        break
                }
-               v.reset(OpARM64MOVDstoreidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               v.reset(OpARM64SUB)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx4 ptr1 idx1 w0:(SRLconst [j-32] w) mem))
-       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w0 mem)
+       // match: (MSUB a x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c+1) && c>=7
+       // result: (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
        for {
-               if v.AuxInt != 4 {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               s := v.Aux
+               c := v_2.AuxInt
+               if !(isPowerOfTwo(c+1) && c >= 7) {
+                       break
+               }
+               v.reset(OpARM64ADD)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = log2(c + 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MSUB a x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+       for {
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDshiftLL {
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != 2 {
+               c := v_2.AuxInt
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
                        break
                }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c / 3)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MSUB a x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+       for {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVWstoreidx4 {
+               c := v_2.AuxInt
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
                        break
                }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               w0 := x.Args[2]
-               if w0.Op != OpARM64SRLconst {
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = log2(c / 5)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MSUB a x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo(c/7)
+       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+       for {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               if w0.AuxInt != j-32 {
+               c := v_2.AuxInt
+               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
                        break
                }
-               if w != w0.Args[0] {
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c / 7)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MSUB a x (MOVDconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+       for {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+               c := v_2.AuxInt
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
                        break
                }
-               v.reset(OpARM64MOVDstoreidx)
-               v.AddArg(ptr1)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
-               v0.AuxInt = 2
-               v0.AddArg(idx1)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = log2(c / 9)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
                v.AddArg(v0)
-               v.AddArg(w0)
-               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWstoreidx_0(v *Value) bool {
-       // match: (MOVWstoreidx ptr (MOVDconst [c]) val mem)
+func rewriteValueARM64_OpARM64MSUB_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MSUB a (MOVDconst [-1]) x)
        // cond:
-       // result: (MOVWstore [c] ptr val mem)
+       // result: (ADD a x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
+               _ = v.Args[2]
+               a := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               x := v.Args[2]
+               v.reset(OpARM64ADD)
+               v.AddArg(a)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWstoreidx (MOVDconst [c]) idx val mem)
+       // match: (MSUB a (MOVDconst [0]) _)
        // cond:
-       // result: (MOVWstore [c] idx val mem)
+       // result: a
        for {
-               _ = v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = c
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = a.Type
+               v.AddArg(a)
                return true
        }
-       // match: (MOVWstoreidx ptr (SLLconst [2] idx) val mem)
+       // match: (MSUB a (MOVDconst [1]) x)
        // cond:
-       // result: (MOVWstoreidx4 ptr idx val mem)
+       // result: (SUB a x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
+               _ = v.Args[2]
+               a := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SLLconst {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != 2 {
+               if v_1.AuxInt != 1 {
                        break
                }
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstoreidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               x := v.Args[2]
+               v.reset(OpARM64SUB)
+               v.AddArg(a)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWstoreidx (SLLconst [2] idx) ptr val mem)
-       // cond:
-       // result: (MOVWstoreidx4 ptr idx val mem)
+       // match: (MSUB a (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c)
+       // result: (SUBshiftLL a x [log2(c)])
        for {
-               _ = v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != 2 {
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               idx := v_0.Args[0]
-               ptr := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstoreidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = log2(c)
+               v.AddArg(a)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWstoreidx ptr idx (MOVDconst [0]) mem)
-       // cond:
-       // result: (MOVWstorezeroidx ptr idx mem)
+       // match: (MSUB a (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c-1) && c>=3
+       // result: (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_2.AuxInt != 0 {
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(isPowerOfTwo(c-1) && c >= 3) {
                        break
                }
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstorezeroidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpARM64SUB)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWstoreidx ptr idx (MOVWreg x) mem)
-       // cond:
-       // result: (MOVWstoreidx ptr idx x mem)
+       // match: (MSUB a (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c+1) && c>=7
+       // result: (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWreg {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(x)
-               v.AddArg(mem)
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(isPowerOfTwo(c+1) && c >= 7) {
+                       break
+               }
+               v.reset(OpARM64ADD)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = log2(c + 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWstoreidx ptr idx (MOVWUreg x) mem)
-       // cond:
-       // result: (MOVWstoreidx ptr idx x mem)
+       // match: (MSUB a (MOVDconst [c]) x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWUreg {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(x)
-               v.AddArg(mem)
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c / 3)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWstoreidx ptr (ADDconst [4] idx) (SRLconst [32] w) x:(MOVWstoreidx ptr idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVDstoreidx ptr idx w mem)
+       // match: (MSUB a (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
+               _ = v.Args[2]
+               a := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64ADDconst {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != 4 {
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
                        break
                }
-               idx := v_1.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64SRLconst {
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = log2(c / 5)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MSUB a (MOVDconst [c]) x)
+       // cond: c%7 == 0 && isPowerOfTwo(c/7)
+       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+       for {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_2.AuxInt != 32 {
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
                        break
                }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpARM64MOVWstoreidx {
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c / 7)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MSUB a (MOVDconst [c]) x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+       for {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[3]
-               if ptr != x.Args[0] {
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
                        break
                }
-               if idx != x.Args[1] {
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = log2(c / 9)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MSUB_20(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MSUB (MOVDconst [c]) x y)
+       // cond:
+       // result: (ADDconst [c] (MNEG <x.Type> x y))
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if w != x.Args[2] {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               y := v.Args[2]
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Pos, OpARM64MNEG, x.Type)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MSUB a (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (SUBconst [c*d] a)
+       for {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               c := v_1.AuxInt
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
+               d := v_2.AuxInt
+               v.reset(OpARM64SUBconst)
+               v.AuxInt = c * d
+               v.AddArg(a)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWstoreidx4_0(v *Value) bool {
-       // match: (MOVWstoreidx4 ptr (MOVDconst [c]) val mem)
-       // cond:
-       // result: (MOVWstore [c<<2] ptr val mem)
+func rewriteValueARM64_OpARM64MSUBW_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MSUBW a x (MOVDconst [c]))
+       // cond: int32(c)==-1
+       // result: (ADD a x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = c << 2
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               c := v_2.AuxInt
+               if !(int32(c) == -1) {
+                       break
+               }
+               v.reset(OpARM64ADD)
+               v.AddArg(a)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWstoreidx4 ptr idx (MOVDconst [0]) mem)
-       // cond:
-       // result: (MOVWstorezeroidx4 ptr idx mem)
+       // match: (MSUBW a _ (MOVDconst [c]))
+       // cond: int32(c)==0
+       // result: a
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
+               _ = v.Args[2]
+               a := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               if v_2.AuxInt != 0 {
+               c := v_2.AuxInt
+               if !(int32(c) == 0) {
                        break
                }
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstorezeroidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = a.Type
+               v.AddArg(a)
                return true
        }
-       // match: (MOVWstoreidx4 ptr idx (MOVWreg x) mem)
-       // cond:
-       // result: (MOVWstoreidx4 ptr idx x mem)
+       // match: (MSUBW a x (MOVDconst [c]))
+       // cond: int32(c)==1
+       // result: (SUB a x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWreg {
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstoreidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               c := v_2.AuxInt
+               if !(int32(c) == 1) {
+                       break
+               }
+               v.reset(OpARM64SUB)
+               v.AddArg(a)
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreidx4 ptr idx (MOVWUreg x) mem)
-       // cond:
-       // result: (MOVWstoreidx4 ptr idx x mem)
+       // match: (MSUBW a x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (SUBshiftLL a x [log2(c)])
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWUreg {
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstoreidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               c := v_2.AuxInt
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = log2(c)
+               v.AddArg(a)
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWstorezero_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWstorezero [off1+off2] {sym} ptr mem)
+       // match: (MSUBW a x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c-1) && int32(c)>=3
+       // result: (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               c := v_2.AuxInt
+               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
                        break
                }
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpARM64SUB)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // match: (MSUBW a x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c+1) && int32(c)>=7
+       // result: (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               c := v_2.AuxInt
+               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
                        break
                }
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpARM64ADD)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = log2(c + 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWstorezero [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWstorezeroidx ptr idx mem)
+       // match: (MSUBW a x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               c := v_2.AuxInt
+               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVWstorezeroidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c / 3)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWstorezero [off] {sym} (ADDshiftLL [2] ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWstorezeroidx4 ptr idx mem)
+       // match: (MSUBW a x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDshiftLL {
-                       break
-               }
-               if v_0.AuxInt != 2 {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               c := v_2.AuxInt
+               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVWstorezeroidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = log2(c / 5)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
-       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,4) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVDstorezero [min(i,j)] {s} ptr0 mem)
+       // match: (MSUBW a x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               ptr0 := v.Args[0]
+               _ = v.Args[2]
+               a := v.Args[0]
                x := v.Args[1]
-               if x.Op != OpARM64MOVWstorezero {
-                       break
-               }
-               j := x.AuxInt
-               if x.Aux != s {
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[1]
-               ptr1 := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 4) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               c := v_2.AuxInt
+               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVDstorezero)
-               v.AuxInt = min(i, j)
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(mem)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c / 7)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWstorezero [4] {s} (ADD ptr0 idx0) x:(MOVWstorezeroidx ptr1 idx1 mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVDstorezeroidx ptr1 idx1 mem)
+       // match: (MSUBW a x (MOVDconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
        for {
-               if v.AuxInt != 4 {
+               _ = v.Args[2]
+               a := v.Args[0]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               c := v_2.AuxInt
+               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
                        break
                }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVWstorezeroidx {
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = log2(c / 9)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MSUBW_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MSUBW a (MOVDconst [c]) x)
+       // cond: int32(c)==-1
+       // result: (ADD a x)
+       for {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(int32(c) == -1) {
                        break
                }
-               v.reset(OpARM64MOVDstorezeroidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(mem)
+               v.reset(OpARM64ADD)
+               v.AddArg(a)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWstorezero [4] {s} (ADDshiftLL [2] ptr0 idx0) x:(MOVWstorezeroidx4 ptr1 idx1 mem))
-       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVDstorezeroidx ptr1 (SLLconst <idx1.Type> [2] idx1) mem)
+       // match: (MSUBW a (MOVDconst [c]) _)
+       // cond: int32(c)==0
+       // result: a
        for {
-               if v.AuxInt != 4 {
-                       break
-               }
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDshiftLL {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != 2 {
+               c := v_1.AuxInt
+               if !(int32(c) == 0) {
                        break
                }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVWstorezeroidx4 {
+               v.reset(OpCopy)
+               v.Type = a.Type
+               v.AddArg(a)
+               return true
+       }
+       // match: (MSUBW a (MOVDconst [c]) x)
+       // cond: int32(c)==1
+       // result: (SUB a x)
+       for {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(int32(c) == 1) {
                        break
                }
-               v.reset(OpARM64MOVDstorezeroidx)
-               v.AddArg(ptr1)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
-               v0.AuxInt = 2
-               v0.AddArg(idx1)
-               v.AddArg(v0)
-               v.AddArg(mem)
+               v.reset(OpARM64SUB)
+               v.AddArg(a)
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWstorezeroidx_0(v *Value) bool {
-       // match: (MOVWstorezeroidx ptr (MOVDconst [c]) mem)
-       // cond:
-       // result: (MOVWstorezero [c] ptr mem)
+       // match: (MSUBW a (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c)
+       // result: (SUBshiftLL a x [log2(c)])
        for {
                _ = v.Args[2]
-               ptr := v.Args[0]
+               a := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               x := v.Args[2]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = log2(c)
+               v.AddArg(a)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWstorezeroidx (MOVDconst [c]) idx mem)
-       // cond:
-       // result: (MOVWstorezero [c] idx mem)
+       // match: (MSUBW a (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c-1) && int32(c)>=3
+       // result: (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
        for {
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = c
-               v.AddArg(idx)
-               v.AddArg(mem)
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+                       break
+               }
+               v.reset(OpARM64SUB)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWstorezeroidx ptr (SLLconst [2] idx) mem)
-       // cond:
-       // result: (MOVWstorezeroidx4 ptr idx mem)
+       // match: (MSUBW a (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c+1) && int32(c)>=7
+       // result: (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
        for {
                _ = v.Args[2]
-               ptr := v.Args[0]
+               a := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SLLconst {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != 2 {
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstorezeroidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpARM64ADD)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = log2(c + 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWstorezeroidx (SLLconst [2] idx) ptr mem)
-       // cond:
-       // result: (MOVWstorezeroidx4 ptr idx mem)
+       // match: (MSUBW a (MOVDconst [c]) x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
        for {
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != 2 {
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
                        break
                }
-               idx := v_0.Args[0]
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstorezeroidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c / 3)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWstorezeroidx ptr (ADDconst [4] idx) x:(MOVWstorezeroidx ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVDstorezeroidx ptr idx mem)
+       // match: (MSUBW a (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
        for {
                _ = v.Args[2]
-               ptr := v.Args[0]
+               a := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64ADDconst {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != 4 {
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
                        break
                }
-               idx := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVWstorezeroidx {
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = log2(c / 5)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MSUBW a (MOVDconst [c]) x)
+       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+       // result: (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+       for {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[2]
-               if ptr != x.Args[0] {
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
                        break
                }
-               if idx != x.Args[1] {
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c / 7)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MSUBW a (MOVDconst [c]) x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+       // result: (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+       for {
+               _ = v.Args[2]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               c := v_1.AuxInt
+               x := v.Args[2]
+               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVDstorezeroidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = log2(c / 9)
+               v.AddArg(a)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWstorezeroidx4_0(v *Value) bool {
-       // match: (MOVWstorezeroidx4 ptr (MOVDconst [c]) mem)
+func rewriteValueARM64_OpARM64MSUBW_20(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MSUBW (MOVDconst [c]) x y)
        // cond:
-       // result: (MOVWstorezero [c<<2] ptr mem)
+       // result: (ADDconst [c] (MNEGW <x.Type> x y))
        for {
                _ = v.Args[2]
-               ptr := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               y := v.Args[2]
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Pos, OpARM64MNEGW, x.Type)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MSUBW a (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (SUBconst [int64(int32(c)*int32(d))] a)
+       for {
+               _ = v.Args[2]
+               a := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = c << 2
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
+                       break
+               }
+               d := v_2.AuxInt
+               v.reset(OpARM64SUBconst)
+               v.AuxInt = int64(int32(c) * int32(d))
+               v.AddArg(a)
                return true
        }
        return false
@@ -26956,7 +29000,7 @@ func rewriteValueARM64_OpARM64SUB_0(v *Value) bool {
                return true
        }
        // match: (SUB a l:(MUL x y))
-       // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // cond: l.Uses==1 && clobber(l)
        // result: (MSUB a x y)
        for {
                _ = v.Args[1]
@@ -26968,7 +29012,7 @@ func rewriteValueARM64_OpARM64SUB_0(v *Value) bool {
                _ = l.Args[1]
                x := l.Args[0]
                y := l.Args[1]
-               if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+               if !(l.Uses == 1 && clobber(l)) {
                        break
                }
                v.reset(OpARM64MSUB)
@@ -26978,7 +29022,7 @@ func rewriteValueARM64_OpARM64SUB_0(v *Value) bool {
                return true
        }
        // match: (SUB a l:(MNEG x y))
-       // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // cond: l.Uses==1 && clobber(l)
        // result: (MADD a x y)
        for {
                _ = v.Args[1]
@@ -26990,7 +29034,7 @@ func rewriteValueARM64_OpARM64SUB_0(v *Value) bool {
                _ = l.Args[1]
                x := l.Args[0]
                y := l.Args[1]
-               if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+               if !(l.Uses == 1 && clobber(l)) {
                        break
                }
                v.reset(OpARM64MADD)
@@ -27000,7 +29044,7 @@ func rewriteValueARM64_OpARM64SUB_0(v *Value) bool {
                return true
        }
        // match: (SUB a l:(MULW x y))
-       // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // cond: a.Type.Size() != 8 && l.Uses==1 && clobber(l)
        // result: (MSUBW a x y)
        for {
                _ = v.Args[1]
@@ -27012,7 +29056,7 @@ func rewriteValueARM64_OpARM64SUB_0(v *Value) bool {
                _ = l.Args[1]
                x := l.Args[0]
                y := l.Args[1]
-               if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+               if !(a.Type.Size() != 8 && l.Uses == 1 && clobber(l)) {
                        break
                }
                v.reset(OpARM64MSUBW)
@@ -27022,7 +29066,7 @@ func rewriteValueARM64_OpARM64SUB_0(v *Value) bool {
                return true
        }
        // match: (SUB a l:(MNEGW x y))
-       // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // cond: a.Type.Size() != 8 && l.Uses==1 && clobber(l)
        // result: (MADDW a x y)
        for {
                _ = v.Args[1]
@@ -27034,7 +29078,7 @@ func rewriteValueARM64_OpARM64SUB_0(v *Value) bool {
                _ = l.Args[1]
                x := l.Args[0]
                y := l.Args[1]
-               if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+               if !(a.Type.Size() != 8 && l.Uses == 1 && clobber(l)) {
                        break
                }
                v.reset(OpARM64MADDW)
@@ -39865,4 +41909,4 @@ func rewriteBlockARM64(b *Block) bool {
                }
        }
        return false
-}
+}
\ No newline at end of file
index c0539256d52e239ab3cf1139c11b4e27b112e81a..05a28695d49e415aacbf45f357eeff83c88b54e4 100644 (file)
@@ -206,8 +206,15 @@ func AddMul(x int) int {
        return 2*x + 1
 }
 
-func MULA(a, b, c uint32) uint32 {
-       // arm:`MULA`
-       // arm64:`MADDW`
-       return a*b + c
+func MULA(a, b, c uint32) (uint32, uint32, uint32) {
+       // arm:`MULA`,-`MUL\s`
+       // arm64:`MADDW`,-`MULW`
+       r0 := a*b + c
+       // arm:`MULA`,-`MUL\s`
+       // arm64:`MADDW`,-`MULW`
+       r1 := c*79 + a
+       // arm:`ADD`,-`MULA`,-`MUL\s`
+       // arm64:`ADD`,-`MADD`,-`MULW`
+       r2 := b*64 + c
+       return r0, r1, r2
 }