]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: optimize ARM64's code with MADD/MSUB
author Ben Shi <powerman1st@163.com>
Mon, 13 Aug 2018 10:38:25 +0000 (10:38 +0000)
committer Cherry Zhang <cherryyz@google.com>
Tue, 4 Sep 2018 20:41:58 +0000 (20:41 +0000)
MADD does MUL-ADD in a single instruction, and MSUB does a
similar simplification for MUL-SUB.

This CL implements the optimization with MADD/MSUB.

1. The total size of pkg/android_arm64/ decreases by about 20KB,
excluding cmd/compile/.

2. The go1 benchmark shows a little improvement for RegexpMatchHard_32-4
and Template-4, excluding noise.

name                     old time/op    new time/op    delta
BinaryTree17-4              16.3s ± 1%     16.5s ± 1%  +1.41%  (p=0.000 n=26+28)
Fannkuch11-4                8.79s ± 1%     8.76s ± 0%  -0.36%  (p=0.000 n=26+28)
FmtFprintfEmpty-4           172ns ± 0%     172ns ± 0%    ~     (all equal)
FmtFprintfString-4          362ns ± 1%     364ns ± 0%  +0.55%  (p=0.000 n=30+30)
FmtFprintfInt-4             416ns ± 0%     416ns ± 0%    ~     (p=0.099 n=22+30)
FmtFprintfIntInt-4          655ns ± 1%     660ns ± 1%  +0.76%  (p=0.000 n=30+30)
FmtFprintfPrefixedInt-4     810ns ± 0%     809ns ± 0%  -0.08%  (p=0.009 n=29+29)
FmtFprintfFloat-4          1.08µs ± 0%    1.09µs ± 0%  +0.61%  (p=0.000 n=30+29)
FmtManyArgs-4              2.70µs ± 0%    2.69µs ± 0%  -0.23%  (p=0.000 n=29+28)
GobDecode-4                32.2ms ± 1%    32.1ms ± 1%  -0.39%  (p=0.000 n=27+26)
GobEncode-4                27.4ms ± 2%    27.4ms ± 1%    ~     (p=0.864 n=28+28)
Gzip-4                      1.53s ± 1%     1.52s ± 1%  -0.30%  (p=0.031 n=29+29)
Gunzip-4                    146ms ± 0%     146ms ± 0%  -0.14%  (p=0.001 n=25+30)
HTTPClientServer-4         1.00ms ± 4%    0.98ms ± 6%  -1.65%  (p=0.001 n=29+30)
JSONEncode-4               67.3ms ± 1%    67.2ms ± 1%    ~     (p=0.520 n=28+28)
JSONDecode-4                329ms ± 5%     330ms ± 4%    ~     (p=0.142 n=30+30)
Mandelbrot200-4            17.3ms ± 0%    17.3ms ± 0%    ~     (p=0.055 n=26+29)
GoParse-4                  16.9ms ± 1%    17.0ms ± 1%  +0.82%  (p=0.000 n=30+30)
RegexpMatchEasy0_32-4       382ns ± 0%     382ns ± 0%    ~     (all equal)
RegexpMatchEasy0_1K-4      1.33µs ± 0%    1.33µs ± 0%  -0.25%  (p=0.000 n=30+27)
RegexpMatchEasy1_32-4       361ns ± 0%     361ns ± 0%  -0.08%  (p=0.002 n=30+28)
RegexpMatchEasy1_1K-4      2.11µs ± 0%    2.09µs ± 0%  -0.54%  (p=0.000 n=30+29)
RegexpMatchMedium_32-4      594ns ± 0%     592ns ± 0%  -0.32%  (p=0.000 n=30+30)
RegexpMatchMedium_1K-4      173µs ± 0%     172µs ± 0%  -0.77%  (p=0.000 n=29+27)
RegexpMatchHard_32-4       10.4µs ± 0%    10.1µs ± 0%  -3.63%  (p=0.000 n=28+27)
RegexpMatchHard_1K-4        306µs ± 0%     301µs ± 0%  -1.64%  (p=0.000 n=29+30)
Revcomp-4                   2.51s ± 1%     2.52s ± 0%  +0.18%  (p=0.017 n=26+27)
Template-4                  394ms ± 3%     382ms ± 3%  -3.22%  (p=0.000 n=28+28)
TimeParse-4                1.67µs ± 0%    1.67µs ± 0%  +0.05%  (p=0.030 n=27+30)
TimeFormat-4               1.72µs ± 0%    1.70µs ± 0%  -0.79%  (p=0.000 n=28+26)
[Geo mean]                  259µs          259µs       -0.33%

name                     old speed      new speed      delta
GobDecode-4              23.8MB/s ± 1%  23.9MB/s ± 1%  +0.40%  (p=0.001 n=27+26)
GobEncode-4              28.0MB/s ± 2%  28.0MB/s ± 1%    ~     (p=0.863 n=28+28)
Gzip-4                   12.7MB/s ± 1%  12.7MB/s ± 1%  +0.32%  (p=0.026 n=29+29)
Gunzip-4                  133MB/s ± 0%   133MB/s ± 0%  +0.15%  (p=0.001 n=24+30)
JSONEncode-4             28.8MB/s ± 1%  28.9MB/s ± 1%    ~     (p=0.475 n=28+28)
JSONDecode-4             5.89MB/s ± 4%  5.87MB/s ± 5%    ~     (p=0.174 n=29+30)
GoParse-4                3.43MB/s ± 0%  3.40MB/s ± 1%  -0.83%  (p=0.000 n=28+30)
RegexpMatchEasy0_32-4    83.6MB/s ± 0%  83.6MB/s ± 0%    ~     (p=0.848 n=28+29)
RegexpMatchEasy0_1K-4     768MB/s ± 0%   770MB/s ± 0%  +0.25%  (p=0.000 n=30+27)
RegexpMatchEasy1_32-4    88.5MB/s ± 0%  88.5MB/s ± 0%    ~     (p=0.086 n=29+29)
RegexpMatchEasy1_1K-4     486MB/s ± 0%   489MB/s ± 0%  +0.54%  (p=0.000 n=30+29)
RegexpMatchMedium_32-4   1.68MB/s ± 0%  1.69MB/s ± 0%  +0.60%  (p=0.000 n=30+23)
RegexpMatchMedium_1K-4   5.90MB/s ± 0%  5.95MB/s ± 0%  +0.85%  (p=0.000 n=18+20)
RegexpMatchHard_32-4     3.07MB/s ± 0%  3.18MB/s ± 0%  +3.72%  (p=0.000 n=29+26)
RegexpMatchHard_1K-4     3.35MB/s ± 0%  3.40MB/s ± 0%  +1.69%  (p=0.000 n=30+30)
Revcomp-4                 101MB/s ± 0%   101MB/s ± 0%  -0.18%  (p=0.018 n=26+27)
Template-4               4.92MB/s ± 4%  5.09MB/s ± 3%  +3.31%  (p=0.000 n=28+28)
[Geo mean]               22.4MB/s       22.6MB/s       +0.62%

Change-Id: I8f304b272785739f57b3c8f736316f658f8c1b2a
Reviewed-on: https://go-review.googlesource.com/129119
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
src/cmd/compile/internal/arm64/ssa.go
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/ARM64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteARM64.go
test/codegen/arithmetic.go

index 3712a73eb5f4ea7742caf4e4fface2385831b01e..db7064cff0e1bb2cb3947a012969d3364da7040c 100644 (file)
@@ -212,7 +212,11 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                ssa.OpARM64FMSUBS,
                ssa.OpARM64FMSUBD,
                ssa.OpARM64FNMSUBS,
-               ssa.OpARM64FNMSUBD:
+               ssa.OpARM64FNMSUBD,
+               ssa.OpARM64MADD,
+               ssa.OpARM64MADDW,
+               ssa.OpARM64MSUB,
+               ssa.OpARM64MSUBW:
                rt := v.Reg()
                ra := v.Args[0].Reg()
                rm := v.Args[1].Reg()
index d20780681920bf7585c2bfe4d3e34578512ebf8a..374ece24e581db580223e186c8612fea832585e3 100644 (file)
 (EQ (CMPWconst [0] x) yes no) -> (ZW x yes no)
 (NE (CMPWconst [0] x) yes no) -> (NZW x yes no)
 
+(EQ (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 -> (EQ (CMN a (MUL <x.Type> x y)) yes no)
+(NE (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 -> (NE (CMN a (MUL <x.Type> x y)) yes no)
+(LT (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 -> (LT (CMN a (MUL <x.Type> x y)) yes no)
+(LE (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 -> (LE (CMN a (MUL <x.Type> x y)) yes no)
+(GT (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 -> (GT (CMN a (MUL <x.Type> x y)) yes no)
+(GE (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 -> (GE (CMN a (MUL <x.Type> x y)) yes no)
+
+(EQ (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 -> (EQ (CMP a (MUL <x.Type> x y)) yes no)
+(NE (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 -> (NE (CMP a (MUL <x.Type> x y)) yes no)
+(LE (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 -> (LE (CMP a (MUL <x.Type> x y)) yes no)
+(LT (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 -> (LT (CMP a (MUL <x.Type> x y)) yes no)
+(GE (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 -> (GE (CMP a (MUL <x.Type> x y)) yes no)
+(GT (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 -> (GT (CMP a (MUL <x.Type> x y)) yes no)
+
+(EQ (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (EQ (CMNW a (MULW <x.Type> x y)) yes no)
+(NE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (NE (CMNW a (MULW <x.Type> x y)) yes no)
+(LE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (LE (CMNW a (MULW <x.Type> x y)) yes no)
+(LT (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (LT (CMNW a (MULW <x.Type> x y)) yes no)
+(GE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (GE (CMNW a (MULW <x.Type> x y)) yes no)
+(GT (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (GT (CMNW a (MULW <x.Type> x y)) yes no)
+
+(EQ (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (EQ (CMPW a (MULW <x.Type> x y)) yes no)
+(NE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (NE (CMPW a (MULW <x.Type> x y)) yes no)
+(LE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (LE (CMPW a (MULW <x.Type> x y)) yes no)
+(LT (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (LT (CMPW a (MULW <x.Type> x y)) yes no)
+(GE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (GE (CMPW a (MULW <x.Type> x y)) yes no)
+(GT (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (GT (CMPW a (MULW <x.Type> x y)) yes no)
+
 // Absorb bit-tests into block
 (Z  (ANDconst [c] x) yes no) && oneBit(c) -> (TBZ  {ntz(c)} x yes no)
 (NZ (ANDconst [c] x) yes no) && oneBit(c) -> (TBNZ {ntz(c)} x yes no)
 (MUL (NEG x) y) -> (MNEG x y)
 (MULW (NEG x) y) -> (MNEGW x y)
 
+// madd/msub
+(ADD a l:(MUL  x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADD a x y)
+(SUB a l:(MUL  x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUB a x y)
+(ADD a l:(MNEG x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUB a x y)
+(SUB a l:(MNEG x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADD a x y)
+
+(ADD a l:(MULW  x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADDW a x y)
+(SUB a l:(MULW  x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUBW a x y)
+(ADD a l:(MNEGW x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUBW a x y)
+(SUB a l:(MNEGW x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADDW a x y)
+
 // mul by constant
 (MUL x (MOVDconst [-1])) -> (NEG x)
 (MUL _ (MOVDconst [0])) -> (MOVDconst [0])
index 96f2ac3ceb37f3814b8785e85bd543a002e8e990..2c434f4a740cc9060ed94e8dc2e6b4c280a16404 100644 (file)
@@ -139,6 +139,7 @@ func init() {
                gp1flags  = regInfo{inputs: []regMask{gpg}}
                gp1flags1 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
                gp21      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
+               gp31      = regInfo{inputs: []regMask{gpg, gpg, gpg}, outputs: []regMask{gp}}
                gp21nog   = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
                gp2flags  = regInfo{inputs: []regMask{gpg, gpg}}
                gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
@@ -235,6 +236,10 @@ func init() {
                {name: "FMSUBD", argLength: 3, reg: fp31, asm: "FMSUBD"},   // +arg0 - (arg1 * arg2)
                {name: "FNMSUBS", argLength: 3, reg: fp31, asm: "FNMSUBS"}, // -arg0 + (arg1 * arg2)
                {name: "FNMSUBD", argLength: 3, reg: fp31, asm: "FNMSUBD"}, // -arg0 + (arg1 * arg2)
+               {name: "MADD", argLength: 3, reg: gp31, asm: "MADD"},       // +arg0 + (arg1 * arg2)
+               {name: "MADDW", argLength: 3, reg: gp31, asm: "MADDW"},     // +arg0 + (arg1 * arg2), 32-bit
+               {name: "MSUB", argLength: 3, reg: gp31, asm: "MSUB"},       // +arg0 - (arg1 * arg2)
+               {name: "MSUBW", argLength: 3, reg: gp31, asm: "MSUBW"},     // +arg0 - (arg1 * arg2), 32-bit
 
                // shifts
                {name: "SLL", argLength: 2, reg: gp21, asm: "LSL"},                        // arg0 << arg1, shift amount is mod 64
index 1c9d263debd0cc99c03f4e2aa025b1d4108089af..6243bfca4dabe268c92e14ddfa6ec3fcd02dea55 100644 (file)
@@ -1129,6 +1129,10 @@ const (
        OpARM64FMSUBD
        OpARM64FNMSUBS
        OpARM64FNMSUBD
+       OpARM64MADD
+       OpARM64MADDW
+       OpARM64MSUB
+       OpARM64MSUBW
        OpARM64SLL
        OpARM64SLLconst
        OpARM64SRL
@@ -14954,6 +14958,66 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "MADD",
+               argLen: 3,
+               asm:    arm64.AMADD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:   "MADDW",
+               argLen: 3,
+               asm:    arm64.AMADDW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:   "MSUB",
+               argLen: 3,
+               asm:    arm64.AMSUB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:   "MSUBW",
+               argLen: 3,
+               asm:    arm64.AMSUBW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
        {
                name:   "SLL",
                argLen: 2,
index fc93273f361a8a61806a080e89aa4198851bde70..a84d1afdf43c0bd61a22f82bf12bc50ca26d8ee9 100644 (file)
@@ -16,7 +16,7 @@ var _ = types.TypeMem // in case not otherwise used
 func rewriteValueARM64(v *Value) bool {
        switch v.Op {
        case OpARM64ADD:
-               return rewriteValueARM64_OpARM64ADD_0(v)
+               return rewriteValueARM64_OpARM64ADD_0(v) || rewriteValueARM64_OpARM64ADD_10(v)
        case OpARM64ADDconst:
                return rewriteValueARM64_OpARM64ADDconst_0(v)
        case OpARM64ADDshiftLL:
@@ -284,7 +284,7 @@ func rewriteValueARM64(v *Value) bool {
        case OpARM64STP:
                return rewriteValueARM64_OpARM64STP_0(v)
        case OpARM64SUB:
-               return rewriteValueARM64_OpARM64SUB_0(v)
+               return rewriteValueARM64_OpARM64SUB_0(v) || rewriteValueARM64_OpARM64SUB_10(v)
        case OpARM64SUBconst:
                return rewriteValueARM64_OpARM64SUBconst_0(v)
        case OpARM64SUBshiftLL:
@@ -905,6 +905,185 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (ADD a l:(MUL x y))
+       // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // result: (MADD a x y)
+       for {
+               _ = v.Args[1]
+               a := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpARM64MUL {
+                       break
+               }
+               _ = l.Args[1]
+               x := l.Args[0]
+               y := l.Args[1]
+               if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+                       break
+               }
+               v.reset(OpARM64MADD)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADD l:(MUL x y) a)
+       // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // result: (MADD a x y)
+       for {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != OpARM64MUL {
+                       break
+               }
+               _ = l.Args[1]
+               x := l.Args[0]
+               y := l.Args[1]
+               a := v.Args[1]
+               if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+                       break
+               }
+               v.reset(OpARM64MADD)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADD a l:(MNEG x y))
+       // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // result: (MSUB a x y)
+       for {
+               _ = v.Args[1]
+               a := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpARM64MNEG {
+                       break
+               }
+               _ = l.Args[1]
+               x := l.Args[0]
+               y := l.Args[1]
+               if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+                       break
+               }
+               v.reset(OpARM64MSUB)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADD l:(MNEG x y) a)
+       // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // result: (MSUB a x y)
+       for {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != OpARM64MNEG {
+                       break
+               }
+               _ = l.Args[1]
+               x := l.Args[0]
+               y := l.Args[1]
+               a := v.Args[1]
+               if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+                       break
+               }
+               v.reset(OpARM64MSUB)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADD a l:(MULW x y))
+       // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // result: (MADDW a x y)
+       for {
+               _ = v.Args[1]
+               a := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpARM64MULW {
+                       break
+               }
+               _ = l.Args[1]
+               x := l.Args[0]
+               y := l.Args[1]
+               if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+                       break
+               }
+               v.reset(OpARM64MADDW)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADD l:(MULW x y) a)
+       // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // result: (MADDW a x y)
+       for {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != OpARM64MULW {
+                       break
+               }
+               _ = l.Args[1]
+               x := l.Args[0]
+               y := l.Args[1]
+               a := v.Args[1]
+               if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+                       break
+               }
+               v.reset(OpARM64MADDW)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADD a l:(MNEGW x y))
+       // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // result: (MSUBW a x y)
+       for {
+               _ = v.Args[1]
+               a := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpARM64MNEGW {
+                       break
+               }
+               _ = l.Args[1]
+               x := l.Args[0]
+               y := l.Args[1]
+               if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+                       break
+               }
+               v.reset(OpARM64MSUBW)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADD l:(MNEGW x y) a)
+       // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // result: (MSUBW a x y)
+       for {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != OpARM64MNEGW {
+                       break
+               }
+               _ = l.Args[1]
+               x := l.Args[0]
+               y := l.Args[1]
+               a := v.Args[1]
+               if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+                       break
+               }
+               v.reset(OpARM64MSUBW)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ADD_10(v *Value) bool {
        // match: (ADD x (NEG y))
        // cond:
        // result: (SUB x y)
@@ -24624,6 +24803,94 @@ func rewriteValueARM64_OpARM64SUB_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (SUB a l:(MUL x y))
+       // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // result: (MSUB a x y)
+       for {
+               _ = v.Args[1]
+               a := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpARM64MUL {
+                       break
+               }
+               _ = l.Args[1]
+               x := l.Args[0]
+               y := l.Args[1]
+               if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+                       break
+               }
+               v.reset(OpARM64MSUB)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SUB a l:(MNEG x y))
+       // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // result: (MADD a x y)
+       for {
+               _ = v.Args[1]
+               a := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpARM64MNEG {
+                       break
+               }
+               _ = l.Args[1]
+               x := l.Args[0]
+               y := l.Args[1]
+               if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+                       break
+               }
+               v.reset(OpARM64MADD)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SUB a l:(MULW x y))
+       // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // result: (MSUBW a x y)
+       for {
+               _ = v.Args[1]
+               a := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpARM64MULW {
+                       break
+               }
+               _ = l.Args[1]
+               x := l.Args[0]
+               y := l.Args[1]
+               if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+                       break
+               }
+               v.reset(OpARM64MSUBW)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SUB a l:(MNEGW x y))
+       // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // result: (MADDW a x y)
+       for {
+               _ = v.Args[1]
+               a := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpARM64MNEGW {
+                       break
+               }
+               _ = l.Args[1]
+               x := l.Args[0]
+               y := l.Args[1]
+               if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+                       break
+               }
+               v.reset(OpARM64MADDW)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        // match: (SUB x x)
        // cond:
        // result: (MOVDconst [0])
@@ -24721,6 +24988,9 @@ func rewriteValueARM64_OpARM64SUB_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       return false
+}
+func rewriteValueARM64_OpARM64SUB_10(v *Value) bool {
        // match: (SUB x0 x1:(SRAconst [c] y))
        // cond: clobberIfDead(x1)
        // result: (SUBshiftRA x0 y [c])
@@ -32608,6 +32878,138 @@ func rewriteBlockARM64(b *Block) bool {
                        b.Aux = nil
                        return true
                }
+               // match: (EQ (CMPconst [0] z:(MADD a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (EQ (CMN a (MUL <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MADD {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64EQ
+                       v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (EQ (CMPconst [0] z:(MSUB a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (EQ (CMP a (MUL <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MSUB {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64EQ
+                       v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (EQ (CMPWconst [0] z:(MADDW a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (EQ (CMNW a (MULW <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPWconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MADDW {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64EQ
+                       v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (EQ (CMPWconst [0] z:(MSUBW a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (EQ (CMPW a (MULW <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPWconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MSUBW {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64EQ
+                       v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
                // match: (EQ (TSTconst [c] x) yes no)
                // cond: oneBit(c)
                // result: (TBZ {ntz(c)} x yes no)
@@ -32784,6 +33186,138 @@ func rewriteBlockARM64(b *Block) bool {
                        b.Aux = nil
                        return true
                }
+               // match: (GE (CMPconst [0] z:(MADD a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (GE (CMN a (MUL <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MADD {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64GE
+                       v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (GE (CMPconst [0] z:(MSUB a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (GE (CMP a (MUL <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MSUB {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64GE
+                       v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (GE (CMPWconst [0] z:(MADDW a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (GE (CMNW a (MULW <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPWconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MADDW {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64GE
+                       v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (GE (CMPWconst [0] z:(MSUBW a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (GE (CMPW a (MULW <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPWconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MSUBW {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64GE
+                       v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
                // match: (GE (CMPWconst [0] x) yes no)
                // cond:
                // result: (TBZ {int64(31)} x yes no)
@@ -32956,6 +33490,138 @@ func rewriteBlockARM64(b *Block) bool {
                        b.Aux = nil
                        return true
                }
+               // match: (GT (CMPconst [0] z:(MADD a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (GT (CMN a (MUL <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MADD {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64GT
+                       v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (GT (CMPconst [0] z:(MSUB a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (GT (CMP a (MUL <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MSUB {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64GT
+                       v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (GT (CMPWconst [0] z:(MADDW a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (GT (CMNW a (MULW <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPWconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MADDW {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64GT
+                       v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (GT (CMPWconst [0] z:(MSUBW a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (GT (CMPW a (MULW <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPWconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MSUBW {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64GT
+                       v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
                // match: (GT (FlagEQ) yes no)
                // cond:
                // result: (First nil no yes)
@@ -33248,6 +33914,138 @@ func rewriteBlockARM64(b *Block) bool {
                        b.Aux = nil
                        return true
                }
+               // match: (LE (CMPconst [0] z:(MADD a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (LE (CMN a (MUL <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MADD {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64LE
+                       v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (LE (CMPconst [0] z:(MSUB a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (LE (CMP a (MUL <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MSUB {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64LE
+                       v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (LE (CMPWconst [0] z:(MADDW a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (LE (CMNW a (MULW <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPWconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MADDW {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64LE
+                       v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (LE (CMPWconst [0] z:(MSUBW a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (LE (CMPW a (MULW <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPWconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MSUBW {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64LE
+                       v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
                // match: (LE (FlagEQ) yes no)
                // cond:
                // result: (First nil yes no)
@@ -33386,6 +34184,138 @@ func rewriteBlockARM64(b *Block) bool {
                        b.Aux = nil
                        return true
                }
+               // match: (LT (CMPconst [0] z:(MADD a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (LT (CMN a (MUL <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MADD {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64LT
+                       v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (LT (CMPconst [0] z:(MSUB a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (LT (CMP a (MUL <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MSUB {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64LT
+                       v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (LT (CMPWconst [0] z:(MADDW a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (LT (CMNW a (MULW <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPWconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MADDW {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64LT
+                       v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (LT (CMPWconst [0] z:(MSUBW a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (LT (CMPW a (MULW <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPWconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MSUBW {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64LT
+                       v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
                // match: (LT (CMPWconst [0] x) yes no)
                // cond:
                // result: (TBNZ {int64(31)} x yes no)
@@ -33706,6 +34636,138 @@ func rewriteBlockARM64(b *Block) bool {
                        b.Aux = nil
                        return true
                }
+               // match: (NE (CMPconst [0] z:(MADD a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (NE (CMN a (MUL <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MADD {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64NE
+                       v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (NE (CMPconst [0] z:(MSUB a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (NE (CMP a (MUL <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MSUB {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64NE
+                       v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (NE (CMPWconst [0] z:(MADDW a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (NE (CMNW a (MULW <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPWconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MADDW {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64NE
+                       v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (NE (CMPWconst [0] z:(MSUBW a x y)) yes no)
+               // cond: z.Uses==1
+               // result: (NE (CMPW a (MULW <x.Type> x y)) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPWconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64MSUBW {
+                               break
+                       }
+                       _ = z.Args[2]
+                       a := z.Args[0]
+                       x := z.Args[1]
+                       y := z.Args[2]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64NE
+                       v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags)
+                       v0.AddArg(a)
+                       v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+                       v1.AddArg(x)
+                       v1.AddArg(y)
+                       v0.AddArg(v1)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
                // match: (NE (TSTconst [c] x) yes no)
                // cond: oneBit(c)
                // result: (TBNZ {ntz(c)} x yes no)
index 09a2fa091e803f50fb85bdd949df2f17bd412d4a..c0539256d52e239ab3cf1139c11b4e27b112e81a 100644 (file)
@@ -205,3 +205,9 @@ func AddMul(x int) int {
        // amd64:"LEAQ\t1"
        return 2*x + 1
 }
+
+func MULA(a, b, c uint32) uint32 { // computes a*b + c; the arch-tagged comments below appear to name the single multiply-add instruction expected on each target (asmcheck-style directives, TODO confirm)
+       // arm:`MULA`
+       // arm64:`MADDW`
+       return a*b + c
+}