]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: optimize ARM code with MULAF/MULSF/MULAD/MULSD
author Ben Shi <powerman1st@163.com>
Thu, 14 Sep 2017 06:52:51 +0000 (06:52 +0000)
committer Cherry Zhang <cherryyz@google.com>
Fri, 15 Sep 2017 22:30:34 +0000 (22:30 +0000)
The Go compiler can generate better ARM code with these more
efficient FP instructions. There is little improvement
overall, but a big improvement in special cases.

1. The size of pkg/linux_arm/math.a shrinks by 2.4%.

2. There is neither improvement nor regression in the compilecmp benchmark.
name        old time/op       new time/op       delta
Template          2.32s ± 2%        2.32s ± 1%    ~     (p=1.000 n=9+10)
Unicode           1.32s ± 4%        1.32s ± 4%    ~     (p=0.912 n=10+10)
GoTypes           7.76s ± 1%        7.79s ± 1%    ~     (p=0.447 n=9+10)
Compiler          37.4s ± 2%        37.2s ± 2%    ~     (p=0.218 n=10+10)
SSA               84.8s ± 2%        85.0s ± 1%    ~     (p=0.604 n=10+9)
Flate             1.45s ± 2%        1.44s ± 2%    ~     (p=0.075 n=10+10)
GoParser          1.82s ± 1%        1.81s ± 1%    ~     (p=0.190 n=10+10)
Reflect           5.06s ± 1%        5.05s ± 1%    ~     (p=0.315 n=10+9)
Tar               2.37s ± 1%        2.37s ± 2%    ~     (p=0.912 n=10+10)
XML               2.56s ± 1%        2.58s ± 2%    ~     (p=0.089 n=10+10)
[Geo mean]        4.77s             4.77s       -0.08%

name        old user-time/op  new user-time/op  delta
Template          2.74s ± 2%        2.75s ± 2%    ~     (p=0.856 n=9+10)
Unicode           1.61s ± 4%        1.62s ± 3%    ~     (p=0.693 n=10+10)
GoTypes           9.55s ± 1%        9.49s ± 2%    ~     (p=0.056 n=9+10)
Compiler          45.9s ± 1%        45.8s ± 1%    ~     (p=0.345 n=9+10)
SSA                110s ± 1%         110s ± 1%    ~     (p=0.763 n=9+10)
Flate             1.68s ± 2%        1.68s ± 3%    ~     (p=0.616 n=10+10)
GoParser          2.14s ± 4%        2.14s ± 1%    ~     (p=0.825 n=10+9)
Reflect           5.95s ± 1%        5.97s ± 3%    ~     (p=0.951 n=9+10)
Tar               2.94s ± 3%        2.93s ± 2%    ~     (p=0.359 n=10+10)
XML               3.03s ± 3%        3.07s ± 6%    ~     (p=0.166 n=10+10)
[Geo mean]        5.76s             5.77s       +0.12%

name        old text-bytes    new text-bytes    delta
HelloSize         588kB ± 0%        588kB ± 0%    ~     (all equal)

name        old data-bytes    new data-bytes    delta
HelloSize        5.46kB ± 0%       5.46kB ± 0%    ~     (all equal)

name        old bss-bytes     new bss-bytes     delta
HelloSize        72.9kB ± 0%       72.9kB ± 0%    ~     (all equal)

name        old exe-bytes     new exe-bytes     delta
HelloSize        1.03MB ± 0%       1.03MB ± 0%    ~     (all equal)

3. The performance of Mandelbrot200 improves by 15%, though there is
   little improvement overall.
name                     old time/op    new time/op    delta
BinaryTree17-4              41.7s ± 1%     41.7s ± 1%     ~     (p=0.264 n=29+23)
Fannkuch11-4                24.2s ± 0%     24.1s ± 1%   -0.13%  (p=0.050 n=30+30)
FmtFprintfEmpty-4           826ns ± 1%     824ns ± 1%   -0.24%  (p=0.038 n=25+30)
FmtFprintfString-4         1.38µs ± 1%    1.38µs ± 0%   -0.42%  (p=0.000 n=27+25)
FmtFprintfInt-4            1.46µs ± 1%    1.46µs ± 0%     ~     (p=0.060 n=30+23)
FmtFprintfIntInt-4         2.11µs ± 1%    2.08µs ± 0%   -1.04%  (p=0.000 n=30+30)
FmtFprintfPrefixedInt-4    2.23µs ± 1%    2.22µs ± 1%   -0.51%  (p=0.000 n=30+30)
FmtFprintfFloat-4          4.49µs ± 1%    4.48µs ± 1%   -0.22%  (p=0.004 n=26+30)
FmtManyArgs-4              8.06µs ± 1%    8.12µs ± 1%   +0.68%  (p=0.000 n=25+30)
GobDecode-4                 104ms ± 1%     104ms ± 2%     ~     (p=0.362 n=29+29)
GobEncode-4                92.9ms ± 1%    92.8ms ± 2%     ~     (p=0.786 n=30+30)
Gzip-4                      4.12s ± 1%     4.12s ± 1%     ~     (p=0.314 n=30+30)
Gunzip-4                    602ms ± 1%     603ms ± 1%     ~     (p=0.164 n=30+30)
HTTPClientServer-4          659µs ± 1%     655µs ± 2%   -0.64%  (p=0.006 n=25+28)
JSONEncode-4                234ms ± 1%     235ms ± 1%   +0.29%  (p=0.050 n=30+30)
JSONDecode-4                912ms ± 0%     911ms ± 0%     ~     (p=0.385 n=18+24)
Mandelbrot200-4            49.2ms ± 0%    41.7ms ± 0%  -15.35%  (p=0.000 n=25+27)
GoParse-4                  46.3ms ± 1%    46.3ms ± 2%     ~     (p=0.572 n=30+30)
RegexpMatchEasy0_32-4      1.29µs ± 1%    1.27µs ± 0%   -1.59%  (p=0.000 n=30+30)
RegexpMatchEasy0_1K-4      7.62µs ± 4%    7.71µs ± 3%     ~     (p=0.074 n=30+30)
RegexpMatchEasy1_32-4      1.31µs ± 0%    1.30µs ± 1%   -0.71%  (p=0.000 n=23+30)
RegexpMatchEasy1_1K-4      10.3µs ± 3%    10.3µs ± 5%     ~     (p=0.105 n=30+30)
RegexpMatchMedium_32-4     2.06µs ± 1%    2.06µs ± 1%     ~     (p=0.100 n=30+30)
RegexpMatchMedium_1K-4      533µs ± 1%     534µs ± 1%     ~     (p=0.254 n=29+30)
RegexpMatchHard_32-4       28.9µs ± 0%    28.9µs ± 0%     ~     (p=0.154 n=30+30)
RegexpMatchHard_1K-4        868µs ± 1%     867µs ± 0%     ~     (p=0.729 n=30+23)
Revcomp-4                  66.9ms ± 1%    67.2ms ± 2%     ~     (p=0.102 n=28+29)
Template-4                  1.07s ± 1%     1.06s ± 1%   -0.53%  (p=0.000 n=30+30)
TimeParse-4                7.07µs ± 1%    7.01µs ± 0%   -0.85%  (p=0.000 n=30+25)
TimeFormat-4               13.1µs ± 0%    13.2µs ± 1%   +0.77%  (p=0.000 n=27+27)
[Geo mean]                  721µs          716µs        -0.70%

name                     old speed      new speed      delta
GobDecode-4              7.38MB/s ± 1%  7.37MB/s ± 2%     ~     (p=0.399 n=29+29)
GobEncode-4              8.26MB/s ± 1%  8.27MB/s ± 2%     ~     (p=0.790 n=30+30)
Gzip-4                   4.71MB/s ± 1%  4.71MB/s ± 1%     ~     (p=0.885 n=30+30)
Gunzip-4                 32.2MB/s ± 1%  32.2MB/s ± 1%     ~     (p=0.190 n=30+30)
JSONEncode-4             8.28MB/s ± 1%  8.25MB/s ± 1%     ~     (p=0.053 n=30+30)
JSONDecode-4             2.13MB/s ± 0%  2.12MB/s ± 1%     ~     (p=0.072 n=18+30)
GoParse-4                1.25MB/s ± 1%  1.25MB/s ± 2%     ~     (p=0.863 n=30+30)
RegexpMatchEasy0_32-4    24.8MB/s ± 0%  25.2MB/s ± 1%   +1.61%  (p=0.000 n=30+30)
RegexpMatchEasy0_1K-4     134MB/s ± 4%   133MB/s ± 3%     ~     (p=0.074 n=30+30)
RegexpMatchEasy1_32-4    24.5MB/s ± 0%  24.6MB/s ± 1%   +0.72%  (p=0.000 n=23+30)
RegexpMatchEasy1_1K-4    99.1MB/s ± 3%  99.8MB/s ± 5%     ~     (p=0.105 n=30+30)
RegexpMatchMedium_32-4    483kB/s ± 1%   487kB/s ± 1%   +0.83%  (p=0.002 n=30+30)
RegexpMatchMedium_1K-4   1.92MB/s ± 1%  1.92MB/s ± 1%     ~     (p=0.058 n=30+30)
RegexpMatchHard_32-4     1.10MB/s ± 0%  1.11MB/s ± 0%     ~     (p=0.804 n=30+30)
RegexpMatchHard_1K-4     1.18MB/s ± 0%  1.18MB/s ± 0%     ~     (all equal)
Revcomp-4                38.0MB/s ± 1%  37.8MB/s ± 2%     ~     (p=0.098 n=28+29)
Template-4               1.82MB/s ± 1%  1.83MB/s ± 1%   +0.55%  (p=0.000 n=29+29)
[Geo mean]               6.79MB/s       6.79MB/s        +0.09%

Change-Id: Ia91991c2c5c59c5df712de85a83b13a21c0a554b
Reviewed-on: https://go-review.googlesource.com/63770
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
src/cmd/compile/internal/arm/ssa.go
src/cmd/compile/internal/ssa/gen/ARM.rules
src/cmd/compile/internal/ssa/gen/ARMOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteARM.go

index 5525197d317d03eaf7daf9b70fe39176e4509d4a..a70df6dd0ee146139c8b419fb59219a688de82b7 100644 (file)
@@ -197,6 +197,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.Reg = r1
                p.To.Type = obj.TYPE_REG
                p.To.Reg = r
+       case ssa.OpARMMULAF, ssa.OpARMMULAD, ssa.OpARMMULSF, ssa.OpARMMULSD:
+               r := v.Reg()
+               r0 := v.Args[0].Reg()
+               r1 := v.Args[1].Reg()
+               r2 := v.Args[2].Reg()
+               if r != r0 {
+                       v.Fatalf("result and addend are not in the same register: %v", v.LongString())
+               }
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = r2
+               p.Reg = r1
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = r
        case ssa.OpARMADDS,
                ssa.OpARMSUBS:
                r := v.Reg0()
index 197f9540d3062f4ce617116b232fd9e52e4bb391..b21cd6f9f3d6f8d53c6e0f78cc9c98ada6284096 100644 (file)
 (NMULF (NEGF x) y) -> (MULF x y)
 (NMULD (NEGD x) y) -> (MULD x y)
 
+// the result will overwrite the addend, since they are in the same register
+(ADDF a (MULF x y)) && a.Uses == 1 && objabi.GOARM >= 6 -> (MULAF a x y)
+(ADDF a (NMULF x y)) && a.Uses == 1 && objabi.GOARM >= 6 -> (MULSF a x y)
+(ADDD a (MULD x y)) && a.Uses == 1 && objabi.GOARM >= 6 -> (MULAD a x y)
+(ADDD a (NMULD x y)) && a.Uses == 1 && objabi.GOARM >= 6 -> (MULSD a x y)
+(SUBF a (MULF x y)) && a.Uses == 1 && objabi.GOARM >= 6 -> (MULSF a x y)
+(SUBF a (NMULF x y)) && a.Uses == 1 && objabi.GOARM >= 6 -> (MULAF a x y)
+(SUBD a (MULD x y)) && a.Uses == 1 && objabi.GOARM >= 6 -> (MULSD a x y)
+(SUBD a (NMULD x y)) && a.Uses == 1 && objabi.GOARM >= 6 -> (MULAD a x y)
+
 (AND x (MVN y)) -> (BIC x y)
 
 // simplification with *shift ops
index f94ef532580ba25fb5f70a87fd9bd0f9f1de7edc..93b50135d437d7694140da1b0e97c07d2a60076f 100644 (file)
@@ -122,6 +122,7 @@ func init() {
                fpgp      = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}, clobbers: buildReg("F15")} // int-float conversion uses F15 as tmp
                gpfp      = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}, clobbers: buildReg("F15")}
                fp21      = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
+               fp31      = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
                fp2flags  = regInfo{inputs: []regMask{fp, fp}}
                fpload    = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}}
                fpstore   = regInfo{inputs: []regMask{gpspsbg, fp}}
@@ -181,6 +182,11 @@ func init() {
                {name: "DIVF", argLength: 2, reg: fp21, asm: "DIVF"},                      // arg0 / arg1
                {name: "DIVD", argLength: 2, reg: fp21, asm: "DIVD"},                      // arg0 / arg1
 
+               {name: "MULAF", argLength: 3, reg: fp31, asm: "MULAF", resultInArg0: true}, // arg0 + (arg1 * arg2)
+               {name: "MULAD", argLength: 3, reg: fp31, asm: "MULAD", resultInArg0: true}, // arg0 + (arg1 * arg2)
+               {name: "MULSF", argLength: 3, reg: fp31, asm: "MULSF", resultInArg0: true}, // arg0 - (arg1 * arg2)
+               {name: "MULSD", argLength: 3, reg: fp31, asm: "MULSD", resultInArg0: true}, // arg0 - (arg1 * arg2)
+
                {name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1
                {name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int32"}, // arg0 & auxInt
                {name: "OR", argLength: 2, reg: gp21, asm: "ORR", commutative: true},  // arg0 | arg1
index c5d43f4611f07628dfb19a774ebfa8d08ac69684..4493759ae93a821b9abaca6eecb270f70ca30f98 100644 (file)
@@ -707,6 +707,10 @@ const (
        OpARMNMULD
        OpARMDIVF
        OpARMDIVD
+       OpARMMULAF
+       OpARMMULAD
+       OpARMMULSF
+       OpARMMULSD
        OpARMAND
        OpARMANDconst
        OpARMOR
@@ -8655,6 +8659,70 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "MULAF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          arm.AMULAF,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {2, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+                       outputs: []outputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+               },
+       },
+       {
+               name:         "MULAD",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          arm.AMULAD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {2, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+                       outputs: []outputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+               },
+       },
+       {
+               name:         "MULSF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          arm.AMULSF,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {2, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+                       outputs: []outputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+               },
+       },
+       {
+               name:         "MULSD",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          arm.AMULSD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {2, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+                       outputs: []outputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+               },
+       },
        {
                name:        "AND",
                argLen:      2,
index 1c7eb39236af43bf20a509b5407e994136b3d342..38695c503dbdf7f173582e81e93e9bb5a7bd5a3f 100644 (file)
@@ -33,6 +33,10 @@ func rewriteValueARM(v *Value) bool {
                return rewriteValueARM_OpARMADCshiftRLreg_0(v)
        case OpARMADD:
                return rewriteValueARM_OpARMADD_0(v) || rewriteValueARM_OpARMADD_10(v)
+       case OpARMADDD:
+               return rewriteValueARM_OpARMADDD_0(v)
+       case OpARMADDF:
+               return rewriteValueARM_OpARMADDF_0(v)
        case OpARMADDS:
                return rewriteValueARM_OpARMADDS_0(v) || rewriteValueARM_OpARMADDS_10(v)
        case OpARMADDSshiftLL:
@@ -321,6 +325,10 @@ func rewriteValueARM(v *Value) bool {
                return rewriteValueARM_OpARMSRLconst_0(v)
        case OpARMSUB:
                return rewriteValueARM_OpARMSUB_0(v) || rewriteValueARM_OpARMSUB_10(v)
+       case OpARMSUBD:
+               return rewriteValueARM_OpARMSUBD_0(v)
+       case OpARMSUBF:
+               return rewriteValueARM_OpARMSUBF_0(v)
        case OpARMSUBS:
                return rewriteValueARM_OpARMSUBS_0(v) || rewriteValueARM_OpARMSUBS_10(v)
        case OpARMSUBSshiftLL:
@@ -2039,6 +2047,188 @@ func rewriteValueARM_OpARMADD_10(v *Value) bool {
        }
        return false
 }
+func rewriteValueARM_OpARMADDD_0(v *Value) bool { // tries each ADDD rule below in order; returns true once v has been rewritten in place, false if no rule matched
+       // match: (ADDD a (MULD x y))
+       // cond: a.Uses == 1 && objabi.GOARM >= 6
+       // result: (MULAD a x y)
+       for { // not a real loop: every path ends in break (rule does not apply) or return true
+               _ = v.Args[1]
+               a := v.Args[0] // the addend
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMULD {
+                       break
+               }
+               _ = v_1.Args[1]
+               x := v_1.Args[0]
+               y := v_1.Args[1]
+               if !(a.Uses == 1 && objabi.GOARM >= 6) { // rule applies only when a has exactly one use and objabi.GOARM is at least 6
+                       break
+               }
+               v.reset(OpARMMULAD) // v becomes (MULAD a x y)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADDD (MULD x y) a)
+       // cond: a.Uses == 1 && objabi.GOARM >= 6
+       // result: (MULAD a x y)
+       for { // commuted form of the rule above: the MULD is arg 0 and the addend is arg 1
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMMULD {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               a := v.Args[1] // the addend
+               if !(a.Uses == 1 && objabi.GOARM >= 6) {
+                       break
+               }
+               v.reset(OpARMMULAD) // the addend is still emitted as the first argument
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADDD a (NMULD x y))
+       // cond: a.Uses == 1 && objabi.GOARM >= 6
+       // result: (MULSD a x y)
+       for { // adding an NMULD product is rewritten to MULSD rather than MULAD
+               _ = v.Args[1]
+               a := v.Args[0] // the addend
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMNMULD {
+                       break
+               }
+               _ = v_1.Args[1]
+               x := v_1.Args[0]
+               y := v_1.Args[1]
+               if !(a.Uses == 1 && objabi.GOARM >= 6) {
+                       break
+               }
+               v.reset(OpARMMULSD) // v becomes (MULSD a x y)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADDD (NMULD x y) a)
+       // cond: a.Uses == 1 && objabi.GOARM >= 6
+       // result: (MULSD a x y)
+       for { // commuted form of the NMULD rule above
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMNMULD {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               a := v.Args[1] // the addend
+               if !(a.Uses == 1 && objabi.GOARM >= 6) {
+                       break
+               }
+               v.reset(OpARMMULSD)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       return false // no ADDD rule applied; v is unchanged
+}
+func rewriteValueARM_OpARMADDF_0(v *Value) bool { // tries each ADDF rule below in order; returns true once v has been rewritten in place, false if no rule matched
+       // match: (ADDF a (MULF x y))
+       // cond: a.Uses == 1 && objabi.GOARM >= 6
+       // result: (MULAF a x y)
+       for { // not a real loop: every path ends in break (rule does not apply) or return true
+               _ = v.Args[1]
+               a := v.Args[0] // the addend
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMULF {
+                       break
+               }
+               _ = v_1.Args[1]
+               x := v_1.Args[0]
+               y := v_1.Args[1]
+               if !(a.Uses == 1 && objabi.GOARM >= 6) { // rule applies only when a has exactly one use and objabi.GOARM is at least 6
+                       break
+               }
+               v.reset(OpARMMULAF) // v becomes (MULAF a x y)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADDF (MULF x y) a)
+       // cond: a.Uses == 1 && objabi.GOARM >= 6
+       // result: (MULAF a x y)
+       for { // commuted form of the rule above: the MULF is arg 0 and the addend is arg 1
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMMULF {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               a := v.Args[1] // the addend
+               if !(a.Uses == 1 && objabi.GOARM >= 6) {
+                       break
+               }
+               v.reset(OpARMMULAF) // the addend is still emitted as the first argument
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADDF a (NMULF x y))
+       // cond: a.Uses == 1 && objabi.GOARM >= 6
+       // result: (MULSF a x y)
+       for { // adding an NMULF product is rewritten to MULSF rather than MULAF
+               _ = v.Args[1]
+               a := v.Args[0] // the addend
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMNMULF {
+                       break
+               }
+               _ = v_1.Args[1]
+               x := v_1.Args[0]
+               y := v_1.Args[1]
+               if !(a.Uses == 1 && objabi.GOARM >= 6) {
+                       break
+               }
+               v.reset(OpARMMULSF) // v becomes (MULSF a x y)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADDF (NMULF x y) a)
+       // cond: a.Uses == 1 && objabi.GOARM >= 6
+       // result: (MULSF a x y)
+       for { // commuted form of the NMULF rule above
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARMNMULF {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               a := v.Args[1] // the addend
+               if !(a.Uses == 1 && objabi.GOARM >= 6) {
+                       break
+               }
+               v.reset(OpARMMULSF)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       return false // no ADDF rule applied; v is unchanged
+}
 func rewriteValueARM_OpARMADDS_0(v *Value) bool {
        // match: (ADDS x (MOVWconst [c]))
        // cond:
@@ -13627,6 +13817,100 @@ func rewriteValueARM_OpARMSUB_10(v *Value) bool {
        }
        return false
 }
+func rewriteValueARM_OpARMSUBD_0(v *Value) bool { // tries each SUBD rule below in order; returns true once v has been rewritten in place, false if no rule matched
+       // match: (SUBD a (MULD x y))
+       // cond: a.Uses == 1 && objabi.GOARM >= 6
+       // result: (MULSD a x y)
+       for { // not a real loop: every path ends in break (rule does not apply) or return true
+               _ = v.Args[1]
+               a := v.Args[0] // the minuend; only this orientation is matched, with the product as arg 1
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMULD {
+                       break
+               }
+               _ = v_1.Args[1]
+               x := v_1.Args[0]
+               y := v_1.Args[1]
+               if !(a.Uses == 1 && objabi.GOARM >= 6) { // rule applies only when a has exactly one use and objabi.GOARM is at least 6
+                       break
+               }
+               v.reset(OpARMMULSD) // v becomes (MULSD a x y)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SUBD a (NMULD x y))
+       // cond: a.Uses == 1 && objabi.GOARM >= 6
+       // result: (MULAD a x y)
+       for { // subtracting an NMULD product is rewritten to MULAD rather than MULSD
+               _ = v.Args[1]
+               a := v.Args[0] // the minuend
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMNMULD {
+                       break
+               }
+               _ = v_1.Args[1]
+               x := v_1.Args[0]
+               y := v_1.Args[1]
+               if !(a.Uses == 1 && objabi.GOARM >= 6) {
+                       break
+               }
+               v.reset(OpARMMULAD) // v becomes (MULAD a x y)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       return false // no SUBD rule applied; v is unchanged
+}
+func rewriteValueARM_OpARMSUBF_0(v *Value) bool { // tries each SUBF rule below in order; returns true once v has been rewritten in place, false if no rule matched
+       // match: (SUBF a (MULF x y))
+       // cond: a.Uses == 1 && objabi.GOARM >= 6
+       // result: (MULSF a x y)
+       for { // not a real loop: every path ends in break (rule does not apply) or return true
+               _ = v.Args[1]
+               a := v.Args[0] // the minuend; only this orientation is matched, with the product as arg 1
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMMULF {
+                       break
+               }
+               _ = v_1.Args[1]
+               x := v_1.Args[0]
+               y := v_1.Args[1]
+               if !(a.Uses == 1 && objabi.GOARM >= 6) { // rule applies only when a has exactly one use and objabi.GOARM is at least 6
+                       break
+               }
+               v.reset(OpARMMULSF) // v becomes (MULSF a x y)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SUBF a (NMULF x y))
+       // cond: a.Uses == 1 && objabi.GOARM >= 6
+       // result: (MULAF a x y)
+       for { // subtracting an NMULF product is rewritten to MULAF rather than MULSF
+               _ = v.Args[1]
+               a := v.Args[0] // the minuend
+               v_1 := v.Args[1]
+               if v_1.Op != OpARMNMULF {
+                       break
+               }
+               _ = v_1.Args[1]
+               x := v_1.Args[0]
+               y := v_1.Args[1]
+               if !(a.Uses == 1 && objabi.GOARM >= 6) {
+                       break
+               }
+               v.reset(OpARMMULAF) // v becomes (MULAF a x y)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       return false // no SUBF rule applied; v is unchanged
+}
 func rewriteValueARM_OpARMSUBS_0(v *Value) bool {
        // match: (SUBS x (MOVWconst [c]))
        // cond: