From b444215116e8d4a63f70f1a1b5a31f60a41a7f75 Mon Sep 17 00:00:00 2001 From: Ben Shi Date: Mon, 13 Aug 2018 10:38:25 +0000 Subject: [PATCH] cmd/compile: optimize ARM64's code with MADD/MSUB MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit MADD does MUL-ADD in a single instruction, and MSUB does the similar simplification for MUL-SUB. The CL implements the optimization with MADD/MSUB. 1. The total size of pkg/android_arm64/ decreases about 20KB, excluding cmd/compile/. 2. The go1 benchmark shows a little improvement for RegexpMatchHard_32-4 and Template-4, excluding noise. name old time/op new time/op delta BinaryTree17-4 16.3s ± 1% 16.5s ± 1% +1.41% (p=0.000 n=26+28) Fannkuch11-4 8.79s ± 1% 8.76s ± 0% -0.36% (p=0.000 n=26+28) FmtFprintfEmpty-4 172ns ± 0% 172ns ± 0% ~ (all equal) FmtFprintfString-4 362ns ± 1% 364ns ± 0% +0.55% (p=0.000 n=30+30) FmtFprintfInt-4 416ns ± 0% 416ns ± 0% ~ (p=0.099 n=22+30) FmtFprintfIntInt-4 655ns ± 1% 660ns ± 1% +0.76% (p=0.000 n=30+30) FmtFprintfPrefixedInt-4 810ns ± 0% 809ns ± 0% -0.08% (p=0.009 n=29+29) FmtFprintfFloat-4 1.08µs ± 0% 1.09µs ± 0% +0.61% (p=0.000 n=30+29) FmtManyArgs-4 2.70µs ± 0% 2.69µs ± 0% -0.23% (p=0.000 n=29+28) GobDecode-4 32.2ms ± 1% 32.1ms ± 1% -0.39% (p=0.000 n=27+26) GobEncode-4 27.4ms ± 2% 27.4ms ± 1% ~ (p=0.864 n=28+28) Gzip-4 1.53s ± 1% 1.52s ± 1% -0.30% (p=0.031 n=29+29) Gunzip-4 146ms ± 0% 146ms ± 0% -0.14% (p=0.001 n=25+30) HTTPClientServer-4 1.00ms ± 4% 0.98ms ± 6% -1.65% (p=0.001 n=29+30) JSONEncode-4 67.3ms ± 1% 67.2ms ± 1% ~ (p=0.520 n=28+28) JSONDecode-4 329ms ± 5% 330ms ± 4% ~ (p=0.142 n=30+30) Mandelbrot200-4 17.3ms ± 0% 17.3ms ± 0% ~ (p=0.055 n=26+29) GoParse-4 16.9ms ± 1% 17.0ms ± 1% +0.82% (p=0.000 n=30+30) RegexpMatchEasy0_32-4 382ns ± 0% 382ns ± 0% ~ (all equal) RegexpMatchEasy0_1K-4 1.33µs ± 0% 1.33µs ± 0% -0.25% (p=0.000 n=30+27) RegexpMatchEasy1_32-4 361ns ± 0% 361ns ± 0% -0.08% (p=0.002 n=30+28) RegexpMatchEasy1_1K-4 2.11µs ± 0% 2.09µs ± 0% -0.54% 
(p=0.000 n=30+29) RegexpMatchMedium_32-4 594ns ± 0% 592ns ± 0% -0.32% (p=0.000 n=30+30) RegexpMatchMedium_1K-4 173µs ± 0% 172µs ± 0% -0.77% (p=0.000 n=29+27) RegexpMatchHard_32-4 10.4µs ± 0% 10.1µs ± 0% -3.63% (p=0.000 n=28+27) RegexpMatchHard_1K-4 306µs ± 0% 301µs ± 0% -1.64% (p=0.000 n=29+30) Revcomp-4 2.51s ± 1% 2.52s ± 0% +0.18% (p=0.017 n=26+27) Template-4 394ms ± 3% 382ms ± 3% -3.22% (p=0.000 n=28+28) TimeParse-4 1.67µs ± 0% 1.67µs ± 0% +0.05% (p=0.030 n=27+30) TimeFormat-4 1.72µs ± 0% 1.70µs ± 0% -0.79% (p=0.000 n=28+26) [Geo mean] 259µs 259µs -0.33% name old speed new speed delta GobDecode-4 23.8MB/s ± 1% 23.9MB/s ± 1% +0.40% (p=0.001 n=27+26) GobEncode-4 28.0MB/s ± 2% 28.0MB/s ± 1% ~ (p=0.863 n=28+28) Gzip-4 12.7MB/s ± 1% 12.7MB/s ± 1% +0.32% (p=0.026 n=29+29) Gunzip-4 133MB/s ± 0% 133MB/s ± 0% +0.15% (p=0.001 n=24+30) JSONEncode-4 28.8MB/s ± 1% 28.9MB/s ± 1% ~ (p=0.475 n=28+28) JSONDecode-4 5.89MB/s ± 4% 5.87MB/s ± 5% ~ (p=0.174 n=29+30) GoParse-4 3.43MB/s ± 0% 3.40MB/s ± 1% -0.83% (p=0.000 n=28+30) RegexpMatchEasy0_32-4 83.6MB/s ± 0% 83.6MB/s ± 0% ~ (p=0.848 n=28+29) RegexpMatchEasy0_1K-4 768MB/s ± 0% 770MB/s ± 0% +0.25% (p=0.000 n=30+27) RegexpMatchEasy1_32-4 88.5MB/s ± 0% 88.5MB/s ± 0% ~ (p=0.086 n=29+29) RegexpMatchEasy1_1K-4 486MB/s ± 0% 489MB/s ± 0% +0.54% (p=0.000 n=30+29) RegexpMatchMedium_32-4 1.68MB/s ± 0% 1.69MB/s ± 0% +0.60% (p=0.000 n=30+23) RegexpMatchMedium_1K-4 5.90MB/s ± 0% 5.95MB/s ± 0% +0.85% (p=0.000 n=18+20) RegexpMatchHard_32-4 3.07MB/s ± 0% 3.18MB/s ± 0% +3.72% (p=0.000 n=29+26) RegexpMatchHard_1K-4 3.35MB/s ± 0% 3.40MB/s ± 0% +1.69% (p=0.000 n=30+30) Revcomp-4 101MB/s ± 0% 101MB/s ± 0% -0.18% (p=0.018 n=26+27) Template-4 4.92MB/s ± 4% 5.09MB/s ± 3% +3.31% (p=0.000 n=28+28) [Geo mean] 22.4MB/s 22.6MB/s +0.62% Change-Id: I8f304b272785739f57b3c8f736316f658f8c1b2a Reviewed-on: https://go-review.googlesource.com/129119 Run-TryBot: Ben Shi TryBot-Result: Gobot Gobot Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/arm64/ssa.go | 6 
+- src/cmd/compile/internal/ssa/gen/ARM64.rules | 39 + src/cmd/compile/internal/ssa/gen/ARM64Ops.go | 5 + src/cmd/compile/internal/ssa/opGen.go | 64 ++ src/cmd/compile/internal/ssa/rewriteARM64.go | 1066 +++++++++++++++++- test/codegen/arithmetic.go | 6 + 6 files changed, 1183 insertions(+), 3 deletions(-) diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go index 3712a73eb5..db7064cff0 100644 --- a/src/cmd/compile/internal/arm64/ssa.go +++ b/src/cmd/compile/internal/arm64/ssa.go @@ -212,7 +212,11 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ssa.OpARM64FMSUBS, ssa.OpARM64FMSUBD, ssa.OpARM64FNMSUBS, - ssa.OpARM64FNMSUBD: + ssa.OpARM64FNMSUBD, + ssa.OpARM64MADD, + ssa.OpARM64MADDW, + ssa.OpARM64MSUB, + ssa.OpARM64MSUBW: rt := v.Reg() ra := v.Args[0].Reg() rm := v.Args[1].Reg() diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules index d207806819..374ece24e5 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules @@ -594,6 +594,34 @@ (EQ (CMPWconst [0] x) yes no) -> (ZW x yes no) (NE (CMPWconst [0] x) yes no) -> (NZW x yes no) +(EQ (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 -> (EQ (CMN a (MUL x y)) yes no) +(NE (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 -> (NE (CMN a (MUL x y)) yes no) +(LT (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 -> (LT (CMN a (MUL x y)) yes no) +(LE (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 -> (LE (CMN a (MUL x y)) yes no) +(GT (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 -> (GT (CMN a (MUL x y)) yes no) +(GE (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 -> (GE (CMN a (MUL x y)) yes no) + +(EQ (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 -> (EQ (CMP a (MUL x y)) yes no) +(NE (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 -> (NE (CMP a (MUL x y)) yes no) +(LE (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 -> (LE (CMP a (MUL x y)) yes no) 
+(LT (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 -> (LT (CMP a (MUL x y)) yes no) +(GE (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 -> (GE (CMP a (MUL x y)) yes no) +(GT (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 -> (GT (CMP a (MUL x y)) yes no) + +(EQ (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (EQ (CMNW a (MULW x y)) yes no) +(NE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (NE (CMNW a (MULW x y)) yes no) +(LE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (LE (CMNW a (MULW x y)) yes no) +(LT (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (LT (CMNW a (MULW x y)) yes no) +(GE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (GE (CMNW a (MULW x y)) yes no) +(GT (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (GT (CMNW a (MULW x y)) yes no) + +(EQ (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (EQ (CMPW a (MULW x y)) yes no) +(NE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (NE (CMPW a (MULW x y)) yes no) +(LE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (LE (CMPW a (MULW x y)) yes no) +(LT (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (LT (CMPW a (MULW x y)) yes no) +(GE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (GE (CMPW a (MULW x y)) yes no) +(GT (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (GT (CMPW a (MULW x y)) yes no) + // Absorb bit-tests into block (Z (ANDconst [c] x) yes no) && oneBit(c) -> (TBZ {ntz(c)} x yes no) (NZ (ANDconst [c] x) yes no) && oneBit(c) -> (TBNZ {ntz(c)} x yes no) @@ -1058,6 +1086,17 @@ (MUL (NEG x) y) -> (MNEG x y) (MULW (NEG x) y) -> (MNEGW x y) +// madd/msub +(ADD a l:(MUL x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADD a x y) +(SUB a l:(MUL x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUB a x y) +(ADD a l:(MNEG x y)) && l.Uses==1 && 
x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUB a x y) +(SUB a l:(MNEG x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADD a x y) + +(ADD a l:(MULW x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADDW a x y) +(SUB a l:(MULW x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUBW a x y) +(ADD a l:(MNEGW x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUBW a x y) +(SUB a l:(MNEGW x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADDW a x y) + // mul by constant (MUL x (MOVDconst [-1])) -> (NEG x) (MUL _ (MOVDconst [0])) -> (MOVDconst [0]) diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go index 96f2ac3ceb..2c434f4a74 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go @@ -139,6 +139,7 @@ func init() { gp1flags = regInfo{inputs: []regMask{gpg}} gp1flags1 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}} gp21 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}} + gp31 = regInfo{inputs: []regMask{gpg, gpg, gpg}, outputs: []regMask{gp}} gp21nog = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}} gp2flags = regInfo{inputs: []regMask{gpg, gpg}} gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}} @@ -235,6 +236,10 @@ func init() { {name: "FMSUBD", argLength: 3, reg: fp31, asm: "FMSUBD"}, // +arg0 - (arg1 * arg2) {name: "FNMSUBS", argLength: 3, reg: fp31, asm: "FNMSUBS"}, // -arg0 + (arg1 * arg2) {name: "FNMSUBD", argLength: 3, reg: fp31, asm: "FNMSUBD"}, 
// -arg0 + (arg1 * arg2) + {name: "MADD", argLength: 3, reg: gp31, asm: "MADD"}, // +arg0 + (arg1 * arg2) + {name: "MADDW", argLength: 3, reg: gp31, asm: "MADDW"}, // +arg0 + (arg1 * arg2), 32-bit + {name: "MSUB", argLength: 3, reg: gp31, asm: "MSUB"}, // +arg0 - (arg1 * arg2) + {name: "MSUBW", argLength: 3, reg: gp31, asm: "MSUBW"}, // +arg0 - (arg1 * arg2), 32-bit // shifts {name: "SLL", argLength: 2, reg: gp21, asm: "LSL"}, // arg0 << arg1, shift amount is mod 64 diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 1c9d263deb..6243bfca4d 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1129,6 +1129,10 @@ const ( OpARM64FMSUBD OpARM64FNMSUBS OpARM64FNMSUBD + OpARM64MADD + OpARM64MADDW + OpARM64MSUB + OpARM64MSUBW OpARM64SLL OpARM64SLLconst OpARM64SRL @@ -14954,6 +14958,66 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "MADD", + argLen: 3, + asm: arm64.AMADD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + }, + outputs: []outputInfo{ + {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30 + }, + }, + }, + { + name: "MADDW", + argLen: 3, + asm: arm64.AMADDW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + 
}, + outputs: []outputInfo{ + {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30 + }, + }, + }, + { + name: "MSUB", + argLen: 3, + asm: arm64.AMSUB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + }, + outputs: []outputInfo{ + {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30 + }, + }, + }, + { + name: "MSUBW", + argLen: 3, + asm: arm64.AMSUBW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + }, + outputs: []outputInfo{ + {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30 + }, + }, + }, { name: "SLL", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index fc93273f36..a84d1afdf4 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -16,7 +16,7 @@ var _ = types.TypeMem // in case not otherwise used func rewriteValueARM64(v *Value) bool { switch v.Op { case OpARM64ADD: - return rewriteValueARM64_OpARM64ADD_0(v) + return rewriteValueARM64_OpARM64ADD_0(v) || rewriteValueARM64_OpARM64ADD_10(v) case OpARM64ADDconst: return 
rewriteValueARM64_OpARM64ADDconst_0(v) case OpARM64ADDshiftLL: @@ -284,7 +284,7 @@ func rewriteValueARM64(v *Value) bool { case OpARM64STP: return rewriteValueARM64_OpARM64STP_0(v) case OpARM64SUB: - return rewriteValueARM64_OpARM64SUB_0(v) + return rewriteValueARM64_OpARM64SUB_0(v) || rewriteValueARM64_OpARM64SUB_10(v) case OpARM64SUBconst: return rewriteValueARM64_OpARM64SUBconst_0(v) case OpARM64SUBshiftLL: @@ -905,6 +905,185 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool { v.AddArg(x) return true } + // match: (ADD a l:(MUL x y)) + // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // result: (MADD a x y) + for { + _ = v.Args[1] + a := v.Args[0] + l := v.Args[1] + if l.Op != OpARM64MUL { + break + } + _ = l.Args[1] + x := l.Args[0] + y := l.Args[1] + if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + break + } + v.reset(OpARM64MADD) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADD l:(MUL x y) a) + // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // result: (MADD a x y) + for { + _ = v.Args[1] + l := v.Args[0] + if l.Op != OpARM64MUL { + break + } + _ = l.Args[1] + x := l.Args[0] + y := l.Args[1] + a := v.Args[1] + if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + break + } + v.reset(OpARM64MADD) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADD a l:(MNEG x y)) + // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // result: (MSUB a x y) + for { + _ = v.Args[1] + a := v.Args[0] + l := v.Args[1] + if l.Op != OpARM64MNEG { + break + } + _ = l.Args[1] + x := l.Args[0] + y := l.Args[1] + if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != 
OpARM64MOVDconst && clobber(l)) { + break + } + v.reset(OpARM64MSUB) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADD l:(MNEG x y) a) + // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // result: (MSUB a x y) + for { + _ = v.Args[1] + l := v.Args[0] + if l.Op != OpARM64MNEG { + break + } + _ = l.Args[1] + x := l.Args[0] + y := l.Args[1] + a := v.Args[1] + if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + break + } + v.reset(OpARM64MSUB) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADD a l:(MULW x y)) + // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // result: (MADDW a x y) + for { + _ = v.Args[1] + a := v.Args[0] + l := v.Args[1] + if l.Op != OpARM64MULW { + break + } + _ = l.Args[1] + x := l.Args[0] + y := l.Args[1] + if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + break + } + v.reset(OpARM64MADDW) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADD l:(MULW x y) a) + // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // result: (MADDW a x y) + for { + _ = v.Args[1] + l := v.Args[0] + if l.Op != OpARM64MULW { + break + } + _ = l.Args[1] + x := l.Args[0] + y := l.Args[1] + a := v.Args[1] + if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + break + } + v.reset(OpARM64MADDW) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADD a l:(MNEGW x y)) + // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // 
result: (MSUBW a x y) + for { + _ = v.Args[1] + a := v.Args[0] + l := v.Args[1] + if l.Op != OpARM64MNEGW { + break + } + _ = l.Args[1] + x := l.Args[0] + y := l.Args[1] + if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + break + } + v.reset(OpARM64MSUBW) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADD l:(MNEGW x y) a) + // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // result: (MSUBW a x y) + for { + _ = v.Args[1] + l := v.Args[0] + if l.Op != OpARM64MNEGW { + break + } + _ = l.Args[1] + x := l.Args[0] + y := l.Args[1] + a := v.Args[1] + if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + break + } + v.reset(OpARM64MSUBW) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + return false +} +func rewriteValueARM64_OpARM64ADD_10(v *Value) bool { // match: (ADD x (NEG y)) // cond: // result: (SUB x y) @@ -24624,6 +24803,94 @@ func rewriteValueARM64_OpARM64SUB_0(v *Value) bool { v.AddArg(x) return true } + // match: (SUB a l:(MUL x y)) + // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // result: (MSUB a x y) + for { + _ = v.Args[1] + a := v.Args[0] + l := v.Args[1] + if l.Op != OpARM64MUL { + break + } + _ = l.Args[1] + x := l.Args[0] + y := l.Args[1] + if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + break + } + v.reset(OpARM64MSUB) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (SUB a l:(MNEG x y)) + // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // result: (MADD a x y) + for { + _ = v.Args[1] + a := v.Args[0] + l := v.Args[1] + if l.Op != 
OpARM64MNEG { + break + } + _ = l.Args[1] + x := l.Args[0] + y := l.Args[1] + if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + break + } + v.reset(OpARM64MADD) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (SUB a l:(MULW x y)) + // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // result: (MSUBW a x y) + for { + _ = v.Args[1] + a := v.Args[0] + l := v.Args[1] + if l.Op != OpARM64MULW { + break + } + _ = l.Args[1] + x := l.Args[0] + y := l.Args[1] + if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + break + } + v.reset(OpARM64MSUBW) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (SUB a l:(MNEGW x y)) + // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // result: (MADDW a x y) + for { + _ = v.Args[1] + a := v.Args[0] + l := v.Args[1] + if l.Op != OpARM64MNEGW { + break + } + _ = l.Args[1] + x := l.Args[0] + y := l.Args[1] + if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + break + } + v.reset(OpARM64MADDW) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } // match: (SUB x x) // cond: // result: (MOVDconst [0]) @@ -24721,6 +24988,9 @@ func rewriteValueARM64_OpARM64SUB_0(v *Value) bool { v.AddArg(y) return true } + return false +} +func rewriteValueARM64_OpARM64SUB_10(v *Value) bool { // match: (SUB x0 x1:(SRAconst [c] y)) // cond: clobberIfDead(x1) // result: (SUBshiftRA x0 y [c]) @@ -32608,6 +32878,138 @@ func rewriteBlockARM64(b *Block) bool { b.Aux = nil return true } + // match: (EQ (CMPconst [0] z:(MADD a x y)) yes no) + // cond: z.Uses==1 + // result: (EQ (CMN a (MUL x y)) yes no) + for 
{ + v := b.Control + if v.Op != OpARM64CMPconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MADD { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64EQ + v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } + // match: (EQ (CMPconst [0] z:(MSUB a x y)) yes no) + // cond: z.Uses==1 + // result: (EQ (CMP a (MUL x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MSUB { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64EQ + v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } + // match: (EQ (CMPWconst [0] z:(MADDW a x y)) yes no) + // cond: z.Uses==1 + // result: (EQ (CMNW a (MULW x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPWconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MADDW { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64EQ + v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } + // match: (EQ (CMPWconst [0] z:(MSUBW a x y)) yes no) + // cond: z.Uses==1 + // result: (EQ (CMPW a (MULW x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPWconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MSUBW { + 
break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64EQ + v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } // match: (EQ (TSTconst [c] x) yes no) // cond: oneBit(c) // result: (TBZ {ntz(c)} x yes no) @@ -32784,6 +33186,138 @@ func rewriteBlockARM64(b *Block) bool { b.Aux = nil return true } + // match: (GE (CMPconst [0] z:(MADD a x y)) yes no) + // cond: z.Uses==1 + // result: (GE (CMN a (MUL x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MADD { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64GE + v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } + // match: (GE (CMPconst [0] z:(MSUB a x y)) yes no) + // cond: z.Uses==1 + // result: (GE (CMP a (MUL x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MSUB { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64GE + v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } + // match: (GE (CMPWconst [0] z:(MADDW a x y)) yes no) + // cond: z.Uses==1 + // result: (GE (CMNW a (MULW x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPWconst { + break + } + if v.AuxInt != 0 { + break + } + z := 
v.Args[0] + if z.Op != OpARM64MADDW { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64GE + v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } + // match: (GE (CMPWconst [0] z:(MSUBW a x y)) yes no) + // cond: z.Uses==1 + // result: (GE (CMPW a (MULW x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPWconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MSUBW { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64GE + v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } // match: (GE (CMPWconst [0] x) yes no) // cond: // result: (TBZ {int64(31)} x yes no) @@ -32956,6 +33490,138 @@ func rewriteBlockARM64(b *Block) bool { b.Aux = nil return true } + // match: (GT (CMPconst [0] z:(MADD a x y)) yes no) + // cond: z.Uses==1 + // result: (GT (CMN a (MUL x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MADD { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64GT + v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } + // match: (GT (CMPconst [0] z:(MSUB a x y)) yes no) + // cond: z.Uses==1 + // result: (GT (CMP a (MUL x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPconst { + break + } + if 
v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MSUB { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64GT + v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } + // match: (GT (CMPWconst [0] z:(MADDW a x y)) yes no) + // cond: z.Uses==1 + // result: (GT (CMNW a (MULW x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPWconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MADDW { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64GT + v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } + // match: (GT (CMPWconst [0] z:(MSUBW a x y)) yes no) + // cond: z.Uses==1 + // result: (GT (CMPW a (MULW x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPWconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MSUBW { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64GT + v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } // match: (GT (FlagEQ) yes no) // cond: // result: (First nil no yes) @@ -33248,6 +33914,138 @@ func rewriteBlockARM64(b *Block) bool { b.Aux = nil return true } + // match: (LE (CMPconst [0] z:(MADD a x y)) yes no) + // cond: z.Uses==1 + // result: (LE (CMN a (MUL x y)) yes no) + for { + v := b.Control + if v.Op != 
OpARM64CMPconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MADD { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64LE + v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } + // match: (LE (CMPconst [0] z:(MSUB a x y)) yes no) + // cond: z.Uses==1 + // result: (LE (CMP a (MUL x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MSUB { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64LE + v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } + // match: (LE (CMPWconst [0] z:(MADDW a x y)) yes no) + // cond: z.Uses==1 + // result: (LE (CMNW a (MULW x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPWconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MADDW { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64LE + v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } + // match: (LE (CMPWconst [0] z:(MSUBW a x y)) yes no) + // cond: z.Uses==1 + // result: (LE (CMPW a (MULW x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPWconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MSUBW { + break + } + _ = z.Args[2] + a 
:= z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64LE + v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } // match: (LE (FlagEQ) yes no) // cond: // result: (First nil yes no) @@ -33386,6 +34184,138 @@ func rewriteBlockARM64(b *Block) bool { b.Aux = nil return true } + // match: (LT (CMPconst [0] z:(MADD a x y)) yes no) + // cond: z.Uses==1 + // result: (LT (CMN a (MUL x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MADD { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64LT + v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } + // match: (LT (CMPconst [0] z:(MSUB a x y)) yes no) + // cond: z.Uses==1 + // result: (LT (CMP a (MUL x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MSUB { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64LT + v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } + // match: (LT (CMPWconst [0] z:(MADDW a x y)) yes no) + // cond: z.Uses==1 + // result: (LT (CMNW a (MULW x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPWconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MADDW { + break + } + _ 
= z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64LT + v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } + // match: (LT (CMPWconst [0] z:(MSUBW a x y)) yes no) + // cond: z.Uses==1 + // result: (LT (CMPW a (MULW x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPWconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MSUBW { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64LT + v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } // match: (LT (CMPWconst [0] x) yes no) // cond: // result: (TBNZ {int64(31)} x yes no) @@ -33706,6 +34636,138 @@ func rewriteBlockARM64(b *Block) bool { b.Aux = nil return true } + // match: (NE (CMPconst [0] z:(MADD a x y)) yes no) + // cond: z.Uses==1 + // result: (NE (CMN a (MUL x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MADD { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64NE + v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } + // match: (NE (CMPconst [0] z:(MSUB a x y)) yes no) + // cond: z.Uses==1 + // result: (NE (CMP a (MUL x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if 
z.Op != OpARM64MSUB { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64NE + v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } + // match: (NE (CMPWconst [0] z:(MADDW a x y)) yes no) + // cond: z.Uses==1 + // result: (NE (CMNW a (MULW x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPWconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MADDW { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64NE + v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } + // match: (NE (CMPWconst [0] z:(MSUBW a x y)) yes no) + // cond: z.Uses==1 + // result: (NE (CMPW a (MULW x y)) yes no) + for { + v := b.Control + if v.Op != OpARM64CMPWconst { + break + } + if v.AuxInt != 0 { + break + } + z := v.Args[0] + if z.Op != OpARM64MSUBW { + break + } + _ = z.Args[2] + a := z.Args[0] + x := z.Args[1] + y := z.Args[2] + if !(z.Uses == 1) { + break + } + b.Kind = BlockARM64NE + v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags) + v0.AddArg(a) + v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type) + v1.AddArg(x) + v1.AddArg(y) + v0.AddArg(v1) + b.SetControl(v0) + b.Aux = nil + return true + } // match: (NE (TSTconst [c] x) yes no) // cond: oneBit(c) // result: (TBNZ {ntz(c)} x yes no) diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go index 09a2fa091e..c0539256d5 100644 --- a/test/codegen/arithmetic.go +++ b/test/codegen/arithmetic.go @@ -205,3 +205,9 @@ func AddMul(x int) int { // amd64:"LEAQ\t1" return 2*x + 1 } + +func MULA(a, b, c uint32) 
uint32 { + // arm:`MULA` + // arm64:`MADDW` + return a*b + c +} -- 2.48.1