From 27cce773d3338d282039fd250c0f9bff3ecab3b0 Mon Sep 17 00:00:00 2001 From: fanzha02 Date: Fri, 15 Feb 2019 11:21:46 +0000 Subject: [PATCH] cmd/compile: optimize arm64 comparison of x and 0.0 with "FCMP $(0.0), Fn" Code: func comp(x float64) bool {return x < 0} Previous version: FMOVD "".x(FP), F0 FMOVD ZR, F1 FCMPD F1, F0 CSET MI, R0 MOVB R0, "".~r1+8(FP) RET (R30) Optimized version: FMOVD "".x(FP), F0 FCMPD $(0.0), F0 CSET MI, R0 MOVB R0, "".~r1+8(FP) RET (R30) Math package benchmark results: name old time/op new time/op delta Acos-8 77.500000ns +- 0% 77.400000ns +- 0% -0.13% (p=0.000 n=9+10) Acosh-8 98.600000ns +- 0% 98.100000ns +- 0% -0.51% (p=0.000 n=10+9) Asin-8 67.600000ns +- 0% 66.600000ns +- 0% -1.48% (p=0.000 n=9+10) Asinh-8 108.000000ns +- 0% 109.000000ns +- 0% +0.93% (p=0.000 n=10+10) Atan-8 36.788889ns +- 0% 36.000000ns +- 0% -2.14% (p=0.000 n=9+10) Atanh-8 104.000000ns +- 0% 105.000000ns +- 0% +0.96% (p=0.000 n=10+10) Atan2-8 67.100000ns +- 0% 66.600000ns +- 0% -0.75% (p=0.000 n=10+10) Cbrt-8 89.100000ns +- 0% 82.000000ns +- 0% -7.97% (p=0.000 n=10+10) Erf-8 43.500000ns +- 0% 43.000000ns +- 0% -1.15% (p=0.000 n=10+10) Erfc-8 49.000000ns +- 0% 48.220000ns +- 0% -1.59% (p=0.000 n=9+10) Erfinv-8 59.100000ns +- 0% 58.600000ns +- 0% -0.85% (p=0.000 n=10+10) Erfcinv-8 59.100000ns +- 0% 58.600000ns +- 0% -0.85% (p=0.000 n=10+10) Expm1-8 56.600000ns +- 0% 56.040000ns +- 0% -0.99% (p=0.000 n=8+10) Exp2Go-8 97.600000ns +- 0% 99.400000ns +- 0% +1.84% (p=0.000 n=10+10) Dim-8 2.500000ns +- 0% 2.250000ns +- 0% -10.00% (p=0.000 n=10+10) Mod-8 108.000000ns +- 0% 106.000000ns +- 0% -1.85% (p=0.000 n=8+8) Frexp-8 12.000000ns +- 0% 12.500000ns +- 0% +4.17% (p=0.000 n=10+10) Gamma-8 67.100000ns +- 0% 67.600000ns +- 0% +0.75% (p=0.000 n=10+10) Hypot-8 17.100000ns +- 0% 17.000000ns +- 0% -0.58% (p=0.002 n=8+10) Ilogb-8 9.010000ns +- 0% 8.510000ns +- 0% -5.55% (p=0.000 n=10+9) J1-8 288.000000ns +- 0% 287.000000ns +- 0% -0.35% (p=0.000 n=10+10) Jn-8 605.000000ns +- 0% 604.000000ns +- 0% -0.17% (p=0.001 n=8+9) Logb-8 10.600000ns +- 0% 10.500000ns +- 0% -0.94% (p=0.000 n=9+10) Log2-8 16.500000ns +- 0% 17.000000ns +- 0% +3.03% (p=0.000 n=10+10) PowFrac-8 232.000000ns +- 0% 233.000000ns +- 0% +0.43% (p=0.000 n=10+10) Remainder-8 70.600000ns +- 0% 69.600000ns +- 0% -1.42% (p=0.000 n=10+10) SqrtGoLatency-8 77.600000ns +- 0% 76.600000ns +- 0% -1.29% (p=0.000 n=10+10) Tanh-8 97.600000ns +- 0% 94.100000ns +- 0% -3.59% (p=0.000 n=10+10) Y1-8 289.000000ns +- 0% 288.000000ns +- 0% -0.35% (p=0.000 n=10+10) Yn-8 603.000000ns +- 0% 589.000000ns +- 0% -2.32% (p=0.000 n=10+10) Change-Id: I6920734f8662b329aa58f5b8e4eeae73b409984d Reviewed-on: https://go-review.googlesource.com/c/go/+/164719 Reviewed-by: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot --- src/cmd/compile/internal/arm64/ssa.go | 6 + src/cmd/compile/internal/ssa/gen/ARM64.rules | 10 ++ src/cmd/compile/internal/ssa/gen/ARM64Ops.go | 3 + src/cmd/compile/internal/ssa/opGen.go | 22 +++ src/cmd/compile/internal/ssa/rewriteARM64.go | 158 +++++++++++++++++++ 5 files changed, 199 insertions(+) diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go index 0bc8f3a5ab..0ea3c191ac 100644 --- a/src/cmd/compile/internal/arm64/ssa.go +++ b/src/cmd/compile/internal/arm64/ssa.go @@ -301,6 +301,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.From.Val = math.Float64frombits(uint64(v.AuxInt)) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() + case ssa.OpARM64FCMPS0, + ssa.OpARM64FCMPD0: + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_FCONST + p.From.Val = math.Float64frombits(0) + p.Reg = v.Args[0].Reg() case ssa.OpARM64CMP, ssa.OpARM64CMPW, ssa.OpARM64CMN, diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules index 8b263a092f..3adb7895a2 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules @@ -348,6 +348,12 @@ (Geq32U x y) -> (GreaterEqualU (CMPW x y)) (Geq64U x y) -> (GreaterEqualU (CMP x y)) +// Optimize comparision between a floating-point value and 0.0 with "FCMP $(0.0), Fn" +(FCMPS x (FMOVSconst [0])) -> (FCMPS0 x) +(FCMPS (FMOVSconst [0]) x) -> (InvertFlags (FCMPS0 x)) +(FCMPD x (FMOVDconst [0])) -> (FCMPD0 x) +(FCMPD (FMOVDconst [0]) x) -> (InvertFlags (FCMPD0 x)) + // CSEL needs a flag-generating argument. Synthesize a CMPW if necessary. (CondSelect x y bool) && flagArg(bool) != nil -> (CSEL {bool.Op} x y flagArg(bool)) (CondSelect x y bool) && flagArg(bool) == nil -> (CSEL {OpARM64NotEqual} x y (CMPWconst [0] bool)) @@ -1615,6 +1621,10 @@ (LessEqualU (InvertFlags x)) -> (GreaterEqualU x) (GreaterEqual (InvertFlags x)) -> (LessEqual x) (GreaterEqualU (InvertFlags x)) -> (LessEqualU x) +(LessThanF (InvertFlags x)) -> (GreaterThanF x) +(LessEqualF (InvertFlags x)) -> (GreaterEqualF x) +(GreaterThanF (InvertFlags x)) -> (LessThanF x) +(GreaterEqualF (InvertFlags x)) -> (LessEqualF x) // Boolean-generating instructions always // zero upper bit of the register; no need to zero-extend diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go index 2a65d547bd..b6bf10315e 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go @@ -158,6 +158,7 @@ func init() { fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}} fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}} fp2flags = regInfo{inputs: []regMask{fp, fp}} + fp1flags = regInfo{inputs: []regMask{fp}} fpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}} fp2load = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{fp}} fpstore = regInfo{inputs: []regMask{gpspsbg, fp}} @@ -271,6 +272,8 @@ func init() { {name: "TSTWconst", argLength: 1, reg: gp1flags, asm: "TSTW", aux: "Int32", typ: "Flags"}, // arg0 & auxInt compare to 0, 32 bit {name: "FCMPS", argLength: 2, reg: fp2flags, asm: "FCMPS", typ: "Flags"}, // arg0 compare to arg1, float32 {name: "FCMPD", argLength: 2, reg: fp2flags, asm: "FCMPD", typ: "Flags"}, // arg0 compare to arg1, float64 + {name: "FCMPS0", argLength: 1, reg: fp1flags, asm: "FCMPS", typ: "Flags"}, // arg0 compare to 0, float32 + {name: "FCMPD0", argLength: 1, reg: fp1flags, asm: "FCMPD", typ: "Flags"}, // arg0 compare to 0, float64 // shifted ops {name: "MVNshiftLL", argLength: 1, reg: gp11, asm: "MVN", aux: "Int64"}, // ^(arg0<