Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: optimize arm64 comparison of x and 0.0 with "FCMP $(0.0), Fn"
author fanzha02 <fannie.zhang@arm.com>
Fri, 15 Feb 2019 11:21:46 +0000 (11:21 +0000)
committer Cherry Zhang <cherryyz@google.com>
Thu, 7 Mar 2019 22:04:09 +0000 (22:04 +0000)
Code:
func comp(x float64) bool {return x < 0}

Previous version:
  FMOVD "".x(FP), F0
  FMOVD ZR, F1
  FCMPD F1, F0
  CSET MI, R0
  MOVB R0, "".~r1+8(FP)
  RET (R30)

Optimized version:
  FMOVD "".x(FP), F0
  FCMPD $(0.0), F0
  CSET MI, R0
  MOVB R0, "".~r1+8(FP)
  RET (R30)

Math package benchmark results:
name                   old time/op          new time/op          delta
Acos-8                   77.500000ns +- 0%    77.400000ns +- 0%   -0.13%  (p=0.000 n=9+10)
Acosh-8                  98.600000ns +- 0%    98.100000ns +- 0%   -0.51%  (p=0.000 n=10+9)
Asin-8                   67.600000ns +- 0%    66.600000ns +- 0%   -1.48%  (p=0.000 n=9+10)
Asinh-8                 108.000000ns +- 0%   109.000000ns +- 0%   +0.93%  (p=0.000 n=10+10)
Atan-8                   36.788889ns +- 0%    36.000000ns +- 0%   -2.14%  (p=0.000 n=9+10)
Atanh-8                 104.000000ns +- 0%   105.000000ns +- 0%   +0.96%  (p=0.000 n=10+10)
Atan2-8                  67.100000ns +- 0%    66.600000ns +- 0%   -0.75%  (p=0.000 n=10+10)
Cbrt-8                   89.100000ns +- 0%    82.000000ns +- 0%   -7.97%  (p=0.000 n=10+10)
Erf-8                    43.500000ns +- 0%    43.000000ns +- 0%   -1.15%  (p=0.000 n=10+10)
Erfc-8                   49.000000ns +- 0%    48.220000ns +- 0%   -1.59%  (p=0.000 n=9+10)
Erfinv-8                 59.100000ns +- 0%    58.600000ns +- 0%   -0.85%  (p=0.000 n=10+10)
Erfcinv-8                59.100000ns +- 0%    58.600000ns +- 0%   -0.85%  (p=0.000 n=10+10)
Expm1-8                  56.600000ns +- 0%    56.040000ns +- 0%   -0.99%  (p=0.000 n=8+10)
Exp2Go-8                 97.600000ns +- 0%    99.400000ns +- 0%   +1.84%  (p=0.000 n=10+10)
Dim-8                     2.500000ns +- 0%     2.250000ns +- 0%  -10.00%  (p=0.000 n=10+10)
Mod-8                   108.000000ns +- 0%   106.000000ns +- 0%   -1.85%  (p=0.000 n=8+8)
Frexp-8                  12.000000ns +- 0%    12.500000ns +- 0%   +4.17%  (p=0.000 n=10+10)
Gamma-8                  67.100000ns +- 0%    67.600000ns +- 0%   +0.75%  (p=0.000 n=10+10)
Hypot-8                  17.100000ns +- 0%    17.000000ns +- 0%   -0.58%  (p=0.002 n=8+10)
Ilogb-8                   9.010000ns +- 0%     8.510000ns +- 0%   -5.55%  (p=0.000 n=10+9)
J1-8                    288.000000ns +- 0%   287.000000ns +- 0%   -0.35%  (p=0.000 n=10+10)
Jn-8                    605.000000ns +- 0%   604.000000ns +- 0%   -0.17%  (p=0.001 n=8+9)
Logb-8                   10.600000ns +- 0%    10.500000ns +- 0%   -0.94%  (p=0.000 n=9+10)
Log2-8                   16.500000ns +- 0%    17.000000ns +- 0%   +3.03%  (p=0.000 n=10+10)
PowFrac-8               232.000000ns +- 0%   233.000000ns +- 0%   +0.43%  (p=0.000 n=10+10)
Remainder-8              70.600000ns +- 0%    69.600000ns +- 0%   -1.42%  (p=0.000 n=10+10)
SqrtGoLatency-8          77.600000ns +- 0%    76.600000ns +- 0%   -1.29%  (p=0.000 n=10+10)
Tanh-8                   97.600000ns +- 0%    94.100000ns +- 0%   -3.59%  (p=0.000 n=10+10)
Y1-8                    289.000000ns +- 0%   288.000000ns +- 0%   -0.35%  (p=0.000 n=10+10)
Yn-8                    603.000000ns +- 0%   589.000000ns +- 0%   -2.32%  (p=0.000 n=10+10)

Change-Id: I6920734f8662b329aa58f5b8e4eeae73b409984d
Reviewed-on: https://go-review.googlesource.com/c/go/+/164719
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/cmd/compile/internal/arm64/ssa.go
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/ARM64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteARM64.go

index 0bc8f3a5aba8e6d062e78905832688df39cce3bf..0ea3c191acdfab3450915a46436ebd9e5a87bd86 100644 (file)
@@ -301,6 +301,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.From.Val = math.Float64frombits(uint64(v.AuxInt))
                p.To.Type = obj.TYPE_REG
                p.To.Reg = v.Reg()
+       case ssa.OpARM64FCMPS0,
+               ssa.OpARM64FCMPD0:
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_FCONST
+               p.From.Val = math.Float64frombits(0)
+               p.Reg = v.Args[0].Reg()
        case ssa.OpARM64CMP,
                ssa.OpARM64CMPW,
                ssa.OpARM64CMN,
index 8b263a092fc6c3fdcf2008ba461563ffc98bc5bf..3adb7895a2c7ea001150fffc34153de52ba97f0a 100644 (file)
 (Geq32U x y) -> (GreaterEqualU (CMPW x y))
 (Geq64U x y) -> (GreaterEqualU (CMP x y))
 
+// Optimize comparison between a floating-point value and 0.0 with "FCMP $(0.0), Fn"
+(FCMPS x (FMOVSconst [0])) -> (FCMPS0 x)
+(FCMPS (FMOVSconst [0]) x) -> (InvertFlags (FCMPS0 x))
+(FCMPD x (FMOVDconst [0])) -> (FCMPD0 x)
+(FCMPD (FMOVDconst [0]) x) -> (InvertFlags (FCMPD0 x))
+
 // CSEL needs a flag-generating argument. Synthesize a CMPW if necessary.
 (CondSelect x y bool) && flagArg(bool) != nil -> (CSEL {bool.Op} x y flagArg(bool))
 (CondSelect x y bool) && flagArg(bool) == nil -> (CSEL {OpARM64NotEqual} x y (CMPWconst [0] bool))
 (LessEqualU (InvertFlags x)) -> (GreaterEqualU x)
 (GreaterEqual (InvertFlags x)) -> (LessEqual x)
 (GreaterEqualU (InvertFlags x)) -> (LessEqualU x)
+(LessThanF (InvertFlags x)) -> (GreaterThanF x)
+(LessEqualF (InvertFlags x)) -> (GreaterEqualF x)
+(GreaterThanF (InvertFlags x)) -> (LessThanF x)
+(GreaterEqualF (InvertFlags x)) -> (LessEqualF x)
 
 // Boolean-generating instructions always
 // zero upper bit of the register; no need to zero-extend
index 2a65d547bdaa7a97bbc1620238e4f4a2ca0e06b7..b6bf10315e0c66e9bfd6da9994ced9b1a126ad95 100644 (file)
@@ -158,6 +158,7 @@ func init() {
                fp21      = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
                fp31      = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
                fp2flags  = regInfo{inputs: []regMask{fp, fp}}
+               fp1flags  = regInfo{inputs: []regMask{fp}}
                fpload    = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}}
                fp2load   = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{fp}}
                fpstore   = regInfo{inputs: []regMask{gpspsbg, fp}}
@@ -271,6 +272,8 @@ func init() {
                {name: "TSTWconst", argLength: 1, reg: gp1flags, asm: "TSTW", aux: "Int32", typ: "Flags"}, // arg0 & auxInt compare to 0, 32 bit
                {name: "FCMPS", argLength: 2, reg: fp2flags, asm: "FCMPS", typ: "Flags"},                  // arg0 compare to arg1, float32
                {name: "FCMPD", argLength: 2, reg: fp2flags, asm: "FCMPD", typ: "Flags"},                  // arg0 compare to arg1, float64
+               {name: "FCMPS0", argLength: 1, reg: fp1flags, asm: "FCMPS", typ: "Flags"},                 // arg0 compare to 0, float32
+               {name: "FCMPD0", argLength: 1, reg: fp1flags, asm: "FCMPD", typ: "Flags"},                 // arg0 compare to 0, float64
 
                // shifted ops
                {name: "MVNshiftLL", argLength: 1, reg: gp11, asm: "MVN", aux: "Int64"},                   // ^(arg0<<auxInt)
index b50532fb693ca0e790765e77276e3c426bfdd83e..9222f52b58bca43e2c9c29b83f3d4fd98a57e8ff 100644 (file)
@@ -1218,6 +1218,8 @@ const (
        OpARM64TSTWconst
        OpARM64FCMPS
        OpARM64FCMPD
+       OpARM64FCMPS0
+       OpARM64FCMPD0
        OpARM64MVNshiftLL
        OpARM64MVNshiftRL
        OpARM64MVNshiftRA
@@ -16146,6 +16148,26 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "FCMPS0",
+               argLen: 1,
+               asm:    arm64.AFCMPS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+               },
+       },
+       {
+               name:   "FCMPD0",
+               argLen: 1,
+               asm:    arm64.AFCMPD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+               },
+       },
        {
                name:    "MVNshiftLL",
                auxType: auxInt64,
index 7ad04ead93d2ee4ee35d26469feafea286fb7776..a8acb4fec8ae27927ea9c6fbd5e20b424f4f9a4e 100644 (file)
@@ -95,6 +95,10 @@ func rewriteValueARM64(v *Value) bool {
                return rewriteValueARM64_OpARM64FADDD_0(v)
        case OpARM64FADDS:
                return rewriteValueARM64_OpARM64FADDS_0(v)
+       case OpARM64FCMPD:
+               return rewriteValueARM64_OpARM64FCMPD_0(v)
+       case OpARM64FCMPS:
+               return rewriteValueARM64_OpARM64FCMPS_0(v)
        case OpARM64FMOVDfpgp:
                return rewriteValueARM64_OpARM64FMOVDfpgp_0(v)
        case OpARM64FMOVDgpfp:
@@ -133,18 +137,26 @@ func rewriteValueARM64(v *Value) bool {
                return rewriteValueARM64_OpARM64FSUBS_0(v)
        case OpARM64GreaterEqual:
                return rewriteValueARM64_OpARM64GreaterEqual_0(v)
+       case OpARM64GreaterEqualF:
+               return rewriteValueARM64_OpARM64GreaterEqualF_0(v)
        case OpARM64GreaterEqualU:
                return rewriteValueARM64_OpARM64GreaterEqualU_0(v)
        case OpARM64GreaterThan:
                return rewriteValueARM64_OpARM64GreaterThan_0(v)
+       case OpARM64GreaterThanF:
+               return rewriteValueARM64_OpARM64GreaterThanF_0(v)
        case OpARM64GreaterThanU:
                return rewriteValueARM64_OpARM64GreaterThanU_0(v)
        case OpARM64LessEqual:
                return rewriteValueARM64_OpARM64LessEqual_0(v)
+       case OpARM64LessEqualF:
+               return rewriteValueARM64_OpARM64LessEqualF_0(v)
        case OpARM64LessEqualU:
                return rewriteValueARM64_OpARM64LessEqualU_0(v)
        case OpARM64LessThan:
                return rewriteValueARM64_OpARM64LessThan_0(v)
+       case OpARM64LessThanF:
+               return rewriteValueARM64_OpARM64LessThanF_0(v)
        case OpARM64LessThanU:
                return rewriteValueARM64_OpARM64LessThanU_0(v)
        case OpARM64MADD:
@@ -5224,6 +5236,88 @@ func rewriteValueARM64_OpARM64FADDS_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueARM64_OpARM64FCMPD_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (FCMPD x (FMOVDconst [0]))
+       // cond:
+       // result: (FCMPD0 x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64FMOVDconst {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpARM64FCMPD0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (FCMPD (FMOVDconst [0]) x)
+       // cond:
+       // result: (InvertFlags (FCMPD0 x))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FMOVDconst {
+                       break
+               }
+               if v_0.AuxInt != 0 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpARM64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpARM64FCMPD0, types.TypeFlags)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64FCMPS_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (FCMPS x (FMOVSconst [0]))
+       // cond:
+       // result: (FCMPS0 x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64FMOVSconst {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpARM64FCMPS0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (FCMPS (FMOVSconst [0]) x)
+       // cond:
+       // result: (InvertFlags (FCMPS0 x))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FMOVSconst {
+                       break
+               }
+               if v_0.AuxInt != 0 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpARM64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpARM64FCMPS0, types.TypeFlags)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
 func rewriteValueARM64_OpARM64FMOVDfpgp_0(v *Value) bool {
        b := v.Block
        _ = b
@@ -6310,6 +6404,22 @@ func rewriteValueARM64_OpARM64GreaterEqual_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueARM64_OpARM64GreaterEqualF_0(v *Value) bool {
+       // match: (GreaterEqualF (InvertFlags x))
+       // cond:
+       // result: (LessEqualF x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64InvertFlags {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpARM64LessEqualF)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
 func rewriteValueARM64_OpARM64GreaterEqualU_0(v *Value) bool {
        // match: (GreaterEqualU (FlagEQ))
        // cond:
@@ -6462,6 +6572,22 @@ func rewriteValueARM64_OpARM64GreaterThan_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueARM64_OpARM64GreaterThanF_0(v *Value) bool {
+       // match: (GreaterThanF (InvertFlags x))
+       // cond:
+       // result: (LessThanF x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64InvertFlags {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpARM64LessThanF)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
 func rewriteValueARM64_OpARM64GreaterThanU_0(v *Value) bool {
        // match: (GreaterThanU (FlagEQ))
        // cond:
@@ -6614,6 +6740,22 @@ func rewriteValueARM64_OpARM64LessEqual_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueARM64_OpARM64LessEqualF_0(v *Value) bool {
+       // match: (LessEqualF (InvertFlags x))
+       // cond:
+       // result: (GreaterEqualF x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64InvertFlags {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpARM64GreaterEqualF)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
 func rewriteValueARM64_OpARM64LessEqualU_0(v *Value) bool {
        // match: (LessEqualU (FlagEQ))
        // cond:
@@ -6766,6 +6908,22 @@ func rewriteValueARM64_OpARM64LessThan_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueARM64_OpARM64LessThanF_0(v *Value) bool {
+       // match: (LessThanF (InvertFlags x))
+       // cond:
+       // result: (GreaterThanF x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64InvertFlags {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpARM64GreaterThanF)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
 func rewriteValueARM64_OpARM64LessThanU_0(v *Value) bool {
        // match: (LessThanU (FlagEQ))
        // cond: