]> Cypherpunks repositories - gostls13.git/commit
cmd/compile: optimize arm64 comparison of x and 0.0 with "FCMP $(0.0), Fn"
authorfanzha02 <fannie.zhang@arm.com>
Fri, 15 Feb 2019 11:21:46 +0000 (11:21 +0000)
committerCherry Zhang <cherryyz@google.com>
Thu, 7 Mar 2019 22:04:09 +0000 (22:04 +0000)
commit27cce773d3338d282039fd250c0f9bff3ecab3b0
tree256a0a73d6f37b641cd5e675398334f87dc78e3c
parent6efd51c6b768ecb55cd39b0dcb8a43d9a6c8e1b2
cmd/compile: optimize arm64 comparison of x and 0.0 with "FCMP $(0.0), Fn"

Code:
func comp(x float64) bool {return x < 0}

Previous version:
  FMOVD "".x(FP), F0
  FMOVD ZR, F1
  FCMPD F1, F0
  CSET MI, R0
  MOVB R0, "".~r1+8(FP)
  RET (R30)

Optimized version:
  FMOVD "".x(FP), F0
  FCMPD $(0.0), F0
  CSET MI, R0
  MOVB R0, "".~r1+8(FP)
  RET (R30)

Math package benchmark results:
name                   old time/op          new time/op          delta
Acos-8                   77.500000ns +- 0%    77.400000ns +- 0%   -0.13%  (p=0.000 n=9+10)
Acosh-8                  98.600000ns +- 0%    98.100000ns +- 0%   -0.51%  (p=0.000 n=10+9)
Asin-8                   67.600000ns +- 0%    66.600000ns +- 0%   -1.48%  (p=0.000 n=9+10)
Asinh-8                 108.000000ns +- 0%   109.000000ns +- 0%   +0.93%  (p=0.000 n=10+10)
Atan-8                   36.788889ns +- 0%    36.000000ns +- 0%   -2.14%  (p=0.000 n=9+10)
Atanh-8                 104.000000ns +- 0%   105.000000ns +- 0%   +0.96%  (p=0.000 n=10+10)
Atan2-8                  67.100000ns +- 0%    66.600000ns +- 0%   -0.75%  (p=0.000 n=10+10)
Cbrt-8                   89.100000ns +- 0%    82.000000ns +- 0%   -7.97%  (p=0.000 n=10+10)
Erf-8                    43.500000ns +- 0%    43.000000ns +- 0%   -1.15%  (p=0.000 n=10+10)
Erfc-8                   49.000000ns +- 0%    48.220000ns +- 0%   -1.59%  (p=0.000 n=9+10)
Erfinv-8                 59.100000ns +- 0%    58.600000ns +- 0%   -0.85%  (p=0.000 n=10+10)
Erfcinv-8                59.100000ns +- 0%    58.600000ns +- 0%   -0.85%  (p=0.000 n=10+10)
Expm1-8                  56.600000ns +- 0%    56.040000ns +- 0%   -0.99%  (p=0.000 n=8+10)
Exp2Go-8                 97.600000ns +- 0%    99.400000ns +- 0%   +1.84%  (p=0.000 n=10+10)
Dim-8                     2.500000ns +- 0%     2.250000ns +- 0%  -10.00%  (p=0.000 n=10+10)
Mod-8                   108.000000ns +- 0%   106.000000ns +- 0%   -1.85%  (p=0.000 n=8+8)
Frexp-8                  12.000000ns +- 0%    12.500000ns +- 0%   +4.17%  (p=0.000 n=10+10)
Gamma-8                  67.100000ns +- 0%    67.600000ns +- 0%   +0.75%  (p=0.000 n=10+10)
Hypot-8                  17.100000ns +- 0%    17.000000ns +- 0%   -0.58%  (p=0.002 n=8+10)
Ilogb-8                   9.010000ns +- 0%     8.510000ns +- 0%   -5.55%  (p=0.000 n=10+9)
J1-8                    288.000000ns +- 0%   287.000000ns +- 0%   -0.35%  (p=0.000 n=10+10)
Jn-8                    605.000000ns +- 0%   604.000000ns +- 0%   -0.17%  (p=0.001 n=8+9)
Logb-8                   10.600000ns +- 0%    10.500000ns +- 0%   -0.94%  (p=0.000 n=9+10)
Log2-8                   16.500000ns +- 0%    17.000000ns +- 0%   +3.03%  (p=0.000 n=10+10)
PowFrac-8               232.000000ns +- 0%   233.000000ns +- 0%   +0.43%  (p=0.000 n=10+10)
Remainder-8              70.600000ns +- 0%    69.600000ns +- 0%   -1.42%  (p=0.000 n=10+10)
SqrtGoLatency-8          77.600000ns +- 0%    76.600000ns +- 0%   -1.29%  (p=0.000 n=10+10)
Tanh-8                   97.600000ns +- 0%    94.100000ns +- 0%   -3.59%  (p=0.000 n=10+10)
Y1-8                    289.000000ns +- 0%   288.000000ns +- 0%   -0.35%  (p=0.000 n=10+10)
Yn-8                    603.000000ns +- 0%   589.000000ns +- 0%   -2.32%  (p=0.000 n=10+10)

Change-Id: I6920734f8662b329aa58f5b8e4eeae73b409984d
Reviewed-on: https://go-review.googlesource.com/c/go/+/164719
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
src/cmd/compile/internal/arm64/ssa.go
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/ARM64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteARM64.go