]> Cypherpunks repositories - gostls13.git/commit
cmd/compile: use FCLASSD for subnormal checks on riscv64
author Michael Munday <mndygolang+git@gmail.com>
Sat, 23 Aug 2025 23:15:29 +0000 (00:15 +0100)
committer Gopher Robot <gobot@golang.org>
Wed, 12 Nov 2025 18:03:41 +0000 (10:03 -0800)
commit 34aef8936657a5c4f6a561aeb959c2bb4eebae30
tree 87c13bbdb782330b3afc3a96fe92a701e093dde9
parent 0c28789bd7dfc55099cac86a3212dda0d6c091f6
cmd/compile: use FCLASSD for subnormal checks on riscv64

Only implemented for 64 bit floating point operations for now.

goos: linux
goarch: riscv64
pkg: math
cpu: Spacemit(R) X60
                    │       sec/op        │   sec/op     vs base                │
Acos                          154.1n ± 0%   154.1n ± 0%        ~ (p=0.303 n=10)
Acosh                         215.8n ± 6%   226.7n ± 0%        ~ (p=0.439 n=10)
Asin                          149.2n ± 1%   149.2n ± 0%        ~ (p=0.700 n=10)
Asinh                         262.1n ± 0%   258.5n ± 0%   -1.37% (p=0.000 n=10)
Atan                          99.48n ± 0%   99.49n ± 0%        ~ (p=0.836 n=10)
Atanh                         244.9n ± 0%   243.8n ± 0%   -0.43% (p=0.002 n=10)
Atan2                         158.2n ± 1%   153.3n ± 0%   -3.10% (p=0.000 n=10)
Cbrt                          186.8n ± 0%   181.1n ± 0%   -3.03% (p=0.000 n=10)
Ceil                          36.71n ± 1%   36.71n ± 0%        ~ (p=0.434 n=10)
Copysign                      6.531n ± 1%   6.526n ± 0%        ~ (p=0.268 n=10)
Cos                           98.19n ± 0%   95.40n ± 0%   -2.84% (p=0.000 n=10)
Cosh                          233.1n ± 0%   222.6n ± 0%   -4.50% (p=0.000 n=10)
Erf                           122.5n ± 0%   114.2n ± 0%   -6.78% (p=0.000 n=10)
Erfc                          126.0n ± 1%   116.6n ± 0%   -7.46% (p=0.000 n=10)
Erfinv                        138.8n ± 0%   138.6n ± 0%        ~ (p=0.082 n=10)
Erfcinv                       140.0n ± 0%   139.7n ± 0%        ~ (p=0.359 n=10)
Exp                           193.3n ± 0%   184.2n ± 0%   -4.68% (p=0.000 n=10)
ExpGo                         204.8n ± 0%   194.5n ± 0%   -5.03% (p=0.000 n=10)
Expm1                         152.5n ± 1%   145.0n ± 0%   -4.92% (p=0.000 n=10)
Exp2                          174.5n ± 0%   164.2n ± 0%   -5.85% (p=0.000 n=10)
Exp2Go                        184.4n ± 1%   175.4n ± 0%   -4.88% (p=0.000 n=10)
Abs                           4.912n ± 0%   4.914n ± 0%        ~ (p=0.283 n=10)
Dim                           15.50n ± 1%   15.52n ± 1%        ~ (p=0.331 n=10)
Floor                         36.89n ± 1%   36.76n ± 1%        ~ (p=0.325 n=10)
Max                           31.05n ± 1%   31.17n ± 1%        ~ (p=0.628 n=10)
Min                           31.01n ± 0%   31.06n ± 0%        ~ (p=0.767 n=10)
Mod                           294.1n ± 0%   245.6n ± 0%  -16.52% (p=0.000 n=10)
Frexp                         44.86n ± 1%   35.20n ± 0%  -21.53% (p=0.000 n=10)
Gamma                         195.8n ± 0%   185.4n ± 1%   -5.29% (p=0.000 n=10)
Hypot                         84.91n ± 0%   84.54n ± 1%   -0.43% (p=0.006 n=10)
HypotGo                       96.70n ± 0%   95.42n ± 1%   -1.32% (p=0.000 n=10)
Ilogb                         45.03n ± 0%   35.07n ± 1%  -22.10% (p=0.000 n=10)
J0                            634.5n ± 0%   627.2n ± 0%   -1.16% (p=0.000 n=10)
J1                            644.5n ± 0%   636.9n ± 0%   -1.18% (p=0.000 n=10)
Jn                            1.357µ ± 0%   1.344µ ± 0%   -0.92% (p=0.000 n=10)
Ldexp                         49.89n ± 0%   39.96n ± 0%  -19.90% (p=0.000 n=10)
Lgamma                        186.6n ± 0%   184.3n ± 0%   -1.21% (p=0.000 n=10)
Log                           150.4n ± 0%   141.1n ± 0%   -6.15% (p=0.000 n=10)
Logb                          46.70n ± 0%   35.89n ± 0%  -23.15% (p=0.000 n=10)
Log1p                         164.1n ± 0%   163.9n ± 0%        ~ (p=0.122 n=10)
Log10                         153.1n ± 0%   143.5n ± 0%   -6.24% (p=0.000 n=10)
Log2                          58.83n ± 0%   49.75n ± 0%  -15.43% (p=0.000 n=10)
Modf                          40.82n ± 1%   40.78n ± 0%        ~ (p=0.239 n=10)
Nextafter32                   49.15n ± 0%   48.93n ± 0%   -0.44% (p=0.011 n=10)
Nextafter64                   43.33n ± 0%   43.23n ± 0%        ~ (p=0.228 n=10)
PowInt                        269.4n ± 0%   243.8n ± 0%   -9.49% (p=0.000 n=10)
PowFrac                       618.0n ± 0%   571.7n ± 0%   -7.48% (p=0.000 n=10)
Pow10Pos                      13.09n ± 0%   13.05n ± 0%   -0.31% (p=0.003 n=10)
Pow10Neg                      30.99n ± 1%   30.99n ± 0%        ~ (p=0.173 n=10)
Round                         23.73n ± 0%   23.65n ± 0%   -0.36% (p=0.011 n=10)
RoundToEven                   27.87n ± 0%   27.73n ± 0%   -0.48% (p=0.003 n=10)
Remainder                     282.1n ± 0%   249.6n ± 0%  -11.52% (p=0.000 n=10)
Signbit                       11.46n ± 0%   11.42n ± 0%   -0.39% (p=0.003 n=10)
Sin                           115.2n ± 0%   113.2n ± 0%   -1.74% (p=0.000 n=10)
Sincos                        140.6n ± 0%   138.6n ± 0%   -1.39% (p=0.000 n=10)
Sinh                          252.0n ± 0%   241.4n ± 0%   -4.21% (p=0.000 n=10)
SqrtIndirect                  4.909n ± 0%   4.893n ± 0%   -0.34% (p=0.021 n=10)
SqrtLatency                   19.57n ± 1%   19.57n ± 0%        ~ (p=0.087 n=10)
SqrtIndirectLatency           19.64n ± 0%   19.57n ± 0%   -0.36% (p=0.025 n=10)
SqrtGoLatency                 198.1n ± 0%   197.4n ± 0%   -0.35% (p=0.014 n=10)
SqrtPrime                     5.733µ ± 0%   5.725µ ± 0%        ~ (p=0.116 n=10)
Tan                           149.1n ± 0%   146.8n ± 0%   -1.54% (p=0.000 n=10)
Tanh                          248.2n ± 1%   238.1n ± 0%   -4.05% (p=0.000 n=10)
Trunc                         36.86n ± 0%   36.70n ± 0%   -0.43% (p=0.029 n=10)
Y0                            638.2n ± 0%   633.6n ± 0%   -0.71% (p=0.000 n=10)
Y1                            641.8n ± 0%   636.1n ± 0%   -0.87% (p=0.000 n=10)
Yn                            1.358µ ± 0%   1.345µ ± 0%   -0.92% (p=0.000 n=10)
Float64bits                   5.721n ± 0%   5.709n ± 0%   -0.22% (p=0.044 n=10)
Float64frombits               4.905n ± 0%   4.893n ± 0%        ~ (p=0.266 n=10)
Float32bits                   12.27n ± 0%   12.23n ± 0%        ~ (p=0.122 n=10)
Float32frombits               4.909n ± 0%   4.893n ± 0%   -0.32% (p=0.024 n=10)
FMA                           6.556n ± 0%   6.526n ± 0%        ~ (p=0.283 n=10)
geomean                       86.82n        83.75n        -3.54%

Change-Id: I522297a79646d76543d516accce291f5a3cea337
Reviewed-on: https://go-review.googlesource.com/c/go/+/717560
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
src/cmd/compile/internal/ssa/_gen/RISCV64.rules
src/cmd/compile/internal/ssa/rewriteRISCV64.go
src/cmd/compile/internal/test/float_test.go
test/codegen/floats.go