Cypherpunks repositories - gostls13.git/commitdiff
math: implement IsInf using Abs
author: Michael Munday <mndygolang+git@gmail.com>
Mon, 11 Aug 2025 22:26:59 +0000 (23:26 +0100)
committer: Sean Liao <sean@liao.dev>
Mon, 25 Aug 2025 19:43:51 +0000 (12:43 -0700)
Abs is an intrinsic (or a relatively cheap operation) on most
architectures. Using it in IsInf typically saves a branch when
`sign` is 0 (note the `sign` variable is typically a constant).

This change doesn't make a huge difference on amd64 (these
benchmarks are fairly noisy too) but removing the branch will
allow rewrite rules to detect and optimize infinity checks on
other architectures. For example, riscv64 can check for
infinities with the FCLASSD instruction and s390x can use the
TCDB instruction.

goos: linux
goarch: amd64
pkg: math
cpu: 12th Gen Intel(R) Core(TM) i7-12700T
                    │          sec/op          │    sec/op      vs base                │
Acos                              4.317n ±  1%    4.321n ±  0%        ~ (p=0.466 n=10)
Acosh                             8.857n ±  1%    8.411n ±  2%   -5.05% (p=0.001 n=10)
Asin                              4.260n ±  1%    4.204n ±  6%   -1.31% (p=0.021 n=10)
Asinh                             10.63n ±  2%    10.37n ±  0%   -2.49% (p=0.000 n=10)
Atan                              2.493n ±  1%    2.368n ±  0%   -5.01% (p=0.000 n=10)
Atanh                             8.820n ±  4%    8.770n ±  2%        ~ (p=0.579 n=10)
Atan2                             4.212n ±  1%    4.066n ± 11%   -3.45% (p=0.023 n=10)
Cbrt                              4.859n ±  0%    4.845n ±  0%   -0.29% (p=0.000 n=10)
Ceil                             0.3877n ±  3%   0.2514n ±  0%  -35.17% (p=0.000 n=10)
Copysign                         0.3479n ±  2%   0.4179n ±  0%  +20.14% (p=0.000 n=10)
Cos                               4.734n ±  2%    4.486n ±  0%   -5.26% (p=0.000 n=10)
Cosh                              5.244n ±  0%    5.071n ±  0%   -3.29% (p=0.000 n=10)
Erf                               2.975n ±  1%    2.788n ±  0%   -6.29% (p=0.000 n=10)
Erfc                              3.259n ±  1%    3.121n ±  0%   -4.23% (p=0.000 n=10)
Erfinv                            4.015n ±  1%    3.904n ±  0%   -2.76% (p=0.000 n=10)
Erfcinv                           4.166n ±  1%    4.039n ±  0%   -3.04% (p=0.000 n=10)
Exp                               3.567n ±  1%    3.429n ±  0%   -3.87% (p=0.000 n=10)
ExpGo                             9.173n ±  1%    8.368n ±  2%   -8.78% (p=0.000 n=10)
Expm1                             4.466n ±  3%    4.419n ±  0%   -1.05% (p=0.000 n=10)
Exp2                              8.328n ±  0%    8.046n ±  0%   -3.39% (p=0.000 n=10)
Exp2Go                            8.796n ±  5%    8.237n ±  2%   -6.36% (p=0.000 n=10)
Abs                              0.2400n ±  2%   0.2144n ±  0%  -10.71% (p=0.000 n=10)
Dim                              0.4077n ±  3%   0.3795n ±  1%   -6.91% (p=0.000 n=10)
Floor                            0.3616n ±  2%   0.2528n ±  3%  -30.10% (p=0.000 n=10)
Max                               1.401n ±  1%    1.344n ±  1%   -4.14% (p=0.000 n=10)
Min                               1.391n ±  1%    1.345n ±  1%   -3.27% (p=0.000 n=10)
Mod                               15.45n ±  1%    15.62n ±  2%        ~ (p=0.066 n=10)
Frexp                             1.838n ±  2%    1.605n ±  1%  -12.70% (p=0.000 n=10)
Gamma                             4.465n ±  1%    4.458n ±  1%        ~ (p=0.256 n=10)
Hypot                             2.237n ±  1%    2.208n ±  0%   -1.32% (p=0.000 n=10)
HypotGo                           2.610n ±  3%    2.663n ±  5%        ~ (p=0.280 n=10)
Ilogb                             1.793n ±  1%    1.566n ±  1%  -12.66% (p=0.000 n=10)
J0                                22.11n ±  1%    21.45n ±  1%   -2.99% (p=0.000 n=10)
J1                                21.71n ±  1%    21.38n ±  1%   -1.54% (p=0.000 n=10)
Jn                                46.43n ±  1%    45.83n ±  1%   -1.30% (p=0.001 n=10)
Ldexp                             2.360n ±  1%    2.111n ±  1%  -10.51% (p=0.000 n=10)
Lgamma                            4.728n ±  1%    4.850n ±  2%   +2.59% (p=0.000 n=10)
Log                               4.304n ±  2%    4.228n ±  1%   -1.78% (p=0.000 n=10)
Logb                              1.833n ±  2%    1.635n ±  2%  -10.80% (p=0.000 n=10)
Log1p                             5.262n ±  2%    5.173n ±  2%   -1.69% (p=0.001 n=10)
Log10                             4.534n ±  1%    4.474n ±  1%   -1.33% (p=0.024 n=10)
Log2                              2.510n ±  2%    2.246n ±  2%  -10.48% (p=0.000 n=10)
Modf                              1.712n ±  3%    1.700n ±  1%        ~ (p=0.055 n=10)
Nextafter32                       2.190n ±  3%    2.187n ±  0%        ~ (p=0.266 n=10)
Nextafter64                       2.184n ±  0%    2.183n ±  0%   -0.05% (p=0.017 n=10)
PowInt                            11.45n ±  7%    11.32n ±  9%        ~ (p=0.137 n=10)
PowFrac                           27.46n ±  3%    27.04n ±  1%   -1.55% (p=0.001 n=10)
Pow10Pos                         0.5367n ±  3%   0.5466n ±  2%   +1.84% (p=0.009 n=10)
Pow10Neg                         0.8939n ±  1%   0.8720n ±  2%   -2.45% (p=0.000 n=10)
Round                             1.218n ±  1%    1.198n ±  1%   -1.56% (p=0.005 n=10)
RoundToEven                       1.711n ±  0%    1.710n ±  0%        ~ (p=0.464 n=10)
Remainder                         12.87n ± 10%    13.79n ± 14%   +7.11% (p=0.027 n=10)
Signbit                          0.4072n ±  2%   0.3839n ±  2%   -5.71% (p=0.000 n=10)
Sin                               4.102n ±  1%    4.058n ±  3%        ~ (p=0.138 n=10)
Sincos                            5.837n ±  1%    5.715n ±  2%   -2.10% (p=0.000 n=10)
Sinh                              5.622n ±  1%    5.567n ±  2%   -0.96% (p=0.006 n=10)
SqrtIndirect                     0.4284n ±  0%   0.4279n ±  0%        ~ (p=0.084 n=10)
SqrtLatency                       2.779n ±  0%    2.777n ±  0%        ~ (p=0.089 n=10)
SqrtIndirectLatency               2.777n ±  0%    2.778n ±  0%        ~ (p=0.305 n=10)
SqrtGoLatency                     24.00n ±  0%    24.51n ±  0%   +2.12% (p=0.000 n=10)
SqrtPrime                         673.0n ±  0%    673.0n ±  0%        ~ (p=0.574 n=10)
Tan                               4.111n ±  4%    4.123n ±  5%        ~ (p=0.424 n=10)
Tanh                              5.787n ±  1%    5.723n ±  1%   -1.11% (p=0.010 n=10)
Trunc                            0.3441n ±  3%   0.2596n ±  2%  -24.56% (p=0.000 n=10)
Y0                                21.63n ±  2%    21.07n ±  2%   -2.61% (p=0.001 n=10)
Y1                                21.42n ±  1%    20.93n ±  3%   -2.29% (p=0.041 n=10)
Yn                                45.78n ±  1%    45.83n ±  1%        ~ (p=0.671 n=10)
Float64bits                      0.2187n ±  2%   0.2199n ±  2%        ~ (p=0.138 n=10)
Float64frombits                  0.2198n ±  1%   0.2199n ±  1%        ~ (p=0.956 n=10)
Float32bits                      0.2237n ±  2%   0.2213n ±  1%        ~ (p=0.060 n=10)
Float32frombits                  0.2251n ±  1%   0.2219n ±  2%   -1.42% (p=0.000 n=10)
FMA                              0.8557n ±  1%   0.8555n ±  0%        ~ (p=0.286 n=10)
geomean                           3.186n          3.070n         -3.61%

Change-Id: I4814bb1e3d9d20e9d8cd7689e8d5383e36b00331
Reviewed-on: https://go-review.googlesource.com/c/go/+/694955
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Sean Liao <sean@liao.dev>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
src/math/bits.go

index c5cb93b15945d47365865da990e93e46e430c1bd..3716a411f4f1b81bb99f441f0102ab3082e8477b 100644 (file)
@@ -48,7 +48,12 @@ func IsInf(f float64, sign int) bool {
        // To avoid the floating-point hardware, could use:
        //      x := Float64bits(f);
        //      return sign >= 0 && x == uvinf || sign <= 0 && x == uvneginf;
-       return sign >= 0 && f > MaxFloat64 || sign <= 0 && f < -MaxFloat64
+       if sign == 0 {
+               f = Abs(f)
+       } else if sign < 0 {
+               f = -f
+       }
+       return f > MaxFloat64
 }
 
 // normalize returns a normal number y and exponent exp