]> Cypherpunks repositories - gostls13.git/commit
math: implement IsInf using Abs
authorMichael Munday <mndygolang+git@gmail.com>
Mon, 11 Aug 2025 22:26:59 +0000 (23:26 +0100)
committerSean Liao <sean@liao.dev>
Mon, 25 Aug 2025 19:43:51 +0000 (12:43 -0700)
commit4e05a070c4f88ebf43f0873e144c62d02d56060b
tree7095f9935fc4562657916e926ca8e2705a1eff75
parent1eed4f32a06ac1083120663a8451cb9cfeb7b496
math: implement IsInf using Abs

Abs is an intrinsic (or a relatively cheap operation) on most
architectures. Using it in IsInf typically saves a branch when
`sign` is 0 (note the `sign` variable is typically a constant).

This change doesn't make a huge difference on amd64 (these
benchmarks are fairly noisy too) but removing the branch will
allow rewrite rules to detect and optimize infinity checks on
other architectures. For example, riscv64 can check for
infinities with the FCLASSD instruction and s390x can use the
TCDB instruction.

goos: linux
goarch: amd64
pkg: math
cpu: 12th Gen Intel(R) Core(TM) i7-12700T
                    │          sec/op          │    sec/op      vs base                │
Acos                              4.317n ±  1%    4.321n ±  0%        ~ (p=0.466 n=10)
Acosh                             8.857n ±  1%    8.411n ±  2%   -5.05% (p=0.001 n=10)
Asin                              4.260n ±  1%    4.204n ±  6%   -1.31% (p=0.021 n=10)
Asinh                             10.63n ±  2%    10.37n ±  0%   -2.49% (p=0.000 n=10)
Atan                              2.493n ±  1%    2.368n ±  0%   -5.01% (p=0.000 n=10)
Atanh                             8.820n ±  4%    8.770n ±  2%        ~ (p=0.579 n=10)
Atan2                             4.212n ±  1%    4.066n ± 11%   -3.45% (p=0.023 n=10)
Cbrt                              4.859n ±  0%    4.845n ±  0%   -0.29% (p=0.000 n=10)
Ceil                             0.3877n ±  3%   0.2514n ±  0%  -35.17% (p=0.000 n=10)
Copysign                         0.3479n ±  2%   0.4179n ±  0%  +20.14% (p=0.000 n=10)
Cos                               4.734n ±  2%    4.486n ±  0%   -5.26% (p=0.000 n=10)
Cosh                              5.244n ±  0%    5.071n ±  0%   -3.29% (p=0.000 n=10)
Erf                               2.975n ±  1%    2.788n ±  0%   -6.29% (p=0.000 n=10)
Erfc                              3.259n ±  1%    3.121n ±  0%   -4.23% (p=0.000 n=10)
Erfinv                            4.015n ±  1%    3.904n ±  0%   -2.76% (p=0.000 n=10)
Erfcinv                           4.166n ±  1%    4.039n ±  0%   -3.04% (p=0.000 n=10)
Exp                               3.567n ±  1%    3.429n ±  0%   -3.87% (p=0.000 n=10)
ExpGo                             9.173n ±  1%    8.368n ±  2%   -8.78% (p=0.000 n=10)
Expm1                             4.466n ±  3%    4.419n ±  0%   -1.05% (p=0.000 n=10)
Exp2                              8.328n ±  0%    8.046n ±  0%   -3.39% (p=0.000 n=10)
Exp2Go                            8.796n ±  5%    8.237n ±  2%   -6.36% (p=0.000 n=10)
Abs                              0.2400n ±  2%   0.2144n ±  0%  -10.71% (p=0.000 n=10)
Dim                              0.4077n ±  3%   0.3795n ±  1%   -6.91% (p=0.000 n=10)
Floor                            0.3616n ±  2%   0.2528n ±  3%  -30.10% (p=0.000 n=10)
Max                               1.401n ±  1%    1.344n ±  1%   -4.14% (p=0.000 n=10)
Min                               1.391n ±  1%    1.345n ±  1%   -3.27% (p=0.000 n=10)
Mod                               15.45n ±  1%    15.62n ±  2%        ~ (p=0.066 n=10)
Frexp                             1.838n ±  2%    1.605n ±  1%  -12.70% (p=0.000 n=10)
Gamma                             4.465n ±  1%    4.458n ±  1%        ~ (p=0.256 n=10)
Hypot                             2.237n ±  1%    2.208n ±  0%   -1.32% (p=0.000 n=10)
HypotGo                           2.610n ±  3%    2.663n ±  5%        ~ (p=0.280 n=10)
Ilogb                             1.793n ±  1%    1.566n ±  1%  -12.66% (p=0.000 n=10)
J0                                22.11n ±  1%    21.45n ±  1%   -2.99% (p=0.000 n=10)
J1                                21.71n ±  1%    21.38n ±  1%   -1.54% (p=0.000 n=10)
Jn                                46.43n ±  1%    45.83n ±  1%   -1.30% (p=0.001 n=10)
Ldexp                             2.360n ±  1%    2.111n ±  1%  -10.51% (p=0.000 n=10)
Lgamma                            4.728n ±  1%    4.850n ±  2%   +2.59% (p=0.000 n=10)
Log                               4.304n ±  2%    4.228n ±  1%   -1.78% (p=0.000 n=10)
Logb                              1.833n ±  2%    1.635n ±  2%  -10.80% (p=0.000 n=10)
Log1p                             5.262n ±  2%    5.173n ±  2%   -1.69% (p=0.001 n=10)
Log10                             4.534n ±  1%    4.474n ±  1%   -1.33% (p=0.024 n=10)
Log2                              2.510n ±  2%    2.246n ±  2%  -10.48% (p=0.000 n=10)
Modf                              1.712n ±  3%    1.700n ±  1%        ~ (p=0.055 n=10)
Nextafter32                       2.190n ±  3%    2.187n ±  0%        ~ (p=0.266 n=10)
Nextafter64                       2.184n ±  0%    2.183n ±  0%   -0.05% (p=0.017 n=10)
PowInt                            11.45n ±  7%    11.32n ±  9%        ~ (p=0.137 n=10)
PowFrac                           27.46n ±  3%    27.04n ±  1%   -1.55% (p=0.001 n=10)
Pow10Pos                         0.5367n ±  3%   0.5466n ±  2%   +1.84% (p=0.009 n=10)
Pow10Neg                         0.8939n ±  1%   0.8720n ±  2%   -2.45% (p=0.000 n=10)
Round                             1.218n ±  1%    1.198n ±  1%   -1.56% (p=0.005 n=10)
RoundToEven                       1.711n ±  0%    1.710n ±  0%        ~ (p=0.464 n=10)
Remainder                         12.87n ± 10%    13.79n ± 14%   +7.11% (p=0.027 n=10)
Signbit                          0.4072n ±  2%   0.3839n ±  2%   -5.71% (p=0.000 n=10)
Sin                               4.102n ±  1%    4.058n ±  3%        ~ (p=0.138 n=10)
Sincos                            5.837n ±  1%    5.715n ±  2%   -2.10% (p=0.000 n=10)
Sinh                              5.622n ±  1%    5.567n ±  2%   -0.96% (p=0.006 n=10)
SqrtIndirect                     0.4284n ±  0%   0.4279n ±  0%        ~ (p=0.084 n=10)
SqrtLatency                       2.779n ±  0%    2.777n ±  0%        ~ (p=0.089 n=10)
SqrtIndirectLatency               2.777n ±  0%    2.778n ±  0%        ~ (p=0.305 n=10)
SqrtGoLatency                     24.00n ±  0%    24.51n ±  0%   +2.12% (p=0.000 n=10)
SqrtPrime                         673.0n ±  0%    673.0n ±  0%        ~ (p=0.574 n=10)
Tan                               4.111n ±  4%    4.123n ±  5%        ~ (p=0.424 n=10)
Tanh                              5.787n ±  1%    5.723n ±  1%   -1.11% (p=0.010 n=10)
Trunc                            0.3441n ±  3%   0.2596n ±  2%  -24.56% (p=0.000 n=10)
Y0                                21.63n ±  2%    21.07n ±  2%   -2.61% (p=0.001 n=10)
Y1                                21.42n ±  1%    20.93n ±  3%   -2.29% (p=0.041 n=10)
Yn                                45.78n ±  1%    45.83n ±  1%        ~ (p=0.671 n=10)
Float64bits                      0.2187n ±  2%   0.2199n ±  2%        ~ (p=0.138 n=10)
Float64frombits                  0.2198n ±  1%   0.2199n ±  1%        ~ (p=0.956 n=10)
Float32bits                      0.2237n ±  2%   0.2213n ±  1%        ~ (p=0.060 n=10)
Float32frombits                  0.2251n ±  1%   0.2219n ±  2%   -1.42% (p=0.000 n=10)
FMA                              0.8557n ±  1%   0.8555n ±  0%        ~ (p=0.286 n=10)
geomean                           3.186n          3.070n         -3.61%

Change-Id: I4814bb1e3d9d20e9d8cd7689e8d5383e36b00331
Reviewed-on: https://go-review.googlesource.com/c/go/+/694955
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Sean Liao <sean@liao.dev>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
src/math/bits.go