From: Junxian Zhu Date: Mon, 3 Apr 2023 07:26:54 +0000 (+0800) Subject: math: optimize math.Abs on mips64x X-Git-Tag: go1.21rc1~667 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=574431cfcd51075ea864c40753dbf86c1851802f;p=gostls13.git math: optimize math.Abs on mips64x This commit optimized math.Abs function implementation on mips64x. Tested on loongson 3A2000. goos: linux goarch: mips64le pkg: math │ oldmath │ newmath │ │ sec/op │ sec/op vs base │ Acos-4 258.0n ± ∞ ¹ 257.1n ± ∞ ¹ -0.35% (p=0.008 n=5) Acosh-4 417.0n ± ∞ ¹ 377.9n ± ∞ ¹ -9.38% (p=0.008 n=5) Asin-4 248.0n ± ∞ ¹ 259.9n ± ∞ ¹ +4.80% (p=0.008 n=5) Asinh-4 439.6n ± ∞ ¹ 408.3n ± ∞ ¹ -7.12% (p=0.008 n=5) Atan-4 189.6n ± ∞ ¹ 188.8n ± ∞ ¹ ~ (p=0.056 n=5) Atanh-4 390.0n ± ∞ ¹ 356.4n ± ∞ ¹ -8.62% (p=0.008 n=5) Atan2-4 279.0n ± ∞ ¹ 263.9n ± ∞ ¹ -5.41% (p=0.008 n=5) Cbrt-4 314.2n ± ∞ ¹ 322.3n ± ∞ ¹ +2.58% (p=0.008 n=5) Ceil-4 139.7n ± ∞ ¹ 136.6n ± ∞ ¹ -2.22% (p=0.008 n=5) Compare-4 21.11n ± ∞ ¹ 21.09n ± ∞ ¹ ~ (p=0.405 n=5) Compare32-4 20.10n ± ∞ ¹ 20.12n ± ∞ ¹ ~ (p=0.206 n=5) Copysign-4 32.17n ± ∞ ¹ 35.71n ± ∞ ¹ +11.00% (p=0.008 n=5) Cos-4 222.8n ± ∞ ¹ 169.8n ± ∞ ¹ -23.79% (p=0.008 n=5) Cosh-4 550.2n ± ∞ ¹ 477.4n ± ∞ ¹ -13.23% (p=0.008 n=5) Erf-4 171.6n ± ∞ ¹ 174.5n ± ∞ ¹ ~ (p=0.635 n=5) Erfc-4 182.6n ± ∞ ¹ 170.2n ± ∞ ¹ -6.79% (p=0.008 n=5) Erfinv-4 177.6n ± ∞ ¹ 196.6n ± ∞ ¹ +10.70% (p=0.008 n=5) Erfcinv-4 177.8n ± ∞ ¹ 197.8n ± ∞ ¹ +11.25% (p=0.008 n=5) Exp-4 422.8n ± ∞ ¹ 382.1n ± ∞ ¹ -9.63% (p=0.008 n=5) ExpGo-4 416.1n ± ∞ ¹ 383.2n ± ∞ ¹ -7.91% (p=0.008 n=5) Expm1-4 232.9n ± ∞ ¹ 252.2n ± ∞ ¹ +8.29% (p=0.008 n=5) Exp2-4 404.8n ± ∞ ¹ 389.1n ± ∞ ¹ -3.88% (p=0.008 n=5) Exp2Go-4 407.0n ± ∞ ¹ 372.3n ± ∞ ¹ -8.53% (p=0.008 n=5) Abs-4 30.120n ± ∞ ¹ 3.014n ± ∞ ¹ -89.99% (p=0.008 n=5) Dim-4 5.021n ± ∞ ¹ 5.023n ± ∞ ¹ ~ (p=0.071 n=5) Floor-4 127.8n ± ∞ ¹ 127.1n ± ∞ ¹ -0.55% (p=0.008 n=5) Max-4 77.69n ± ∞ ¹ 76.33n ± ∞ ¹ -1.75% (p=0.008 n=5) Min-4 83.27n ± ∞ ¹ 77.87n ± ∞ ¹ -6.48% (p=0.008 n=5) Mod-4 906.2n ± ∞ ¹ 692.9n ± ∞ ¹ -23.54% (p=0.008 n=5) Frexp-4 150.6n ± ∞ ¹ 108.6n ± ∞ ¹ -27.89% (p=0.008 n=5) Gamma-4 418.4n ± ∞ ¹ 386.1n ± ∞ ¹ -7.72% (p=0.008 n=5) Hypot-4 148.20n ± ∞ ¹ 93.78n ± ∞ ¹ -36.72% (p=0.008 n=5) HypotGo-4 148.20n ± ∞ ¹ 94.47n ± ∞ ¹ -36.26% (p=0.008 n=5) Ilogb-4 135.50n ± ∞ ¹ 92.38n ± ∞ ¹ -31.82% (p=0.008 n=5) J0-4 937.7n ± ∞ ¹ 861.7n ± ∞ ¹ -8.10% (p=0.008 n=5) J1-4 915.4n ± ∞ ¹ 875.9n ± ∞ ¹ -4.32% (p=0.008 n=5) Jn-4 1.974µ ± ∞ ¹ 1.863µ ± ∞ ¹ -5.62% (p=0.008 n=5) Ldexp-4 158.5n ± ∞ ¹ 129.3n ± ∞ ¹ -18.42% (p=0.008 n=5) Lgamma-4 209.0n ± ∞ ¹ 211.8n ± ∞ ¹ ~ (p=0.095 n=5) Log-4 326.4n ± ∞ ¹ 295.2n ± ∞ ¹ -9.56% (p=0.008 n=5) Logb-4 147.7n ± ∞ ¹ 105.0n ± ∞ ¹ -28.91% (p=0.008 n=5) Log1p-4 303.4n ± ∞ ¹ 266.3n ± ∞ ¹ -12.23% (p=0.008 n=5) Log10-4 329.2n ± ∞ ¹ 298.3n ± ∞ ¹ -9.39% (p=0.008 n=5) Log2-4 187.4n ± ∞ ¹ 153.0n ± ∞ ¹ -18.36% (p=0.008 n=5) Modf-4 110.5n ± ∞ ¹ 103.5n ± ∞ ¹ -6.33% (p=0.008 n=5) Nextafter32-4 128.4n ± ∞ ¹ 121.5n ± ∞ ¹ -5.37% (p=0.016 n=5) Nextafter64-4 109.5n ± ∞ ¹ 110.5n ± ∞ ¹ +0.91% (p=0.008 n=5) PowInt-4 603.3n ± ∞ ¹ 516.4n ± ∞ ¹ -14.40% (p=0.008 n=5) PowFrac-4 1.365µ ± ∞ ¹ 1.183µ ± ∞ ¹ -13.33% (p=0.008 n=5) Pow10Pos-4 15.07n ± ∞ ¹ 15.07n ± ∞ ¹ ~ (p=0.738 n=5) Pow10Neg-4 21.11n ± ∞ ¹ 21.10n ± ∞ ¹ ~ (p=0.190 n=5) Round-4 44.23n ± ∞ ¹ 44.22n ± ∞ ¹ ~ (p=0.635 n=5) RoundToEven-4 50.25n ± ∞ ¹ 46.27n ± ∞ ¹ -7.92% (p=0.008 n=5) Remainder-4 675.6n ± ∞ ¹ 530.4n ± ∞ ¹ -21.49% (p=0.008 n=5) Signbit-4 17.07n ± ∞ ¹ 17.95n ± ∞ ¹ +5.16% (p=0.008 n=5) Sin-4 171.6n ± ∞ ¹ 189.1n ± ∞ ¹ +10.20% (p=0.008 n=5) Sincos-4 201.5n ± ∞ ¹ 200.5n ± ∞ ¹ ~ (p=0.421 n=5) Sinh-4 529.6n ± ∞ ¹ 484.6n ± ∞ ¹ -8.50% (p=0.008 n=5) SqrtIndirect-4 5.021n ± ∞ ¹ 5.023n ± ∞ ¹ +0.04% (p=0.048 n=5) SqrtLatency-4 8.032n ± ∞ ¹ 8.039n ± ∞ ¹ +0.09% (p=0.024 n=5) SqrtIndirectLatency-4 8.036n ± ∞ ¹ 8.038n ± ∞ ¹ ~ (p=0.056 n=5) SqrtGoLatency-4 338.8n ± ∞ ¹ 338.7n ± ∞ ¹ ~ (p=0.841 n=5) SqrtPrime-4 5.379µ ± ∞ ¹ 5.382µ ± ∞ ¹ +0.06% (p=0.048 n=5) Tan-4 182.7n ± ∞ ¹ 191.8n ± ∞ ¹ +4.98% (p=0.008 n=5) Tanh-4 558.7n ± ∞ ¹ 497.6n ± ∞ ¹ -10.94% (p=0.008 n=5) Trunc-4 122.5n ± ∞ ¹ 122.6n ± ∞ ¹ ~ (p=0.405 n=5) Y0-4 892.8n ± ∞ ¹ 851.7n ± ∞ ¹ -4.60% (p=0.008 n=5) Y1-4 887.2n ± ∞ ¹ 863.2n ± ∞ ¹ -2.71% (p=0.008 n=5) Yn-4 1.889µ ± ∞ ¹ 1.832µ ± ∞ ¹ -3.02% (p=0.008 n=5) Float64bits-4 13.05n ± ∞ ¹ 13.06n ± ∞ ¹ +0.08% (p=0.040 n=5) Float64frombits-4 13.05n ± ∞ ¹ 13.06n ± ∞ ¹ ~ (p=0.143 n=5) Float32bits-4 13.05n ± ∞ ¹ 13.06n ± ∞ ¹ +0.08% (p=0.008 n=5) Float32frombits-4 13.05n ± ∞ ¹ 13.08n ± ∞ ¹ +0.23% (p=0.016 n=5) FMA-4 445.7n ± ∞ ¹ 448.1n ± ∞ ¹ +0.54% (p=0.008 n=5) geomean 157.2n 142.8n -9.17% Change-Id: I9bf104848b588c9ecf79401a81d483d7fcdb0a79 Reviewed-on: https://go-review.googlesource.com/c/go/+/481575 Reviewed-by: M Zhuo TryBot-Result: Gopher Robot Reviewed-by: Cherry Mui Auto-Submit: Than McIntosh Reviewed-by: Bryan Mills Run-TryBot: Than McIntosh Reviewed-by: Rong Zhang --- diff --git a/src/cmd/compile/internal/mips64/ssa.go b/src/cmd/compile/internal/mips64/ssa.go index 7ce4005e6d..dda7c4fdd6 100644 --- a/src/cmd/compile/internal/mips64/ssa.go +++ b/src/cmd/compile/internal/mips64/ssa.go @@ -358,6 +358,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { ssa.OpMIPS64MOVDF, ssa.OpMIPS64NEGF, ssa.OpMIPS64NEGD, + ssa.OpMIPS64ABSD, ssa.OpMIPS64SQRTF, ssa.OpMIPS64SQRTD: p := s.Prog(v.Op.Asm()) diff --git a/src/cmd/compile/internal/ssa/_gen/MIPS64.rules b/src/cmd/compile/internal/ssa/_gen/MIPS64.rules index 2144a21352..3aa6a1b420 100644 --- a/src/cmd/compile/internal/ssa/_gen/MIPS64.rules +++ b/src/cmd/compile/internal/ssa/_gen/MIPS64.rules @@ -38,6 +38,9 @@ (Mod8 x y) => (Select0 (DIVV (SignExt8to64 x) (SignExt8to64 y))) (Mod8u x y) => (Select0 (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y))) +// math package intrinsics +(Abs ...) => (ABSD ...) + // (x + y) / 2 with x>=y => (x - y) / 2 + y (Avg64u x y) => (ADDV (SRLVconst (SUBV x y) [1]) y) diff --git a/src/cmd/compile/internal/ssa/_gen/MIPS64Ops.go b/src/cmd/compile/internal/ssa/_gen/MIPS64Ops.go index cc8b4ae155..2b3dd487cd 100644 --- a/src/cmd/compile/internal/ssa/_gen/MIPS64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/MIPS64Ops.go @@ -196,6 +196,7 @@ func init() { {name: "NEGV", argLength: 1, reg: gp11}, // -arg0 {name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"}, // -arg0, float32 {name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64 + {name: "ABSD", argLength: 1, reg: fp11, asm: "ABSD"}, // abs(arg0), float64 {name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64 {name: "SQRTF", argLength: 1, reg: fp11, asm: "SQRTF"}, // sqrt(arg0), float32 diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 5e04805ba7..2507e60f31 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1984,6 +1984,7 @@ const ( OpMIPS64NEGV OpMIPS64NEGF OpMIPS64NEGD + OpMIPS64ABSD OpMIPS64SQRTD OpMIPS64SQRTF OpMIPS64SLLV @@ -26554,6 +26555,19 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "ABSD", + argLen: 1, + asm: mips.AABSD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1152921504338411520}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + outputs: []outputInfo{ + {0, 1152921504338411520}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + }, + }, { name: "SQRTD", argLen: 1, diff --git a/src/cmd/compile/internal/ssa/rewriteMIPS64.go b/src/cmd/compile/internal/ssa/rewriteMIPS64.go index 095d93a033..16426c300f 100644 --- a/src/cmd/compile/internal/ssa/rewriteMIPS64.go +++ b/src/cmd/compile/internal/ssa/rewriteMIPS64.go @@ -6,6 +6,9 @@ import "cmd/compile/internal/types" func rewriteValueMIPS64(v *Value) bool { switch v.Op { + case OpAbs: + v.Op = OpMIPS64ABSD + return true case OpAdd16: v.Op = OpMIPS64ADDV return true diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go index 4907113b06..5d15ffbdf1 100644 --- a/src/cmd/compile/internal/ssagen/ssa.go +++ b/src/cmd/compile/internal/ssagen/ssa.go @@ -4369,7 +4369,7 @@ func InitTables() { func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return s.newValue1(ssa.OpAbs, types.Types[types.TFLOAT64], args[0]) }, - sys.ARM64, sys.ARM, sys.PPC64, sys.RISCV64, sys.Wasm) + sys.ARM64, sys.ARM, sys.PPC64, sys.RISCV64, sys.Wasm, sys.MIPS64) addF("math", "Copysign", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return s.newValue2(ssa.OpCopysign, types.Types[types.TFLOAT64], args[0], args[1]) diff --git a/test/codegen/math.go b/test/codegen/math.go index f172af435d..79b11ca228 100644 --- a/test/codegen/math.go +++ b/test/codegen/math.go @@ -81,6 +81,7 @@ func abs(x, y float64) { // riscv64:"FABSD\t" // wasm:"F64Abs" // arm/6:"ABSD\t" + // mips64/hardfloat:"ABSD\t" sink64[0] = math.Abs(x) // amd64:"BTRQ\t[$]63","PXOR" (TODO: this should be BTSQ)