]> Cypherpunks repositories - gostls13.git/commitdiff
math: optimize math.Abs on mipsx
authorJunxian Zhu <zhujunxian@oss.cipunited.com>
Fri, 14 Apr 2023 08:06:40 +0000 (16:06 +0800)
committerCherry Mui <cherryyz@google.com>
Mon, 8 May 2023 15:53:28 +0000 (15:53 +0000)
This commit optimized math.Abs function implementation on mipsx.
Tested on loongson 3A2000.

goos: linux
goarch: mipsle
pkg: math
                      │   oldmath    │              newmath               │
                      │    sec/op    │   sec/op     vs base               │
Acos-4                   282.6n ± 0%   282.3n ± 0%        ~ (p=0.140 n=7)
Acosh-4                  506.1n ± 0%   451.8n ± 0%  -10.73% (p=0.001 n=7)
Asin-4                   272.3n ± 0%   272.2n ± 0%        ~ (p=0.808 n=7)
Asinh-4                  529.7n ± 0%   475.3n ± 0%  -10.27% (p=0.001 n=7)
Atan-4                   208.2n ± 0%   207.9n ± 0%        ~ (p=0.134 n=7)
Atanh-4                  503.4n ± 1%   449.7n ± 0%  -10.67% (p=0.001 n=7)
Atan2-4                  310.5n ± 0%   310.5n ± 0%        ~ (p=0.928 n=7)
Cbrt-4                   359.3n ± 0%   358.8n ± 0%        ~ (p=0.121 n=7)
Ceil-4                   203.9n ± 0%   204.0n ± 0%        ~ (p=0.600 n=7)
Compare-4                23.11n ± 0%   23.11n ± 0%        ~ (p=0.702 n=7)
Compare32-4              19.09n ± 0%   19.12n ± 0%        ~ (p=0.070 n=7)
Copysign-4               33.20n ± 0%   34.02n ± 0%   +2.47% (p=0.001 n=7)
Cos-4                    422.5n ± 0%   385.4n ± 1%   -8.78% (p=0.001 n=7)
Cosh-4                   628.0n ± 0%   545.5n ± 0%  -13.14% (p=0.001 n=7)
Erf-4                    193.7n ± 2%   192.7n ± 1%        ~ (p=0.430 n=7)
Erfc-4                   192.8n ± 1%   193.0n ± 0%        ~ (p=0.245 n=7)
Erfinv-4                 220.7n ± 1%   221.5n ± 2%        ~ (p=0.272 n=7)
Erfcinv-4                221.3n ± 1%   220.4n ± 2%        ~ (p=0.738 n=7)
Exp-4                    471.4n ± 0%   435.1n ± 0%   -7.70% (p=0.001 n=7)
ExpGo-4                  470.6n ± 0%   434.0n ± 0%   -7.78% (p=0.001 n=7)
Expm1-4                  243.1n ± 0%   243.4n ± 0%        ~ (p=0.417 n=7)
Exp2-4                   463.1n ± 0%   427.0n ± 0%   -7.80% (p=0.001 n=7)
Exp2Go-4                 462.4n ± 0%   426.2n ± 5%   -7.83% (p=0.001 n=7)
Abs-4                   37.000n ± 0%   8.039n ± 9%  -78.27% (p=0.001 n=7)
Dim-4                    18.09n ± 0%   18.11n ± 0%        ~ (p=0.094 n=7)
Floor-4                  151.9n ± 0%   151.8n ± 0%        ~ (p=0.190 n=7)
Max-4                    116.7n ± 1%   116.7n ± 1%        ~ (p=0.842 n=7)
Min-4                    116.6n ± 1%   116.6n ± 0%        ~ (p=0.464 n=7)
Mod-4                   1244.0n ± 0%   980.9n ± 0%  -21.15% (p=0.001 n=7)
Frexp-4                  199.0n ± 0%   146.7n ± 0%  -26.28% (p=0.001 n=7)
Gamma-4                  516.4n ± 0%   479.3n ± 1%   -7.18% (p=0.001 n=7)
Hypot-4                  169.8n ± 0%   117.8n ± 2%  -30.62% (p=0.001 n=7)
HypotGo-4                170.8n ± 0%   117.5n ± 0%  -31.21% (p=0.001 n=7)
Ilogb-4                  160.8n ± 0%   109.5n ± 0%  -31.90% (p=0.001 n=7)
J0-4                     1.359µ ± 0%   1.305µ ± 0%   -3.97% (p=0.001 n=7)
J1-4                     1.386µ ± 0%   1.334µ ± 0%   -3.75% (p=0.001 n=7)
Jn-4                     2.864µ ± 0%   2.758µ ± 0%   -3.70% (p=0.001 n=7)
Ldexp-4                  202.9n ± 0%   151.7n ± 0%  -25.23% (p=0.001 n=7)
Lgamma-4                 234.0n ± 0%   234.3n ± 0%        ~ (p=0.199 n=7)
Log-4                    444.1n ± 0%   407.9n ± 0%   -8.15% (p=0.001 n=7)
Logb-4                   157.8n ± 0%   121.6n ± 0%  -22.94% (p=0.001 n=7)
Log1p-4                  354.8n ± 0%   315.4n ± 0%  -11.10% (p=0.001 n=7)
Log10-4                  453.9n ± 0%   417.9n ± 0%   -7.93% (p=0.001 n=7)
Log2-4                   245.3n ± 0%   209.1n ± 0%  -14.76% (p=0.001 n=7)
Modf-4                   126.6n ± 0%   126.6n ± 0%        ~ (p=0.126 n=7)
Nextafter32-4            112.5n ± 0%   112.5n ± 0%        ~ (p=0.853 n=7)
Nextafter64-4            141.7n ± 0%   141.6n ± 0%        ~ (p=0.331 n=7)
PowInt-4                 878.8n ± 1%   758.3n ± 1%  -13.71% (p=0.001 n=7)
PowFrac-4                1.809µ ± 0%   1.615µ ± 0%  -10.72% (p=0.001 n=7)
Pow10Pos-4               18.10n ± 0%   18.12n ± 0%        ~ (p=0.464 n=7)
Pow10Neg-4               17.09n ± 0%   17.09n ± 0%        ~ (p=0.263 n=7)
Round-4                  68.36n ± 0%   68.33n ± 0%        ~ (p=0.325 n=7)
RoundToEven-4            78.40n ± 0%   78.40n ± 0%        ~ (p=0.934 n=7)
Remainder-4              894.0n ± 1%   753.4n ± 1%  -15.73% (p=0.001 n=7)
Signbit-4                18.09n ± 0%   18.09n ± 0%        ~ (p=0.761 n=7)
Sin-4                    389.8n ± 1%   389.8n ± 0%        ~ (p=0.995 n=7)
Sincos-4                 416.0n ± 0%   415.9n ± 0%        ~ (p=0.361 n=7)
Sinh-4                   634.6n ± 4%   585.6n ± 1%   -7.72% (p=0.001 n=7)
SqrtIndirect-4           8.035n ± 0%   8.036n ± 0%        ~ (p=0.523 n=7)
SqrtLatency-4            8.039n ± 0%   8.037n ± 0%        ~ (p=0.218 n=7)
SqrtIndirectLatency-4    8.040n ± 0%   8.040n ± 0%        ~ (p=0.652 n=7)
SqrtGoLatency-4          895.7n ± 0%   896.6n ± 0%   +0.10% (p=0.004 n=7)
SqrtPrime-4              5.406µ ± 0%   5.407µ ± 0%        ~ (p=0.592 n=7)
Tan-4                    406.1n ± 0%   405.8n ± 1%        ~ (p=0.435 n=7)
Tanh-4                   627.6n ± 0%   545.5n ± 0%  -13.08% (p=0.001 n=7)
Trunc-4                  146.7n ± 1%   146.7n ± 0%        ~ (p=0.755 n=7)
Y0-4                     1.359µ ± 0%   1.310µ ± 0%   -3.61% (p=0.001 n=7)
Y1-4                     1.351µ ± 0%   1.301µ ± 0%   -3.70% (p=0.001 n=7)
Yn-4                     2.829µ ± 0%   2.729µ ± 0%   -3.53% (p=0.001 n=7)
Float64bits-4            14.08n ± 0%   14.07n ± 0%        ~ (p=0.069 n=7)
Float64frombits-4        19.09n ± 0%   19.10n ± 0%        ~ (p=0.755 n=7)
Float32bits-4            13.06n ± 0%   13.07n ± 1%        ~ (p=0.586 n=7)
Float32frombits-4        13.06n ± 0%   13.06n ± 0%        ~ (p=0.853 n=7)
FMA-4                    606.9n ± 0%   606.8n ± 0%        ~ (p=0.393 n=7)
geomean                  201.1n        185.4n        -7.81%

Change-Id: I6d41a97ad3789ed5731588588859ac0b8b13b664
Reviewed-on: https://go-review.googlesource.com/c/go/+/484675
Reviewed-by: Rong Zhang <rongrong@oss.cipunited.com>
Reviewed-by: Bryan Mills <bcmills@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Run-TryBot: Than McIntosh <thanm@google.com>

src/cmd/compile/internal/mips/ssa.go
src/cmd/compile/internal/ssa/_gen/MIPS.rules
src/cmd/compile/internal/ssa/_gen/MIPSOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteMIPS.go
src/cmd/compile/internal/ssagen/ssa.go
test/codegen/math.go

index c42eba57866369e49cb186e9aa6b2e5730fb6078..2cfe57f7f4643a50e1c1db56cfd0ffc037e86a0e 100644 (file)
@@ -363,6 +363,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                ssa.OpMIPSMOVDF,
                ssa.OpMIPSNEGF,
                ssa.OpMIPSNEGD,
+               ssa.OpMIPSABSD,
                ssa.OpMIPSSQRTF,
                ssa.OpMIPSSQRTD,
                ssa.OpMIPSCLZ:
index aeb117da1775071a1976c061126b5974f29b2b75..b36402dd0a9f590494b8b77e670048445517c3b6 100644 (file)
@@ -37,6 +37,9 @@
 (Mod8 x y) => (Select0 (DIV (SignExt8to32 x) (SignExt8to32 y)))
 (Mod8u x y) => (Select0 (DIVU (ZeroExt8to32 x) (ZeroExt8to32 y)))
 
+// math package intrinsics
+(Abs ...) => (ABSD ...)
+
 // (x + y) / 2 with x>=y  becomes  (x - y) / 2 + y
 (Avg32u <t> x y) => (ADD (SRLconst <t> (SUB <t> x y) [1]) y)
 
index ac209bbddadc419cde3d73732cec0887e282d9c5..b5d9d2547591eb5cc2aa4111b9305eb683b7592c 100644 (file)
@@ -179,6 +179,7 @@ func init() {
                {name: "NEG", argLength: 1, reg: gp11},                 // -arg0
                {name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"},   // -arg0, float32
                {name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"},   // -arg0, float64
+               {name: "ABSD", argLength: 1, reg: fp11, asm: "ABSD"},   // abs(arg0), float64
                {name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64
                {name: "SQRTF", argLength: 1, reg: fp11, asm: "SQRTF"}, // sqrt(arg0), float32
 
index 2507e60f319407b50dcfd4ac41513bf42118bd55..b8ef89d76e571b26c08788abfd9e6ffcb485e43e 100644 (file)
@@ -1872,6 +1872,7 @@ const (
        OpMIPSNEG
        OpMIPSNEGF
        OpMIPSNEGD
+       OpMIPSABSD
        OpMIPSSQRTD
        OpMIPSSQRTF
        OpMIPSSLL
@@ -25054,6 +25055,19 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "ABSD",
+               argLen: 1,
+               asm:    mips.AABSD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 35183835217920}, // F0 F2 F4 F6 F8 F10 F12 F14 F16 F18 F20 F22 F24 F26 F28 F30
+                       },
+                       outputs: []outputInfo{
+                               {0, 35183835217920}, // F0 F2 F4 F6 F8 F10 F12 F14 F16 F18 F20 F22 F24 F26 F28 F30
+                       },
+               },
+       },
        {
                name:   "SQRTD",
                argLen: 1,
index b3650c4200e970bdab0e797eac8b4afd47b65fcc..1f44346b7f6eaaa24c0a5315a52d388210c7e1de 100644 (file)
@@ -6,6 +6,9 @@ import "cmd/compile/internal/types"
 
 func rewriteValueMIPS(v *Value) bool {
        switch v.Op {
+       case OpAbs:
+               v.Op = OpMIPSABSD
+               return true
        case OpAdd16:
                v.Op = OpMIPSADD
                return true
index 5d15ffbdf14e521ab3c783fc78d3a7aae707c5cf..37b5a26d5cbe0a10cd09767cdc80fd88a5acbf8d 100644 (file)
@@ -4369,7 +4369,7 @@ func InitTables() {
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpAbs, types.Types[types.TFLOAT64], args[0])
                },
-               sys.ARM64, sys.ARM, sys.PPC64, sys.RISCV64, sys.Wasm, sys.MIPS64)
+               sys.ARM64, sys.ARM, sys.PPC64, sys.RISCV64, sys.Wasm, sys.MIPS, sys.MIPS64)
        addF("math", "Copysign",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue2(ssa.OpCopysign, types.Types[types.TFLOAT64], args[0], args[1])
index 79b11ca228f17c97ea55b04070a26b3201161e46..6b592754625b6afc93971edb27edcc39378a898e 100644 (file)
@@ -82,6 +82,7 @@ func abs(x, y float64) {
        // wasm:"F64Abs"
        // arm/6:"ABSD\t"
        // mips64/hardfloat:"ABSD\t"
+       // mips/hardfloat:"ABSD\t"
        sink64[0] = math.Abs(x)
 
        // amd64:"BTRQ\t[$]63","PXOR"    (TODO: this should be BTSQ)