Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile, math: make math.{Abs,Copysign} intrinsics on loong64
author Xiaolin Zhao <zhaoxiaolin@loongson.cn>
Mon, 1 Apr 2024 09:13:42 +0000 (17:13 +0800)
committer abner chenc <chenguoqi@loongson.cn>
Wed, 7 Aug 2024 01:16:42 +0000 (01:16 +0000)
goos: linux
goarch: loong64
pkg: math
cpu: Loongson-3A6000 @ 2500.00MHz
         │  old.bench   │              new.bench               │
         │    sec/op    │    sec/op     vs base                │
Copysign   1.9710n ± 0%   0.8006n ± 0%  -59.38% (p=0.000 n=10)
Abs        1.8745n ± 0%   0.8006n ± 0%  -57.29% (p=0.000 n=10)
geomean     1.922n        0.8006n       -58.35%

goos: linux
goarch: loong64
pkg: math
cpu: Loongson-3A5000 @ 2500.00MHz
         │  old.bench   │              new.bench               │
         │    sec/op    │    sec/op     vs base                │
Copysign   2.4020n ± 0%   0.9006n ± 0%  -62.51% (p=0.000 n=10)
Abs        2.4020n ± 0%   0.8005n ± 0%  -66.67% (p=0.000 n=10)
geomean     2.402n        0.8491n       -64.65%

Updates #59120.

Change-Id: Ic409e1f4d15ad15cb3568a5aaa100046e9302842
Reviewed-on: https://go-review.googlesource.com/c/go/+/580280
Reviewed-by: Qiqi Huang <huangqiqi@loongson.cn>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/compile/internal/loong64/ssa.go
src/cmd/compile/internal/ssa/_gen/LOONG64.rules
src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteLOONG64.go
src/cmd/compile/internal/ssagen/ssa.go

index 10190654d7ba8b9819ad0787444b111b1d9a6860..fd5ed5f928bba9bbe0d0083f42288b4ff87c4eb5 100644 (file)
@@ -177,7 +177,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                ssa.OpLOONG64DIVF,
                ssa.OpLOONG64DIVD,
                ssa.OpLOONG64MULV, ssa.OpLOONG64MULHV, ssa.OpLOONG64MULHVU,
-               ssa.OpLOONG64DIVV, ssa.OpLOONG64REMV, ssa.OpLOONG64DIVVU, ssa.OpLOONG64REMVU:
+               ssa.OpLOONG64DIVV, ssa.OpLOONG64REMV, ssa.OpLOONG64DIVVU, ssa.OpLOONG64REMVU,
+               ssa.OpLOONG64FCOPYSGD:
                p := s.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_REG
                p.From.Reg = v.Args[1].Reg()
@@ -420,7 +421,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                ssa.OpLOONG64NEGF,
                ssa.OpLOONG64NEGD,
                ssa.OpLOONG64SQRTD,
-               ssa.OpLOONG64SQRTF:
+               ssa.OpLOONG64SQRTF,
+               ssa.OpLOONG64ABSD:
                p := s.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_REG
                p.From.Reg = v.Args[0].Reg()
index 6beeb4e0ccbdd87a36e454e06270d5bb781d9a41..014abcbd264602066a83f002ebd0772027246ad6 100644 (file)
 
 (Com(64|32|16|8) x) => (NOR (MOVVconst [0]) x)
 
+// math package intrinsics
 (Sqrt ...) => (SQRTD ...)
 (Sqrt32 ...) => (SQRTF ...)
+(Abs ...) => (ABSD ...)
+(Copysign ...) => (FCOPYSGD ...)
 
 (Min(64|32)F ...) => (FMIN(D|F) ...)
 (Max(64|32)F ...) => (FMAX(D|F) ...)
index aa030f4fa098b21e8708c25de1d8de4e6e8649bd..874c0c9e09945eb06a60f0ccf757c476246c1a03 100644 (file)
@@ -201,6 +201,9 @@ func init() {
                {name: "MASKEQZ", argLength: 2, reg: gp21, asm: "MASKEQZ"}, // returns 0 if arg1 == 0, otherwise returns arg0
                {name: "MASKNEZ", argLength: 2, reg: gp21, asm: "MASKNEZ"}, // returns 0 if arg1 != 0, otherwise returns arg0
 
+               {name: "ABSD", argLength: 1, reg: fp11, asm: "ABSD"},         // abs(arg0), float64
+               {name: "FCOPYSGD", argLength: 2, reg: fp21, asm: "FCOPYSGD"}, // float64
+
                // shifts
                {name: "SLLV", argLength: 2, reg: gp21, asm: "SLLV"},                      // arg0 << arg1, shift amount is mod 64
                {name: "SLLVconst", argLength: 1, reg: gp11, asm: "SLLV", aux: "Int64"},   // arg0 << auxInt
index 7216f2df01a1798f3ab1f1fe14b442d9edacf82a..ef39c6894fd5cb453a532cde3bed2245023ec60f 100644 (file)
@@ -1779,6 +1779,8 @@ const (
        OpLOONG64FMAXD
        OpLOONG64MASKEQZ
        OpLOONG64MASKNEZ
+       OpLOONG64ABSD
+       OpLOONG64FCOPYSGD
        OpLOONG64SLLV
        OpLOONG64SLLVconst
        OpLOONG64SRLV
@@ -23970,6 +23972,33 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "ABSD",
+               argLen: 1,
+               asm:    loong64.AABSD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+                       outputs: []outputInfo{
+                               {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+               },
+       },
+       {
+               name:   "FCOPYSGD",
+               argLen: 2,
+               asm:    loong64.AFCOPYSGD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                               {1, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+                       outputs: []outputInfo{
+                               {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+               },
+       },
        {
                name:   "SLLV",
                argLen: 2,
index 8fa31d73f6f94f6bed94e5e9461658f2a7ce9a04..58f33bd4c4f3dfed3e1751d1bd7f969ef165c259 100644 (file)
@@ -6,6 +6,9 @@ import "cmd/compile/internal/types"
 
 func rewriteValueLOONG64(v *Value) bool {
        switch v.Op {
+       case OpAbs:
+               v.Op = OpLOONG64ABSD
+               return true
        case OpAdd16:
                v.Op = OpLOONG64ADDV
                return true
@@ -116,6 +119,9 @@ func rewriteValueLOONG64(v *Value) bool {
                return rewriteValueLOONG64_OpConstBool(v)
        case OpConstNil:
                return rewriteValueLOONG64_OpConstNil(v)
+       case OpCopysign:
+               v.Op = OpLOONG64FCOPYSGD
+               return true
        case OpCvt32Fto32:
                v.Op = OpLOONG64TRUNCFW
                return true
index c1c991012703bf68c10a7e2daf4b8af0e14847f3..14e75f74f376320b7d4043c013cc2a5cd5fa5fcf 100644 (file)
@@ -4721,12 +4721,12 @@ func InitTables() {
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpAbs, types.Types[types.TFLOAT64], args[0])
                },
-               sys.ARM64, sys.ARM, sys.PPC64, sys.RISCV64, sys.Wasm, sys.MIPS, sys.MIPS64)
+               sys.ARM64, sys.ARM, sys.Loong64, sys.PPC64, sys.RISCV64, sys.Wasm, sys.MIPS, sys.MIPS64)
        addF("math", "Copysign",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue2(ssa.OpCopysign, types.Types[types.TFLOAT64], args[0], args[1])
                },
-               sys.PPC64, sys.RISCV64, sys.Wasm)
+               sys.Loong64, sys.PPC64, sys.RISCV64, sys.Wasm)
        addF("math", "FMA",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2])