]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: intrinsify math.RoundToEven and math.Abs on arm64
authorerifan01 <eric.fang@arm.com>
Tue, 22 May 2018 06:58:32 +0000 (06:58 +0000)
committerCherry Zhang <cherryyz@google.com>
Thu, 13 Sep 2018 14:52:51 +0000 (14:52 +0000)
math.RoundToEven can be done by one arm64 instruction FRINTND, intrinsify it to improve performance.
The current pure Go implementation of the function Abs is translated into five instructions on arm64:
str, ldr, and, str, ldr. The intrinsic implementation requires only one instruction, so in terms of
performance, intrinsify it is worthwhile.

Benchmarks:
name           old time/op  new time/op  delta
Abs-8          3.50ns ± 0%  1.50ns ± 0%  -57.14%  (p=0.000 n=10+10)
RoundToEven-8  9.26ns ± 0%  1.50ns ± 0%  -83.80%  (p=0.000 n=10+10)

Change-Id: I9456b26ab282b544dfac0154fc86f17aed96ac3d
Reviewed-on: https://go-review.googlesource.com/116535
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/cmd/compile/internal/arm64/ssa.go
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/ARM64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteARM64.go
test/codegen/math.go

index 192654158276d44680ce78391c173d1e489655fc..ce6d32f536fea8b1ab526b29bd9359f97bed40d5 100644 (file)
@@ -698,6 +698,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                fallthrough
        case ssa.OpARM64MVN,
                ssa.OpARM64NEG,
+               ssa.OpARM64FABSD,
                ssa.OpARM64FMOVDfpgp,
                ssa.OpARM64FMOVDgpfp,
                ssa.OpARM64FNEGS,
@@ -730,6 +731,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                ssa.OpARM64CLZW,
                ssa.OpARM64FRINTAD,
                ssa.OpARM64FRINTMD,
+               ssa.OpARM64FRINTND,
                ssa.OpARM64FRINTPD,
                ssa.OpARM64FRINTZD:
                p := s.Prog(v.Op.Asm())
index bb076f870849df7a203126adb9d7eb31ff82d55e..2ee966d890f85996c8f65685f5df1fabe8c34827 100644 (file)
@@ -3149,12 +3149,12 @@ func init() {
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpRoundToEven, types.Types[TFLOAT64], args[0])
                },
-               sys.S390X)
+               sys.ARM64, sys.S390X)
        addF("math", "Abs",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpAbs, types.Types[TFLOAT64], args[0])
                },
-               sys.PPC64)
+               sys.ARM64, sys.PPC64)
        addF("math", "Copysign",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        return s.newValue2(ssa.OpCopysign, types.Types[TFLOAT64], args[0], args[1])
index 4c9d9a6f7a38f9e0c359c8fb2e3a04715b09cc18..b2ce875a057dbd4c063fc0151e3eea98e4ed70d1 100644 (file)
 (Com8 x) -> (MVN x)
 
 // math package intrinsics
+(Abs x) -> (FABSD x)
 (Sqrt x) -> (FSQRTD x)
 (Ceil  x) -> (FRINTPD x)
 (Floor x) -> (FRINTMD x)
 (Round x) -> (FRINTAD x)
+(RoundToEven x) -> (FRINTND x)
 (Trunc x) -> (FRINTZD x)
 
 // lowering rotates
index 43230fbf70faf506e1a91bd781ef8243ed28d576..da078517d44fe240857f61f78c1e5e70fedcedcc 100644 (file)
@@ -212,6 +212,7 @@ func init() {
                // unary ops
                {name: "MVN", argLength: 1, reg: gp11, asm: "MVN"},         // ^arg0
                {name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},         // -arg0
+               {name: "FABSD", argLength: 1, reg: fp11, asm: "FABSD"},     // abs(arg0), float64
                {name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS"},     // -arg0, float32
                {name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD"},     // -arg0, float64
                {name: "FSQRTD", argLength: 1, reg: fp11, asm: "FSQRTD"},   // sqrt(arg0), float64
@@ -424,6 +425,7 @@ func init() {
                // floating-point round to integral
                {name: "FRINTAD", argLength: 1, reg: fp11, asm: "FRINTAD"},
                {name: "FRINTMD", argLength: 1, reg: fp11, asm: "FRINTMD"},
+               {name: "FRINTND", argLength: 1, reg: fp11, asm: "FRINTND"},
                {name: "FRINTPD", argLength: 1, reg: fp11, asm: "FRINTPD"},
                {name: "FRINTZD", argLength: 1, reg: fp11, asm: "FRINTZD"},
 
index 30c57874f6a1736b7c3382cf7ddf34b9643675f3..b32dca410319a5c7bc3cc813488a23e5e9297c18 100644 (file)
@@ -1107,6 +1107,7 @@ const (
        OpARM64LoweredMuluhilo
        OpARM64MVN
        OpARM64NEG
+       OpARM64FABSD
        OpARM64FNEGS
        OpARM64FNEGD
        OpARM64FSQRTD
@@ -1277,6 +1278,7 @@ const (
        OpARM64FCVTDS
        OpARM64FRINTAD
        OpARM64FRINTMD
+       OpARM64FRINTND
        OpARM64FRINTPD
        OpARM64FRINTZD
        OpARM64CSEL
@@ -14658,6 +14660,19 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "FABSD",
+               argLen: 1,
+               asm:    arm64.AFABSD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+                       outputs: []outputInfo{
+                               {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+               },
+       },
        {
                name:   "FNEGS",
                argLen: 1,
@@ -16965,6 +16980,19 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "FRINTND",
+               argLen: 1,
+               asm:    arm64.AFRINTND,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+                       outputs: []outputInfo{
+                               {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+               },
+       },
        {
                name:   "FRINTPD",
                argLen: 1,
index 1dcbd9348099ac60af09d593734ab9394e511881..2108452c03b5f10d5e870b514b886a6cd3e3f9db 100644 (file)
@@ -331,6 +331,8 @@ func rewriteValueARM64(v *Value) bool {
                return rewriteValueARM64_OpARM64XORshiftRA_0(v)
        case OpARM64XORshiftRL:
                return rewriteValueARM64_OpARM64XORshiftRL_0(v)
+       case OpAbs:
+               return rewriteValueARM64_OpAbs_0(v)
        case OpAdd16:
                return rewriteValueARM64_OpAdd16_0(v)
        case OpAdd32:
@@ -747,6 +749,8 @@ func rewriteValueARM64(v *Value) bool {
                return rewriteValueARM64_OpRound32F_0(v)
        case OpRound64F:
                return rewriteValueARM64_OpRound64F_0(v)
+       case OpRoundToEven:
+               return rewriteValueARM64_OpRoundToEven_0(v)
        case OpRsh16Ux16:
                return rewriteValueARM64_OpRsh16Ux16_0(v)
        case OpRsh16Ux32:
@@ -29214,6 +29218,17 @@ func rewriteValueARM64_OpARM64XORshiftRL_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueARM64_OpAbs_0(v *Value) bool {
+       // match: (Abs x)
+       // cond:
+       // result: (FABSD x)
+       for {
+               x := v.Args[0]
+               v.reset(OpARM64FABSD)
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValueARM64_OpAdd16_0(v *Value) bool {
        // match: (Add16 x y)
        // cond:
@@ -33407,6 +33422,17 @@ func rewriteValueARM64_OpRound64F_0(v *Value) bool {
                return true
        }
 }
+func rewriteValueARM64_OpRoundToEven_0(v *Value) bool {
+       // match: (RoundToEven x)
+       // cond:
+       // result: (FRINTND x)
+       for {
+               x := v.Args[0]
+               v.reset(OpARM64FRINTND)
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValueARM64_OpRsh16Ux16_0(v *Value) bool {
        b := v.Block
        _ = b
index 3d5f0917ef7bc6f56f2ce001dd8da2e55c474afd..6afe1833459ddba01c79f48540aec8e399064ccb 100644 (file)
@@ -32,6 +32,7 @@ func approx(x float64) {
        sink64[3] = math.Trunc(x)
 
        // s390x:"FIDBR\t[$]4"
+       // arm64:"FRINTND"
        sink64[4] = math.RoundToEven(x)
 }
 
@@ -48,6 +49,7 @@ func sqrt(x float64) float64 {
 // Check that it's using integer registers
 func abs(x, y float64) {
        // amd64:"BTRQ\t[$]63"
+       // arm64:"FABSD\t"
        // s390x:"LPDFR\t",-"MOVD\t"     (no integer load/store)
        // ppc64le:"FABS\t"
        sink64[0] = math.Abs(x)