Cypherpunks repositories - gostls13.git/commitdiff
math, cmd/internal/obj/ppc64: improve floor, ceil, trunc with asm
author Lynn Boger <laboger@linux.vnet.ibm.com>
Thu, 22 Sep 2016 19:54:45 +0000 (14:54 -0500)
committer Michael Munday <munday@ca.ibm.com>
Fri, 23 Sep 2016 13:03:08 +0000 (13:03 +0000)
This adds the instructions frim, frip, and friz to the ppc64x
assembler for use in implementing the math.Floor, math.Ceil, and
math.Trunc functions to improve performance.

Fixes #17185

BenchmarkCeil-128                    21.4          6.99          -67.34%
BenchmarkFloor-128                   13.9          6.37          -54.17%
BenchmarkTrunc-128                   12.7          6.33          -50.16%

Change-Id: I96131bd4e8c9c8dbafb25bfeb544cf9d2dbb4282
Reviewed-on: https://go-review.googlesource.com/29654
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Michael Munday <munday@ca.ibm.com>
src/cmd/internal/obj/ppc64/a.out.go
src/cmd/internal/obj/ppc64/anames.go
src/cmd/internal/obj/ppc64/asm9.go
src/math/floor_ppc64x.s [new file with mode: 0644]
src/math/stubs_ppc64x.s

diff --git a/src/cmd/internal/obj/ppc64/a.out.go b/src/cmd/internal/obj/ppc64/a.out.go
index 1b92a669c8c5afd9d291679f74d559128806be00..477bc9acd94f067991db8966865d135ca62f1262 100644 (file)
@@ -496,6 +496,12 @@ const (
        /* optional on 32-bit */
        AFRES
        AFRESCC
+       AFRIM
+       AFRIMCC
+       AFRIP
+       AFRIPCC
+       AFRIZ
+       AFRIZCC
        AFRSQRTE
        AFRSQRTECC
        AFSEL
diff --git a/src/cmd/internal/obj/ppc64/anames.go b/src/cmd/internal/obj/ppc64/anames.go
index e064d45c333acbdf279c9b6f44bd0d7075fe300a..1d766a2d6bd0a1341be0407b8539afe5845f9047 100644 (file)
@@ -229,6 +229,12 @@ var Anames = []string{
        "RFCI",
        "FRES",
        "FRESCC",
+       "FRIM",
+       "FRIMCC",
+       "FRIP",
+       "FRIPCC",
+       "FRIZ",
+       "FRIZCC",
        "FRSQRTE",
        "FRSQRTECC",
        "FSEL",
diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go
index 79282acd1c7a4862aff2b86998042018761fee88..2a6f1bc5779f33be7c5e6108cf5844901ce13573 100644 (file)
@@ -1371,6 +1371,12 @@ func buildop(ctxt *obj.Link) {
                        opset(AFCFIDUCC, r0)
                        opset(AFRES, r0)
                        opset(AFRESCC, r0)
+                       opset(AFRIM, r0)
+                       opset(AFRIMCC, r0)
+                       opset(AFRIP, r0)
+                       opset(AFRIPCC, r0)
+                       opset(AFRIZ, r0)
+                       opset(AFRIZCC, r0)
                        opset(AFRSQRTE, r0)
                        opset(AFRSQRTECC, r0)
                        opset(AFSQRT, r0)
@@ -3173,6 +3179,18 @@ func oprrr(ctxt *obj.Link, a obj.As) uint32 {
                return OPVCC(59, 24, 0, 0)
        case AFRESCC:
                return OPVCC(59, 24, 0, 1)
+       case AFRIM:
+               return OPVCC(63, 488, 0, 0)
+       case AFRIMCC:
+               return OPVCC(63, 488, 0, 1)
+       case AFRIP:
+               return OPVCC(63, 456, 0, 0)
+       case AFRIPCC:
+               return OPVCC(63, 456, 0, 1)
+       case AFRIZ:
+               return OPVCC(63, 424, 0, 0)
+       case AFRIZCC:
+               return OPVCC(63, 424, 0, 1)
        case AFRSP:
                return OPVCC(63, 12, 0, 0)
        case AFRSPCC:
diff --git a/src/math/floor_ppc64x.s b/src/math/floor_ppc64x.s
new file mode 100644 (file)
index 0000000..2ab011d
--- /dev/null
@@ -0,0 +1,25 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ppc64 ppc64le
+
+#include "textflag.h"
+
+TEXT ·Floor(SB),NOSPLIT,$0
+       FMOVD   x+0(FP), F0
+       FRIM    F0, F0
+       FMOVD   F0, ret+8(FP)
+       RET
+
+TEXT ·Ceil(SB),NOSPLIT,$0
+       FMOVD   x+0(FP), F0
+       FRIP    F0, F0
+       FMOVD   F0, ret+8(FP)
+       RET
+
+TEXT ·Trunc(SB),NOSPLIT,$0
+       FMOVD   x+0(FP), F0
+       FRIZ    F0, F0
+       FMOVD   F0, ret+8(FP)
+       RET
diff --git a/src/math/stubs_ppc64x.s b/src/math/stubs_ppc64x.s
index a57357e2ee5d24673f99c6fde8731c5ebeef7cd0..de8a5ff8bf8268c452a9462cbb7c713441ab4e23 100644 (file)
@@ -36,15 +36,6 @@ TEXT ·Expm1(SB),NOSPLIT,$0
 TEXT ·Exp(SB),NOSPLIT,$0
        BR ·exp(SB)
 
-TEXT ·Floor(SB),NOSPLIT,$0
-       BR ·floor(SB)
-
-TEXT ·Ceil(SB),NOSPLIT,$0
-       BR ·ceil(SB)
-
-TEXT ·Trunc(SB),NOSPLIT,$0
-       BR ·trunc(SB)
-
 TEXT ·Frexp(SB),NOSPLIT,$0
        BR ·frexp(SB)