Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: intrinsics for trunc, floor, ceil on ppc64x
author Lynn Boger <laboger@linux.vnet.ibm.com>
Wed, 9 Aug 2017 17:53:34 +0000 (13:53 -0400)
committer Lynn Boger <laboger@linux.vnet.ibm.com>
Fri, 11 Aug 2017 16:35:49 +0000 (16:35 +0000)
This implements trunc, floor, and ceil in the math package
as intrinsics on ppc64x.  The improvement is significant, mainly
because it avoids the call overhead of the args and return value.

BenchmarkCeil-16                    5.95          0.69          -88.40%
BenchmarkFloor-16                   5.95          0.69          -88.40%
BenchmarkTrunc-16                   5.82          0.69          -88.14%

Updates #21390

Change-Id: I951e182694f6e0c431da79c577272b81fb0ebad0
Reviewed-on: https://go-review.googlesource.com/54654
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/ppc64/ssa.go
src/cmd/compile/internal/ssa/gen/PPC64.rules
src/cmd/compile/internal/ssa/gen/PPC64Ops.go
src/cmd/compile/internal/ssa/gen/genericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewritePPC64.go

index 2400af340710860be52f0276c1db076ad2d2f033..db2fd24a307f6da60ede6524fe026023398e254d 100644 (file)
@@ -2724,6 +2724,21 @@ func init() {
                        return s.newValue1(ssa.OpSqrt, types.Types[TFLOAT64], args[0])
                },
                sys.AMD64, sys.ARM, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
+       addF("math", "Trunc",
+               func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+                       return s.newValue1(ssa.OpTrunc, types.Types[TFLOAT64], args[0])
+               },
+               sys.PPC64)
+       addF("math", "Ceil",
+               func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+                       return s.newValue1(ssa.OpCeil, types.Types[TFLOAT64], args[0])
+               },
+               sys.PPC64)
+       addF("math", "Floor",
+               func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+                       return s.newValue1(ssa.OpFloor, types.Types[TFLOAT64], args[0])
+               },
+               sys.PPC64)
 
        /******** math/bits ********/
        addF("math/bits", "TrailingZeros64",
index 5fe140fdcf16e47041f250375cfe0512d307f066..1ad40881c3723723277a35ba7d5020fa5d61c641 100644 (file)
@@ -596,7 +596,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.To.Type = obj.TYPE_REG
                p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
 
-       case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB:
+       case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB:
                r := v.Reg()
                p := s.Prog(v.Op.Asm())
                p.To.Type = obj.TYPE_REG
index 81ac3c26afd2f56aa649163426e5ec3a4a692a1d..cf0fa49af4a1bf6ec9e2d1106b1e0c91d0881edb 100644 (file)
@@ -74,6 +74,9 @@
 (Round64F x) -> (LoweredRound64F x)
 
 (Sqrt x) -> (FSQRT x)
+(Floor x) -> (FFLOOR x)
+(Ceil x) -> (FCEIL x)
+(Trunc x) -> (FTRUNC x)
 
 // Lowering constants
 (Const8   [val]) -> (MOVDconst [val])
index 2e8e239f15f227adeaa9438541e164f1198db197..db8a88aec94c102671106ee879ddfe833910d73f 100644 (file)
@@ -241,6 +241,9 @@ func init() {
                {name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"},                                // -arg0 (floating point)
                {name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"},                              // sqrt(arg0) (floating point)
                {name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"},                            // sqrt(arg0) (floating point, single precision)
+               {name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"},                              // floor(arg0), float64
+               {name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"},                               // ceil(arg0), float64
+               {name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"},                              // trunc(arg0), float64
 
                {name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"},                                                                                     // arg0|aux
                {name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"},                                                                                   // arg0^aux
index d962e4a193bf8d9663078d3b5843d4df77c32174..c6452b038d2943957a319dc86b650e1da36ec285 100644 (file)
@@ -255,7 +255,10 @@ var genericOps = []opData{
        {name: "PopCount32", argLength: 1}, // Count bits in arg[0]
        {name: "PopCount64", argLength: 1}, // Count bits in arg[0]
 
-       {name: "Sqrt", argLength: 1}, // sqrt(arg0), float64 only
+       {name: "Sqrt", argLength: 1},  // sqrt(arg0), float64 only
+       {name: "Floor", argLength: 1}, // floor(arg0), float64 only
+       {name: "Ceil", argLength: 1},  // ceil(arg0), float64 only
+       {name: "Trunc", argLength: 1}, // trunc(arg0), float64 only
 
        // Data movement, max argument length for Phi is indefinite so just pick
        // a really large number
index 17a5e7020440cb0369dc1e8fedd5c375fcd018f3..e31ba3a750f50890e57219723334ed2f5646ec61 100644 (file)
@@ -1322,6 +1322,9 @@ const (
        OpPPC64FNEG
        OpPPC64FSQRT
        OpPPC64FSQRTS
+       OpPPC64FFLOOR
+       OpPPC64FCEIL
+       OpPPC64FTRUNC
        OpPPC64ORconst
        OpPPC64XORconst
        OpPPC64ANDconst
@@ -1800,6 +1803,9 @@ const (
        OpPopCount32
        OpPopCount64
        OpSqrt
+       OpFloor
+       OpCeil
+       OpTrunc
        OpPhi
        OpCopy
        OpConvert
@@ -16955,6 +16961,45 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "FFLOOR",
+               argLen: 1,
+               asm:    ppc64.AFRIM,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
+                       },
+                       outputs: []outputInfo{
+                               {0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
+                       },
+               },
+       },
+       {
+               name:   "FCEIL",
+               argLen: 1,
+               asm:    ppc64.AFRIP,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
+                       },
+                       outputs: []outputInfo{
+                               {0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
+                       },
+               },
+       },
+       {
+               name:   "FTRUNC",
+               argLen: 1,
+               asm:    ppc64.AFRIZ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
+                       },
+                       outputs: []outputInfo{
+                               {0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
+                       },
+               },
+       },
        {
                name:    "ORconst",
                auxType: auxInt64,
@@ -21976,6 +22021,21 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
+       {
+               name:    "Floor",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "Ceil",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "Trunc",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "Phi",
                argLen:  -1,
index 20e354cb4a3f2d32429ab27709b03f12b191f5d1..8abf3fc6195f29371090a3e1a893a8715b929e08 100644 (file)
@@ -73,6 +73,8 @@ func rewriteValuePPC64(v *Value) bool {
                return rewriteValuePPC64_OpBitLen32_0(v)
        case OpBitLen64:
                return rewriteValuePPC64_OpBitLen64_0(v)
+       case OpCeil:
+               return rewriteValuePPC64_OpCeil_0(v)
        case OpClosureCall:
                return rewriteValuePPC64_OpClosureCall_0(v)
        case OpCom16:
@@ -161,6 +163,8 @@ func rewriteValuePPC64(v *Value) bool {
                return rewriteValuePPC64_OpEqB_0(v)
        case OpEqPtr:
                return rewriteValuePPC64_OpEqPtr_0(v)
+       case OpFloor:
+               return rewriteValuePPC64_OpFloor_0(v)
        case OpGeq16:
                return rewriteValuePPC64_OpGeq16_0(v)
        case OpGeq16U:
@@ -583,6 +587,8 @@ func rewriteValuePPC64(v *Value) bool {
                return rewriteValuePPC64_OpSub8_0(v)
        case OpSubPtr:
                return rewriteValuePPC64_OpSubPtr_0(v)
+       case OpTrunc:
+               return rewriteValuePPC64_OpTrunc_0(v)
        case OpTrunc16to8:
                return rewriteValuePPC64_OpTrunc16to8_0(v)
        case OpTrunc32to16:
@@ -1070,6 +1076,17 @@ func rewriteValuePPC64_OpBitLen64_0(v *Value) bool {
                return true
        }
 }
+func rewriteValuePPC64_OpCeil_0(v *Value) bool {
+       // match: (Ceil x)
+       // cond:
+       // result: (FCEIL x)
+       for {
+               x := v.Args[0]
+               v.reset(OpPPC64FCEIL)
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValuePPC64_OpClosureCall_0(v *Value) bool {
        // match: (ClosureCall [argwid] entry closure mem)
        // cond:
@@ -1823,6 +1840,17 @@ func rewriteValuePPC64_OpEqPtr_0(v *Value) bool {
                return true
        }
 }
+func rewriteValuePPC64_OpFloor_0(v *Value) bool {
+       // match: (Floor x)
+       // cond:
+       // result: (FFLOOR x)
+       for {
+               x := v.Args[0]
+               v.reset(OpPPC64FFLOOR)
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValuePPC64_OpGeq16_0(v *Value) bool {
        b := v.Block
        _ = b
@@ -10463,6 +10491,17 @@ func rewriteValuePPC64_OpSubPtr_0(v *Value) bool {
                return true
        }
 }
+func rewriteValuePPC64_OpTrunc_0(v *Value) bool {
+       // match: (Trunc x)
+       // cond:
+       // result: (FTRUNC x)
+       for {
+               x := v.Args[0]
+               v.reset(OpPPC64FTRUNC)
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValuePPC64_OpTrunc16to8_0(v *Value) bool {
        // match: (Trunc16to8 x)
        // cond: