Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: intrinsify TrailingZeros16, OnesCount{8,16} for ppc64x
author: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
Tue, 2 Oct 2018 02:37:00 +0000 (23:37 -0300)
committer: Lynn Boger <laboger@linux.vnet.ibm.com>
Thu, 11 Oct 2018 13:21:50 +0000 (13:21 +0000)
This change implements TrailingZeros16, OnesCount8 and OnesCount16
as intrinsics for ppc64x.

benchmark                       old ns/op     new ns/op     delta
BenchmarkTrailingZeros16-40     2.16          1.61          -25.46%

benchmark                   old ns/op     new ns/op     delta
BenchmarkOnesCount-40       0.71          0.71          +0.00%
BenchmarkOnesCount8-40      0.93          0.69          -25.81%
BenchmarkOnesCount16-40     1.54          0.75          -51.30%
BenchmarkOnesCount32-40     0.75          0.74          -1.33%
BenchmarkOnesCount64-40     0.71          0.71          +0.00%

Change-Id: I010fa9c0ef596a09362870d81193c633e70da637
Reviewed-on: https://go-review.googlesource.com/c/139137
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/ssa/gen/PPC64.rules
src/cmd/compile/internal/ssa/rewritePPC64.go

index 469fbb8c96968efb087411c375e69ebf3985a021..5b11e15655f4565b56135bb0342249770b21dbd3 100644 (file)
@@ -3236,7 +3236,7 @@ func init() {
                        y := s.newValue2(ssa.OpOr64, types.Types[TUINT64], x, c)
                        return s.newValue1(ssa.OpCtz64, types.Types[TINT], y)
                },
-               sys.ARM64, sys.S390X)
+               sys.ARM64, sys.S390X, sys.PPC64)
        addF("math/bits", "TrailingZeros8",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        x := s.newValue1(ssa.OpZeroExt8to32, types.Types[TUINT32], args[0])
@@ -3427,12 +3427,12 @@ func init() {
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpPopCount16, types.Types[TINT], args[0])
                },
-               sys.ARM64, sys.S390X)
+               sys.ARM64, sys.S390X, sys.PPC64)
        addF("math/bits", "OnesCount8",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpPopCount8, types.Types[TINT], args[0])
                },
-               sys.S390X)
+               sys.S390X, sys.PPC64)
        addF("math/bits", "OnesCount",
                makeOnesCountAMD64(ssa.OpPopCount64, ssa.OpPopCount32),
                sys.AMD64)
index 21c12591c53701aaf6b5fd7dc1d2bec3e71b7e75..be1bd6de0bf562c332e889131a18076df91a842c 100644 (file)
 
 (Ctz64 x) -> (POPCNTD (ANDN <typ.Int64> (ADDconst <typ.Int64> [-1] x) x))
 (Ctz32 x) -> (POPCNTW (MOVWZreg (ANDN <typ.Int> (ADDconst <typ.Int> [-1] x) x)))
+(Ctz16 x) -> (POPCNTW (MOVHZreg (ANDN <typ.Int16> (ADDconst <typ.Int16> [-1] x) x)))
+(Ctz8 x) -> (POPCNTB (MOVBZreg (ANDN <typ.UInt8> (ADDconst <typ.UInt8> [-1] x) x)))
 
 (BitLen64 x) -> (SUB (MOVDconst [64]) (CNTLZD <typ.Int> x))
 (BitLen32 x) -> (SUB (MOVDconst [32]) (CNTLZW <typ.Int> x))
 (PopCount64 x) -> (POPCNTD x)
 (PopCount32 x) -> (POPCNTW (MOVWZreg x))
 (PopCount16 x) -> (POPCNTW (MOVHZreg x))
-(PopCount8 x) -> (POPCNTB (MOVBreg x))
+(PopCount8 x) -> (POPCNTB (MOVBZreg x))
 
 (And(64|32|16|8) x y) -> (AND x y)
 (Or(64|32|16|8) x y) -> (OR x y)
index 9aff3106dba9eb9059cbc0879194474688843551..8f100c1a38c1869518100bd651681e12e1ef0cce 100644 (file)
@@ -105,6 +105,8 @@ func rewriteValuePPC64(v *Value) bool {
                return rewriteValuePPC64_OpConstNil_0(v)
        case OpCopysign:
                return rewriteValuePPC64_OpCopysign_0(v)
+       case OpCtz16:
+               return rewriteValuePPC64_OpCtz16_0(v)
        case OpCtz32:
                return rewriteValuePPC64_OpCtz32_0(v)
        case OpCtz32NonZero:
@@ -113,6 +115,8 @@ func rewriteValuePPC64(v *Value) bool {
                return rewriteValuePPC64_OpCtz64_0(v)
        case OpCtz64NonZero:
                return rewriteValuePPC64_OpCtz64NonZero_0(v)
+       case OpCtz8:
+               return rewriteValuePPC64_OpCtz8_0(v)
        case OpCvt32Fto32:
                return rewriteValuePPC64_OpCvt32Fto32_0(v)
        case OpCvt32Fto64:
@@ -1323,6 +1327,29 @@ func rewriteValuePPC64_OpCopysign_0(v *Value) bool {
                return true
        }
 }
+func rewriteValuePPC64_OpCtz16_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (Ctz16 x)
+       // cond:
+       // result: (POPCNTW (MOVHZreg (ANDN <typ.Int16> (ADDconst <typ.Int16> [-1] x) x)))
+       for {
+               x := v.Args[0]
+               v.reset(OpPPC64POPCNTW)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, typ.Int64)
+               v1 := b.NewValue0(v.Pos, OpPPC64ANDN, typ.Int16)
+               v2 := b.NewValue0(v.Pos, OpPPC64ADDconst, typ.Int16)
+               v2.AuxInt = -1
+               v2.AddArg(x)
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+}
 func rewriteValuePPC64_OpCtz32_0(v *Value) bool {
        b := v.Block
        _ = b
@@ -1389,6 +1416,29 @@ func rewriteValuePPC64_OpCtz64NonZero_0(v *Value) bool {
                return true
        }
 }
+func rewriteValuePPC64_OpCtz8_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (Ctz8 x)
+       // cond:
+       // result: (POPCNTB (MOVBZreg (ANDN <typ.UInt8> (ADDconst <typ.UInt8> [-1] x) x)))
+       for {
+               x := v.Args[0]
+               v.reset(OpPPC64POPCNTB)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64)
+               v1 := b.NewValue0(v.Pos, OpPPC64ANDN, typ.UInt8)
+               v2 := b.NewValue0(v.Pos, OpPPC64ADDconst, typ.UInt8)
+               v2.AuxInt = -1
+               v2.AddArg(x)
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+}
 func rewriteValuePPC64_OpCvt32Fto32_0(v *Value) bool {
        b := v.Block
        _ = b
@@ -26653,11 +26703,11 @@ func rewriteValuePPC64_OpPopCount8_0(v *Value) bool {
        _ = typ
        // match: (PopCount8 x)
        // cond:
-       // result: (POPCNTB (MOVBreg x))
+       // result: (POPCNTB (MOVBZreg x))
        for {
                x := v.Args[0]
                v.reset(OpPPC64POPCNTB)
-               v0 := b.NewValue0(v.Pos, OpPPC64MOVBreg, typ.Int64)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64)
                v0.AddArg(x)
                v.AddArg(v0)
                return true