Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: ppc64x intrinsics for math/bits
author Lynn Boger <laboger@linux.vnet.ibm.com>
Mon, 24 Apr 2017 19:11:39 +0000 (15:11 -0400)
committer Lynn Boger <laboger@linux.vnet.ibm.com>
Wed, 10 May 2017 12:10:56 +0000 (12:10 +0000)
This adds math/bits intrinsics for OnesCount, Len, TrailingZeros on
ppc64x.

benchmark                       old ns/op     new ns/op     delta
BenchmarkLeadingZeros-16        4.26          1.71          -59.86%
BenchmarkLeadingZeros16-16      3.04          1.83          -39.80%
BenchmarkLeadingZeros32-16      3.31          1.82          -45.02%
BenchmarkLeadingZeros64-16      3.69          1.71          -53.66%
BenchmarkTrailingZeros-16       2.55          1.62          -36.47%
BenchmarkTrailingZeros32-16     2.55          1.77          -30.59%
BenchmarkTrailingZeros64-16     2.78          1.62          -41.73%
BenchmarkOnesCount-16           3.19          0.93          -70.85%
BenchmarkOnesCount32-16         2.55          1.18          -53.73%
BenchmarkOnesCount64-16         3.22          0.93          -71.12%

Update #18616

I also made a change to bits_test.go because when debugging some failures
the output was not quite providing the right argument information.

Change-Id: Ia58d31d1777cf4582a4505f85b11a1202ca07d3e
Reviewed-on: https://go-review.googlesource.com/41630
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/ppc64/ssa.go
src/cmd/compile/internal/ssa/gen/PPC64.rules
src/cmd/compile/internal/ssa/gen/PPC64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewritePPC64.go
src/math/bits/bits_test.go

index d12d45f0097ba9356ae85bb236cb36f824877539..7d53595c4951dd1f2c196eea8186c79112f0d394 100644 (file)
@@ -2730,12 +2730,12 @@ func init() {
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpCtz64, types.Types[TINT], args[0])
                },
-               sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
+               sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
        addF("math/bits", "TrailingZeros32",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpCtz32, types.Types[TINT], args[0])
                },
-               sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
+               sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
        addF("math/bits", "TrailingZeros16",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        x := s.newValue1(ssa.OpZeroExt16to32, types.Types[TUINT32], args[0])
@@ -2776,7 +2776,7 @@ func init() {
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpBitLen64, types.Types[TINT], args[0])
                },
-               sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
+               sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
        addF("math/bits", "Len32",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        if s.config.PtrSize == 4 {
@@ -2785,7 +2785,7 @@ func init() {
                        x := s.newValue1(ssa.OpZeroExt32to64, types.Types[TUINT64], args[0])
                        return s.newValue1(ssa.OpBitLen64, types.Types[TINT], x)
                },
-               sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
+               sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
        addF("math/bits", "Len16",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        if s.config.PtrSize == 4 {
@@ -2795,7 +2795,7 @@ func init() {
                        x := s.newValue1(ssa.OpZeroExt16to64, types.Types[TUINT64], args[0])
                        return s.newValue1(ssa.OpBitLen64, types.Types[TINT], x)
                },
-               sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
+               sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
        // Note: disabled on AMD64 because the Go code is faster!
        addF("math/bits", "Len8",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
@@ -2806,7 +2806,7 @@ func init() {
                        x := s.newValue1(ssa.OpZeroExt8to64, types.Types[TUINT64], args[0])
                        return s.newValue1(ssa.OpBitLen64, types.Types[TINT], x)
                },
-               sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
+               sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
 
        addF("math/bits", "Len",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
@@ -2815,7 +2815,7 @@ func init() {
                        }
                        return s.newValue1(ssa.OpBitLen64, types.Types[TINT], args[0])
                },
-               sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
+               sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
        // LeadingZeros is handled because it trivially calls Len.
        addF("math/bits", "Reverse64",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
@@ -2845,7 +2845,7 @@ func init() {
                        return s.newValue1(ssa.OpBitRev64, types.Types[TINT], args[0])
                },
                sys.ARM64)
-       makeOnesCount := func(op64 ssa.Op, op32 ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+       makeOnesCountAMD64 := func(op64 ssa.Op, op32 ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        aux := s.lookupSymbol(n, &ssa.ExternSymbol{Sym: syslook("support_popcnt").Sym.Linksym()})
                        addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), aux, s.sb)
@@ -2881,17 +2881,27 @@ func init() {
                }
        }
        addF("math/bits", "OnesCount64",
-               makeOnesCount(ssa.OpPopCount64, ssa.OpPopCount64),
+               makeOnesCountAMD64(ssa.OpPopCount64, ssa.OpPopCount64),
                sys.AMD64)
+       addF("math/bits", "OnesCount64",
+               func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+                       return s.newValue1(ssa.OpPopCount64, types.Types[TINT], args[0])
+               },
+               sys.PPC64)
        addF("math/bits", "OnesCount32",
-               makeOnesCount(ssa.OpPopCount32, ssa.OpPopCount32),
+               makeOnesCountAMD64(ssa.OpPopCount32, ssa.OpPopCount32),
                sys.AMD64)
+       addF("math/bits", "OnesCount32",
+               func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+                       return s.newValue1(ssa.OpPopCount32, types.Types[TINT], args[0])
+               },
+               sys.PPC64)
        addF("math/bits", "OnesCount16",
-               makeOnesCount(ssa.OpPopCount16, ssa.OpPopCount16),
+               makeOnesCountAMD64(ssa.OpPopCount16, ssa.OpPopCount16),
                sys.AMD64)
        // Note: no OnesCount8, the Go implementation is faster - just a table load.
        addF("math/bits", "OnesCount",
-               makeOnesCount(ssa.OpPopCount64, ssa.OpPopCount32),
+               makeOnesCountAMD64(ssa.OpPopCount64, ssa.OpPopCount32),
                sys.AMD64)
 
        /******** sync/atomic ********/
index 5d902cdae1c44ec79d867229b8d0ed0485f88547..a95dabccf0be1215e2dab583930287d76a18ede4 100644 (file)
@@ -596,7 +596,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.To.Type = obj.TYPE_REG
                p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
 
-       case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FRSP:
+       case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB:
                r := v.Reg()
                p := s.Prog(v.Op.Asm())
                p.To.Type = obj.TYPE_REG
index 3caeda24889e43b216bca465b74d6fbe8006de05..c246b309ea7120a1a9f346dfa4eb6a28931e4a68 100644 (file)
 // (Addr {sym} base) -> (ADDconst {sym} base)
 (OffPtr [off] ptr) -> (ADD (MOVDconst <typ.Int64> [off]) ptr)
 
+(Ctz64 x) -> (POPCNTD (ANDN <types.Int64> (ADDconst <types.Int64> [-1] x) x))
+(Ctz32 x) -> (POPCNTW (MOVWZreg (ANDN <types.Int> (ADDconst <types.Int> [-1] x) x)))
+
+(BitLen64 x) -> (SUB (MOVDconst [64]) (CNTLZD <types.Int> x))
+(BitLen32 x) -> (SUB (MOVDconst [32]) (CNTLZW <types.Int> x))
+
+(PopCount64 x) -> (POPCNTD x)
+(PopCount32 x) -> (POPCNTW (MOVWZreg x))
+(PopCount16 x) -> (POPCNTW (MOVHZreg x))
+(PopCount8 x) -> (POPCNTB (MOVBreg x))
+
 (And64 x y) -> (AND x y)
 (And32 x y) -> (AND x y)
 (And16 x y) -> (AND x y)
index f198c139da301977f170e67666761779de67e75b..2e8e239f15f227adeaa9438541e164f1198db197 100644 (file)
@@ -198,6 +198,13 @@ func init() {
                {name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"},   // arg0 rotate left by auxInt bits
                {name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
 
+               {name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD", clobberFlags: true}, // count leading zeros
+               {name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW", clobberFlags: true}, // count leading zeros (32 bit)
+
+               {name: "POPCNTD", argLength: 1, reg: gp11, asm: "POPCNTD"}, // number of set bits in arg0
+               {name: "POPCNTW", argLength: 1, reg: gp11, asm: "POPCNTW"}, // number of set bits in each word of arg0 placed in corresponding word
+               {name: "POPCNTB", argLength: 1, reg: gp11, asm: "POPCNTB"}, // number of set bits in each byte of arg0 placed in corresponding byte
+
                {name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV"},   // arg0/arg1
                {name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"}, // arg0/arg1
 
index 938743897ee55c7c1b0be5ca0d9eacfc2244da90..ae2dd5f5500c867731e05d65b61d19f044087956 100644 (file)
@@ -1294,6 +1294,11 @@ const (
        OpPPC64SLWconst
        OpPPC64ROTLconst
        OpPPC64ROTLWconst
+       OpPPC64CNTLZD
+       OpPPC64CNTLZW
+       OpPPC64POPCNTD
+       OpPPC64POPCNTW
+       OpPPC64POPCNTB
        OpPPC64FDIV
        OpPPC64FDIVS
        OpPPC64DIVD
@@ -16568,6 +16573,73 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "CNTLZD",
+               argLen:       1,
+               clobberFlags: true,
+               asm:          ppc64.ACNTLZD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+                       outputs: []outputInfo{
+                               {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+               },
+       },
+       {
+               name:         "CNTLZW",
+               argLen:       1,
+               clobberFlags: true,
+               asm:          ppc64.ACNTLZW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+                       outputs: []outputInfo{
+                               {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+               },
+       },
+       {
+               name:   "POPCNTD",
+               argLen: 1,
+               asm:    ppc64.APOPCNTD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+                       outputs: []outputInfo{
+                               {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+               },
+       },
+       {
+               name:   "POPCNTW",
+               argLen: 1,
+               asm:    ppc64.APOPCNTW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+                       outputs: []outputInfo{
+                               {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+               },
+       },
+       {
+               name:   "POPCNTB",
+               argLen: 1,
+               asm:    ppc64.APOPCNTB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+                       outputs: []outputInfo{
+                               {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+               },
+       },
        {
                name:   "FDIV",
                argLen: 2,
index e3eab943d79e5639be092fd99f4c87f6e8fc3f56..4d49b67eb9c94a304a0c7535ed3104aa458ae042 100644 (file)
@@ -69,6 +69,10 @@ func rewriteValuePPC64(v *Value) bool {
                return rewriteValuePPC64_OpAtomicStore64_0(v)
        case OpAvg64u:
                return rewriteValuePPC64_OpAvg64u_0(v)
+       case OpBitLen32:
+               return rewriteValuePPC64_OpBitLen32_0(v)
+       case OpBitLen64:
+               return rewriteValuePPC64_OpBitLen64_0(v)
        case OpClosureCall:
                return rewriteValuePPC64_OpClosureCall_0(v)
        case OpCom16:
@@ -97,6 +101,10 @@ func rewriteValuePPC64(v *Value) bool {
                return rewriteValuePPC64_OpConstNil_0(v)
        case OpConvert:
                return rewriteValuePPC64_OpConvert_0(v)
+       case OpCtz32:
+               return rewriteValuePPC64_OpCtz32_0(v)
+       case OpCtz64:
+               return rewriteValuePPC64_OpCtz64_0(v)
        case OpCvt32Fto32:
                return rewriteValuePPC64_OpCvt32Fto32_0(v)
        case OpCvt32Fto64:
@@ -465,6 +473,14 @@ func rewriteValuePPC64(v *Value) bool {
                return rewriteValuePPC64_OpPPC64XOR_0(v)
        case OpPPC64XORconst:
                return rewriteValuePPC64_OpPPC64XORconst_0(v)
+       case OpPopCount16:
+               return rewriteValuePPC64_OpPopCount16_0(v)
+       case OpPopCount32:
+               return rewriteValuePPC64_OpPopCount32_0(v)
+       case OpPopCount64:
+               return rewriteValuePPC64_OpPopCount64_0(v)
+       case OpPopCount8:
+               return rewriteValuePPC64_OpPopCount8_0(v)
        case OpRound32F:
                return rewriteValuePPC64_OpRound32F_0(v)
        case OpRound64F:
@@ -988,6 +1004,46 @@ func rewriteValuePPC64_OpAvg64u_0(v *Value) bool {
                return true
        }
 }
+func rewriteValuePPC64_OpBitLen32_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (BitLen32 x)
+       // cond:
+       // result: (SUB (MOVDconst [32]) (CNTLZW <types.Int> x))
+       for {
+               x := v.Args[0]
+               v.reset(OpPPC64SUB)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVDconst, types.Int64)
+               v0.AuxInt = 32
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpPPC64CNTLZW, types.Int)
+               v1.AddArg(x)
+               v.AddArg(v1)
+               return true
+       }
+}
+func rewriteValuePPC64_OpBitLen64_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (BitLen64 x)
+       // cond:
+       // result: (SUB (MOVDconst [64]) (CNTLZD <types.Int> x))
+       for {
+               x := v.Args[0]
+               v.reset(OpPPC64SUB)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVDconst, types.Int64)
+               v0.AuxInt = 64
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpPPC64CNTLZD, types.Int)
+               v1.AddArg(x)
+               v.AddArg(v1)
+               return true
+       }
+}
 func rewriteValuePPC64_OpClosureCall_0(v *Value) bool {
        // match: (ClosureCall [argwid] entry closure mem)
        // cond:
@@ -1155,6 +1211,50 @@ func rewriteValuePPC64_OpConvert_0(v *Value) bool {
                return true
        }
 }
+func rewriteValuePPC64_OpCtz32_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (Ctz32 x)
+       // cond:
+       // result: (POPCNTW (MOVWZreg (ANDN <types.Int> (ADDconst <types.Int> [-1] x) x)))
+       for {
+               x := v.Args[0]
+               v.reset(OpPPC64POPCNTW)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVWZreg, types.Int64)
+               v1 := b.NewValue0(v.Pos, OpPPC64ANDN, types.Int)
+               v2 := b.NewValue0(v.Pos, OpPPC64ADDconst, types.Int)
+               v2.AuxInt = -1
+               v2.AddArg(x)
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValuePPC64_OpCtz64_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (Ctz64 x)
+       // cond:
+       // result: (POPCNTD (ANDN <types.Int64> (ADDconst <types.Int64> [-1] x) x))
+       for {
+               x := v.Args[0]
+               v.reset(OpPPC64POPCNTD)
+               v0 := b.NewValue0(v.Pos, OpPPC64ANDN, types.Int64)
+               v1 := b.NewValue0(v.Pos, OpPPC64ADDconst, types.Int64)
+               v1.AuxInt = -1
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+}
 func rewriteValuePPC64_OpCvt32Fto32_0(v *Value) bool {
        b := v.Block
        _ = b
@@ -7944,6 +8044,68 @@ func rewriteValuePPC64_OpPPC64XORconst_0(v *Value) bool {
        }
        return false
 }
+func rewriteValuePPC64_OpPopCount16_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (PopCount16 x)
+       // cond:
+       // result: (POPCNTW (MOVHZreg x))
+       for {
+               x := v.Args[0]
+               v.reset(OpPPC64POPCNTW)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, types.Int64)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValuePPC64_OpPopCount32_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (PopCount32 x)
+       // cond:
+       // result: (POPCNTW (MOVWZreg x))
+       for {
+               x := v.Args[0]
+               v.reset(OpPPC64POPCNTW)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVWZreg, types.Int64)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValuePPC64_OpPopCount64_0(v *Value) bool {
+       // match: (PopCount64 x)
+       // cond:
+       // result: (POPCNTD x)
+       for {
+               x := v.Args[0]
+               v.reset(OpPPC64POPCNTD)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValuePPC64_OpPopCount8_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (PopCount8 x)
+       // cond:
+       // result: (POPCNTB (MOVBreg x))
+       for {
+               x := v.Args[0]
+               v.reset(OpPPC64POPCNTB)
+               v0 := b.NewValue0(v.Pos, OpPPC64MOVBreg, types.Int64)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+}
 func rewriteValuePPC64_OpRound32F_0(v *Value) bool {
        // match: (Round32F x)
        // cond:
index da846049d480235eeb5182d439870a809fa5cf72..ba05210c9be17fc74dde013f08997344b2ed29d1 100644 (file)
@@ -254,26 +254,26 @@ func testOnesCount(t *testing.T, x uint64, want int) {
        if x <= 1<<8-1 {
                got := OnesCount8(uint8(x))
                if got != want {
-                       t.Fatalf("OnesCount8(%#02x) == %d; want %d", x, got, want)
+                       t.Fatalf("OnesCount8(%#02x) == %d; want %d", uint8(x), got, want)
                }
        }
 
        if x <= 1<<16-1 {
                got := OnesCount16(uint16(x))
                if got != want {
-                       t.Fatalf("OnesCount16(%#04x) == %d; want %d", x, got, want)
+                       t.Fatalf("OnesCount16(%#04x) == %d; want %d", uint16(x), got, want)
                }
        }
 
        if x <= 1<<32-1 {
                got := OnesCount32(uint32(x))
                if got != want {
-                       t.Fatalf("OnesCount32(%#08x) == %d; want %d", x, got, want)
+                       t.Fatalf("OnesCount32(%#08x) == %d; want %d", uint32(x), got, want)
                }
                if UintSize == 32 {
                        got = OnesCount(uint(x))
                        if got != want {
-                               t.Fatalf("OnesCount(%#08x) == %d; want %d", x, got, want)
+                               t.Fatalf("OnesCount(%#08x) == %d; want %d", uint32(x), got, want)
                        }
                }
        }