func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpCtz64, types.Types[TINT], args[0])
},
- sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
+ sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
addF("math/bits", "TrailingZeros32",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpCtz32, types.Types[TINT], args[0])
},
- sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
+ sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
addF("math/bits", "TrailingZeros16",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
x := s.newValue1(ssa.OpZeroExt16to32, types.Types[TUINT32], args[0])
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBitLen64, types.Types[TINT], args[0])
},
- sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
+ sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
addF("math/bits", "Len32",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
if s.config.PtrSize == 4 {
x := s.newValue1(ssa.OpZeroExt32to64, types.Types[TUINT64], args[0])
return s.newValue1(ssa.OpBitLen64, types.Types[TINT], x)
},
- sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
+ sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
addF("math/bits", "Len16",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
if s.config.PtrSize == 4 {
x := s.newValue1(ssa.OpZeroExt16to64, types.Types[TUINT64], args[0])
return s.newValue1(ssa.OpBitLen64, types.Types[TINT], x)
},
- sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
+ sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
// Note: disabled on AMD64 because the Go code is faster!
addF("math/bits", "Len8",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
x := s.newValue1(ssa.OpZeroExt8to64, types.Types[TUINT64], args[0])
return s.newValue1(ssa.OpBitLen64, types.Types[TINT], x)
},
- sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
+ sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
addF("math/bits", "Len",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
}
return s.newValue1(ssa.OpBitLen64, types.Types[TINT], args[0])
},
- sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS)
+ sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64)
// LeadingZeros is handled because it trivially calls Len.
addF("math/bits", "Reverse64",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBitRev64, types.Types[TINT], args[0])
},
sys.ARM64)
- makeOnesCount := func(op64 ssa.Op, op32 ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+ makeOnesCountAMD64 := func(op64 ssa.Op, op32 ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
aux := s.lookupSymbol(n, &ssa.ExternSymbol{Sym: syslook("support_popcnt").Sym.Linksym()})
addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), aux, s.sb)
}
}
addF("math/bits", "OnesCount64",
- makeOnesCount(ssa.OpPopCount64, ssa.OpPopCount64),
+ makeOnesCountAMD64(ssa.OpPopCount64, ssa.OpPopCount64),
sys.AMD64)
+ addF("math/bits", "OnesCount64",
+ func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+ return s.newValue1(ssa.OpPopCount64, types.Types[TINT], args[0])
+ },
+ sys.PPC64)
addF("math/bits", "OnesCount32",
- makeOnesCount(ssa.OpPopCount32, ssa.OpPopCount32),
+ makeOnesCountAMD64(ssa.OpPopCount32, ssa.OpPopCount32),
sys.AMD64)
+ addF("math/bits", "OnesCount32",
+ func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+ return s.newValue1(ssa.OpPopCount32, types.Types[TINT], args[0])
+ },
+ sys.PPC64)
addF("math/bits", "OnesCount16",
- makeOnesCount(ssa.OpPopCount16, ssa.OpPopCount16),
+ makeOnesCountAMD64(ssa.OpPopCount16, ssa.OpPopCount16),
sys.AMD64)
// Note: no OnesCount8, the Go implementation is faster - just a table load.
addF("math/bits", "OnesCount",
- makeOnesCount(ssa.OpPopCount64, ssa.OpPopCount32),
+ makeOnesCountAMD64(ssa.OpPopCount64, ssa.OpPopCount32),
sys.AMD64)
/******** sync/atomic ********/
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
- case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FRSP:
+ case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB:
r := v.Reg()
p := s.Prog(v.Op.Asm())
p.To.Type = obj.TYPE_REG
// (Addr {sym} base) -> (ADDconst {sym} base)
(OffPtr [off] ptr) -> (ADD (MOVDconst <typ.Int64> [off]) ptr)
+// Ctz is lowered as a population count of the mask of trailing zero bits.
+// NOTE(review): this relies on ANDN computing arg0 &^ arg1, so that
+// (x-1) &^ x has exactly the trailing-zero bits of x set - confirm against the op table.
+(Ctz64 x) -> (POPCNTD (ANDN <types.Int64> (ADDconst <types.Int64> [-1] x) x))
+(Ctz32 x) -> (POPCNTW (MOVWZreg (ANDN <types.Int> (ADDconst <types.Int> [-1] x) x)))
+
+// BitLen is the operand width minus the number of leading zeros.
+(BitLen64 x) -> (SUB (MOVDconst [64]) (CNTLZD <types.Int> x))
+(BitLen32 x) -> (SUB (MOVDconst [32]) (CNTLZW <types.Int> x))
+
+// The narrow PopCount forms must zero-extend their operand first. POPCNTW and
+// POPCNTB produce a count for every word/byte of the register, so any set bits
+// left in the upper part (e.g. from sign extension) would show up in the result.
+(PopCount64 x) -> (POPCNTD x)
+(PopCount32 x) -> (POPCNTW (MOVWZreg x))
+(PopCount16 x) -> (POPCNTW (MOVHZreg x))
+(PopCount8 x) -> (POPCNTB (MOVBZreg x))
+
(And64 x y) -> (AND x y)
(And32 x y) -> (AND x y)
(And16 x y) -> (AND x y)
{name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"}, // arg0 rotate left by auxInt bits
{name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
+ {name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD", clobberFlags: true}, // count leading zeros of arg0
+ {name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW", clobberFlags: true}, // count leading zeros of arg0 (32 bit)
+
+ {name: "POPCNTD", argLength: 1, reg: gp11, asm: "POPCNTD"}, // number of set bits in arg0
+ {name: "POPCNTW", argLength: 1, reg: gp11, asm: "POPCNTW"}, // number of set bits in each word of arg0, placed in the corresponding word
+ {name: "POPCNTB", argLength: 1, reg: gp11, asm: "POPCNTB"}, // number of set bits in each byte of arg0, placed in the corresponding byte
+
{name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV"}, // arg0/arg1
{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"}, // arg0/arg1
OpPPC64SLWconst
OpPPC64ROTLconst
OpPPC64ROTLWconst
+ OpPPC64CNTLZD
+ OpPPC64CNTLZW
+ OpPPC64POPCNTD
+ OpPPC64POPCNTW
+ OpPPC64POPCNTB
OpPPC64FDIV
OpPPC64FDIVS
OpPPC64DIVD
},
},
},
+ {
+ name: "CNTLZD",
+ argLen: 1,
+ clobberFlags: true,
+ asm: ppc64.ACNTLZD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ outputs: []outputInfo{
+ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ },
+ },
+ {
+ name: "CNTLZW",
+ argLen: 1,
+ clobberFlags: true,
+ asm: ppc64.ACNTLZW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ outputs: []outputInfo{
+ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ },
+ },
+ {
+ name: "POPCNTD",
+ argLen: 1,
+ asm: ppc64.APOPCNTD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ outputs: []outputInfo{
+ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ },
+ },
+ {
+ name: "POPCNTW",
+ argLen: 1,
+ asm: ppc64.APOPCNTW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ outputs: []outputInfo{
+ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ },
+ },
+ {
+ name: "POPCNTB",
+ argLen: 1,
+ asm: ppc64.APOPCNTB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ outputs: []outputInfo{
+ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ },
+ },
{
name: "FDIV",
argLen: 2,
return rewriteValuePPC64_OpAtomicStore64_0(v)
case OpAvg64u:
return rewriteValuePPC64_OpAvg64u_0(v)
+ case OpBitLen32:
+ return rewriteValuePPC64_OpBitLen32_0(v)
+ case OpBitLen64:
+ return rewriteValuePPC64_OpBitLen64_0(v)
case OpClosureCall:
return rewriteValuePPC64_OpClosureCall_0(v)
case OpCom16:
return rewriteValuePPC64_OpConstNil_0(v)
case OpConvert:
return rewriteValuePPC64_OpConvert_0(v)
+ case OpCtz32:
+ return rewriteValuePPC64_OpCtz32_0(v)
+ case OpCtz64:
+ return rewriteValuePPC64_OpCtz64_0(v)
case OpCvt32Fto32:
return rewriteValuePPC64_OpCvt32Fto32_0(v)
case OpCvt32Fto64:
return rewriteValuePPC64_OpPPC64XOR_0(v)
case OpPPC64XORconst:
return rewriteValuePPC64_OpPPC64XORconst_0(v)
+ case OpPopCount16:
+ return rewriteValuePPC64_OpPopCount16_0(v)
+ case OpPopCount32:
+ return rewriteValuePPC64_OpPopCount32_0(v)
+ case OpPopCount64:
+ return rewriteValuePPC64_OpPopCount64_0(v)
+ case OpPopCount8:
+ return rewriteValuePPC64_OpPopCount8_0(v)
case OpRound32F:
return rewriteValuePPC64_OpRound32F_0(v)
case OpRound64F:
return true
}
}
+func rewriteValuePPC64_OpBitLen32_0(v *Value) bool {
+ b := v.Block
+ _ = b
+ types := &b.Func.Config.Types
+ _ = types
+ // match: (BitLen32 x)
+ // cond:
+ // result: (SUB (MOVDconst [32]) (CNTLZW <types.Int> x))
+ for {
+ x := v.Args[0]
+ v.reset(OpPPC64SUB)
+ v0 := b.NewValue0(v.Pos, OpPPC64MOVDconst, types.Int64)
+ v0.AuxInt = 32
+ v.AddArg(v0)
+ v1 := b.NewValue0(v.Pos, OpPPC64CNTLZW, types.Int)
+ v1.AddArg(x)
+ v.AddArg(v1)
+ return true
+ }
+}
+func rewriteValuePPC64_OpBitLen64_0(v *Value) bool {
+ b := v.Block
+ _ = b
+ types := &b.Func.Config.Types
+ _ = types
+ // match: (BitLen64 x)
+ // cond:
+ // result: (SUB (MOVDconst [64]) (CNTLZD <types.Int> x))
+ for {
+ x := v.Args[0]
+ v.reset(OpPPC64SUB)
+ v0 := b.NewValue0(v.Pos, OpPPC64MOVDconst, types.Int64)
+ v0.AuxInt = 64
+ v.AddArg(v0)
+ v1 := b.NewValue0(v.Pos, OpPPC64CNTLZD, types.Int)
+ v1.AddArg(x)
+ v.AddArg(v1)
+ return true
+ }
+}
func rewriteValuePPC64_OpClosureCall_0(v *Value) bool {
// match: (ClosureCall [argwid] entry closure mem)
// cond:
return true
}
}
+func rewriteValuePPC64_OpCtz32_0(v *Value) bool {
+ b := v.Block
+ _ = b
+ types := &b.Func.Config.Types
+ _ = types
+ // match: (Ctz32 x)
+ // cond:
+ // result: (POPCNTW (MOVWZreg (ANDN <types.Int> (ADDconst <types.Int> [-1] x) x)))
+ for {
+ x := v.Args[0]
+ v.reset(OpPPC64POPCNTW)
+ v0 := b.NewValue0(v.Pos, OpPPC64MOVWZreg, types.Int64)
+ v1 := b.NewValue0(v.Pos, OpPPC64ANDN, types.Int)
+ v2 := b.NewValue0(v.Pos, OpPPC64ADDconst, types.Int)
+ v2.AuxInt = -1
+ v2.AddArg(x)
+ v1.AddArg(v2)
+ v1.AddArg(x)
+ v0.AddArg(v1)
+ v.AddArg(v0)
+ return true
+ }
+}
+func rewriteValuePPC64_OpCtz64_0(v *Value) bool {
+ b := v.Block
+ _ = b
+ types := &b.Func.Config.Types
+ _ = types
+ // match: (Ctz64 x)
+ // cond:
+ // result: (POPCNTD (ANDN <types.Int64> (ADDconst <types.Int64> [-1] x) x))
+ for {
+ x := v.Args[0]
+ v.reset(OpPPC64POPCNTD)
+ v0 := b.NewValue0(v.Pos, OpPPC64ANDN, types.Int64)
+ v1 := b.NewValue0(v.Pos, OpPPC64ADDconst, types.Int64)
+ v1.AuxInt = -1
+ v1.AddArg(x)
+ v0.AddArg(v1)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
+}
func rewriteValuePPC64_OpCvt32Fto32_0(v *Value) bool {
b := v.Block
_ = b
}
return false
}
+func rewriteValuePPC64_OpPopCount16_0(v *Value) bool {
+ b := v.Block
+ _ = b
+ types := &b.Func.Config.Types
+ _ = types
+ // match: (PopCount16 x)
+ // cond:
+ // result: (POPCNTW (MOVHZreg x))
+ for {
+ x := v.Args[0]
+ v.reset(OpPPC64POPCNTW)
+ v0 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, types.Int64)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
+}
+func rewriteValuePPC64_OpPopCount32_0(v *Value) bool {
+ b := v.Block
+ _ = b
+ types := &b.Func.Config.Types
+ _ = types
+ // match: (PopCount32 x)
+ // cond:
+ // result: (POPCNTW (MOVWZreg x))
+ for {
+ x := v.Args[0]
+ v.reset(OpPPC64POPCNTW)
+ v0 := b.NewValue0(v.Pos, OpPPC64MOVWZreg, types.Int64)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
+}
+func rewriteValuePPC64_OpPopCount64_0(v *Value) bool {
+ // match: (PopCount64 x)
+ // cond:
+ // result: (POPCNTD x)
+ for {
+ x := v.Args[0]
+ v.reset(OpPPC64POPCNTD)
+ v.AddArg(x)
+ return true
+ }
+}
+// rewriteValuePPC64_OpPopCount8_0 lowers the generic PopCount8 op to POPCNTB
+// applied to the zero-extended byte and always reports that v was rewritten.
+// Zero extension (not sign extension) is required: POPCNTB yields a count for
+// every byte of its operand, so the upper bytes must be clear or their counts
+// would end up in the result.
+func rewriteValuePPC64_OpPopCount8_0(v *Value) bool {
+	b := v.Block
+	_ = b
+	types := &b.Func.Config.Types
+	_ = types
+	// match: (PopCount8 x)
+	// cond:
+	// result: (POPCNTB (MOVBZreg x))
+	for {
+		x := v.Args[0]
+		v.reset(OpPPC64POPCNTB)
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, types.Int64)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+}
func rewriteValuePPC64_OpRound32F_0(v *Value) bool {
// match: (Round32F x)
// cond:
if x <= 1<<8-1 {
got := OnesCount8(uint8(x))
if got != want {
- t.Fatalf("OnesCount8(%#02x) == %d; want %d", x, got, want)
+ t.Fatalf("OnesCount8(%#02x) == %d; want %d", uint8(x), got, want)
}
}
if x <= 1<<16-1 {
got := OnesCount16(uint16(x))
if got != want {
- t.Fatalf("OnesCount16(%#04x) == %d; want %d", x, got, want)
+ t.Fatalf("OnesCount16(%#04x) == %d; want %d", uint16(x), got, want)
}
}
if x <= 1<<32-1 {
got := OnesCount32(uint32(x))
if got != want {
- t.Fatalf("OnesCount32(%#08x) == %d; want %d", x, got, want)
+ t.Fatalf("OnesCount32(%#08x) == %d; want %d", uint32(x), got, want)
}
if UintSize == 32 {
got = OnesCount(uint(x))
if got != want {
- t.Fatalf("OnesCount(%#08x) == %d; want %d", x, got, want)
+ t.Fatalf("OnesCount(%#08x) == %d; want %d", uint32(x), got, want)
}
}
}