case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
- ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
+ ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD, ssa.OpPPC64BRH, ssa.OpPPC64BRW, ssa.OpPPC64BRD:
r := v.Reg()
p := s.Prog(v.Op.Asm())
p.To.Type = obj.TYPE_REG
(PrefetchCache ptr mem) => (DCBT ptr mem [0])
(PrefetchCacheStreamed ptr mem) => (DCBT ptr mem [16])
+// Use byte reverse instructions on Power10
+(Bswap(16|32|64) x) && buildcfg.GOPPC64>=10 => (BR(H|W|D) x)
{name: "XORCC", argLength: 2, reg: gp21, asm: "XORCC", commutative: true, clobberFlags: true, typ: "(Int,Flags)"}, // arg0^arg1 sets CC
{name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true}, // arg0^^arg1
{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"}, // -arg0 (integer)
+ {name: "BRD", argLength: 1, reg: gp11, asm: "BRD"}, // reversebytes64(arg0)
+ {name: "BRW", argLength: 1, reg: gp11, asm: "BRW"}, // reversebytes32(arg0)
+ {name: "BRH", argLength: 1, reg: gp11, asm: "BRH"}, // reversebytes16(arg0)
{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"}, // -arg0 (floating point)
{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"}, // sqrt(arg0) (floating point)
{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"}, // sqrt(arg0) (floating point, single precision)
{name: "BitLen32", argLength: 1}, // Number of bits in arg[0] (returns 0-32)
{name: "BitLen64", argLength: 1}, // Number of bits in arg[0] (returns 0-64)
+ {name: "Bswap16", argLength: 1}, // Swap bytes
{name: "Bswap32", argLength: 1}, // Swap bytes
{name: "Bswap64", argLength: 1}, // Swap bytes
OpPPC64XORCC
OpPPC64EQV
OpPPC64NEG
+ OpPPC64BRD
+ OpPPC64BRW
+ OpPPC64BRH
OpPPC64FNEG
OpPPC64FSQRT
OpPPC64FSQRTS
OpBitLen16
OpBitLen32
OpBitLen64
+ OpBswap16
OpBswap32
OpBswap64
OpBitRev8
},
},
},
+ {
+ name: "BRD",
+ argLen: 1,
+ asm: ppc64.ABRD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ outputs: []outputInfo{
+ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ },
+ },
+ {
+ name: "BRW",
+ argLen: 1,
+ asm: ppc64.ABRW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ outputs: []outputInfo{
+ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ },
+ },
+ {
+ name: "BRH",
+ argLen: 1,
+ asm: ppc64.ABRH,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ outputs: []outputInfo{
+ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ },
+ },
{
name: "FNEG",
argLen: 1,
argLen: 1,
generic: true,
},
+ {
+ name: "Bswap16",
+ argLen: 1,
+ generic: true,
+ },
{
name: "Bswap32",
argLen: 1,
return rewriteValuePPC64_OpBitLen32(v)
case OpBitLen64:
return rewriteValuePPC64_OpBitLen64(v)
+ case OpBswap16:
+ return rewriteValuePPC64_OpBswap16(v)
+ case OpBswap32:
+ return rewriteValuePPC64_OpBswap32(v)
+ case OpBswap64:
+ return rewriteValuePPC64_OpBswap64(v)
case OpCeil:
v.Op = OpPPC64FCEIL
return true
return true
}
}
+// rewriteValuePPC64_OpBswap16 applies the lowering rule
+//
+//	(Bswap16 x) && buildcfg.GOPPC64>=10 => (BRH x)
+//
+// It reports whether v was rewritten. When the GOPPC64 level is below 10
+// the value is left untouched and false is returned.
+func rewriteValuePPC64_OpBswap16(v *Value) bool {
+	operand := v.Args[0]
+	if buildcfg.GOPPC64 < 10 {
+		// Rule condition not met: no rewrite for this value.
+		return false
+	}
+	v.reset(OpPPC64BRH)
+	v.AddArg(operand)
+	return true
+}
+// rewriteValuePPC64_OpBswap32 applies the lowering rule
+//
+//	(Bswap32 x) && buildcfg.GOPPC64>=10 => (BRW x)
+//
+// It reports whether v was rewritten. When the GOPPC64 level is below 10
+// the value is left untouched and false is returned.
+func rewriteValuePPC64_OpBswap32(v *Value) bool {
+	operand := v.Args[0]
+	if buildcfg.GOPPC64 < 10 {
+		// Rule condition not met: no rewrite for this value.
+		return false
+	}
+	v.reset(OpPPC64BRW)
+	v.AddArg(operand)
+	return true
+}
+// rewriteValuePPC64_OpBswap64 applies the lowering rule
+//
+//	(Bswap64 x) && buildcfg.GOPPC64>=10 => (BRD x)
+//
+// It reports whether v was rewritten. When the GOPPC64 level is below 10
+// the value is left untouched and false is returned.
+func rewriteValuePPC64_OpBswap64(v *Value) bool {
+	operand := v.Args[0]
+	if buildcfg.GOPPC64 < 10 {
+		// Rule condition not met: no rewrite for this value.
+		return false
+	}
+	v.reset(OpPPC64BRD)
+	v.AddArg(operand)
+	return true
+}
func rewriteValuePPC64_OpCom16(v *Value) bool {
v_0 := v.Args[0]
// match: (Com16 x)
},
sys.ARM64, sys.PPC64)
+ /* Enable this for PPC64 only on Power10: the new byte reverse instructions
+ that Power10 provides are what make it worthwhile as an intrinsic. */
+ brev_arch := []sys.ArchFamily{sys.AMD64, sys.ARM64, sys.ARM, sys.S390X}
+ if buildcfg.GOPPC64 >= 10 {
+ brev_arch = append(brev_arch, sys.PPC64)
+ }
/******** runtime/internal/sys ********/
addF("runtime/internal/sys", "Bswap32",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBswap32, types.Types[types.TUINT32], args[0])
},
- sys.AMD64, sys.ARM64, sys.ARM, sys.S390X)
+ brev_arch...)
addF("runtime/internal/sys", "Bswap64",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBswap64, types.Types[types.TUINT64], args[0])
},
- sys.AMD64, sys.ARM64, sys.ARM, sys.S390X)
+ brev_arch...)
/****** Prefetch ******/
makePrefetchFunc := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
alias("math/bits", "ReverseBytes64", "runtime/internal/sys", "Bswap64", all...)
alias("math/bits", "ReverseBytes32", "runtime/internal/sys", "Bswap32", all...)
// ReverseBytes inlines correctly, no need to intrinsify it.
- // ReverseBytes16 lowers to a rotate, no need for anything special here.
+ // Nothing special is needed for targets where ReverseBytes16 lowers to a rotate.
+ // On Power10, a 16-bit rotate is not available, so use the BRH instruction instead.
+	// Register math/bits.ReverseBytes16 as an intrinsic for PPC64 only when
+	// the GOPPC64 level is at least 10; it is emitted as a generic Bswap16 op.
+	if buildcfg.GOPPC64 >= 10 {
+		addF("math/bits", "ReverseBytes16",
+			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+				// Type the result with the operation's exact width (uint16),
+				// consistent with the Bswap32/Bswap64 intrinsics, which use
+				// TUINT32/TUINT64 rather than the word-sized TUINT.
+				return s.newValue1(ssa.OpBswap16, types.Types[types.TUINT16], args[0])
+			},
+			sys.PPC64)
+	}
+
addF("math/bits", "Len64",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0])
// amd64:"BSWAPQ"
// s390x:"MOVDBR"
// arm64:"REV"
+ // ppc64x/power10: "BRD"
return bits.ReverseBytes64(n)
}
// amd64:"BSWAPL"
// s390x:"MOVWBR"
// arm64:"REVW"
+ // ppc64x/power10: "BRW"
return bits.ReverseBytes32(n)
}
// arm/5:"SLL","SRL","ORR"
// arm/6:"REV16"
// arm/7:"REV16"
+ // ppc64x/power10: "BRH"
return bits.ReverseBytes16(n)
}
"loong64": {},
"mips": {"GOMIPS", "hardfloat", "softfloat"},
"mips64": {"GOMIPS64", "hardfloat", "softfloat"},
- "ppc64": {"GOPPC64", "power8", "power9"},
- "ppc64le": {"GOPPC64", "power8", "power9"},
+ "ppc64": {"GOPPC64", "power8", "power9", "power10"},
+ "ppc64le": {"GOPPC64", "power8", "power9", "power10"},
"ppc64x": {}, // A pseudo-arch representing both ppc64 and ppc64le
"s390x": {},
"wasm": {},