PNOP // 0700000000000000
SETB CR1,R3 // 7c640100
- VCLZLSBB V1, R2 // 10400e02
- VCTZLSBB V1, R2 // 10410e02
+ VCLZLSBB V1,R2 // 10400e02
+ VCTZLSBB V1,R2 // 10410e02
+
+ XSMAXJDP VS1,VS2,VS3 // f0611480
+ XSMINJDP VS1,VS2,VS3 // f06114c0
RET
ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
- ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
+ ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW, ssa.OpPPC64XSMINJDP, ssa.OpPPC64XSMAXJDP:
r := v.Reg()
r1 := v.Args[0].Reg()
r2 := v.Args[1].Reg()
(Sub32F ...) => (FSUBS ...)
(Sub64F ...) => (FSUB ...)
+(Min(32|64)F x y) && buildcfg.GOPPC64 >= 9 => (XSMINJDP x y)
+(Max(32|64)F x y) && buildcfg.GOPPC64 >= 9 => (XSMAXJDP x y)
+
// Combine 64 bit integer multiply and adds
(ADD l:(MULLD x y) z) && buildcfg.GOPPC64 >= 9 && l.Uses == 1 && clobber(l) => (MADDLD x y z)
{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"}, // arg0-arg1
{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"}, // arg0-arg1
+ // Note: the FPU works with float64 values in registers.
+ {name: "XSMINJDP", argLength: 2, reg: fp21, asm: "XSMINJDP"}, // fmin(arg0,arg1)
+ {name: "XSMAXJDP", argLength: 2, reg: fp21, asm: "XSMAXJDP"}, // fmax(arg0,arg1)
+
{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
{name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
OpPPC64SUBFCconst
OpPPC64FSUB
OpPPC64FSUBS
+ OpPPC64XSMINJDP
+ OpPPC64XSMAXJDP
OpPPC64MULLD
OpPPC64MULLW
OpPPC64MULLDconst
},
},
},
+ // XSMINJDP: two-argument op assembled as ppc64.AXSMINJDP. Both inputs and
+ // the single output share the same register mask (annotated F0-F30 below).
+ {
+ name: "XSMINJDP",
+ argLen: 2,
+ asm: ppc64.AXSMINJDP,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
+ {1, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
+ },
+ outputs: []outputInfo{
+ {0, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
+ },
+ },
+ },
+ // XSMAXJDP: two-argument op assembled as ppc64.AXSMAXJDP. Both inputs and
+ // the single output share the same register mask (annotated F0-F30 below).
+ {
+ name: "XSMAXJDP",
+ argLen: 2,
+ asm: ppc64.AXSMAXJDP,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
+ {1, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
+ },
+ outputs: []outputInfo{
+ {0, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
+ },
+ },
+ },
{
name: "MULLD",
argLen: 2,
return rewriteValuePPC64_OpLsh8x64(v)
case OpLsh8x8:
return rewriteValuePPC64_OpLsh8x8(v)
+ case OpMax32F:
+ return rewriteValuePPC64_OpMax32F(v)
+ case OpMax64F:
+ return rewriteValuePPC64_OpMax64F(v)
+ case OpMin32F:
+ return rewriteValuePPC64_OpMin32F(v)
+ case OpMin64F:
+ return rewriteValuePPC64_OpMin64F(v)
case OpMod16:
return rewriteValuePPC64_OpMod16(v)
case OpMod16u:
return true
}
}
+// rewriteValuePPC64_OpMax32F rewrites a Max32F value into the PPC64 XSMAXJDP
+// op when buildcfg.GOPPC64 >= 9. It reports whether v was rewritten; when the
+// condition does not hold, v is left unchanged and false is returned.
+// rewriteValuePPC64_OpMax64F emits the same target op.
+func rewriteValuePPC64_OpMax32F(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (Max32F x y)
+ // cond: buildcfg.GOPPC64 >= 9
+ // result: (XSMAXJDP x y)
+ // The loop body runs at most once: break abandons the match and falls
+ // through to the final return false.
+ for {
+ x := v_0
+ y := v_1
+ if !(buildcfg.GOPPC64 >= 9) {
+ break
+ }
+ v.reset(OpPPC64XSMAXJDP)
+ v.AddArg2(x, y)
+ return true
+ }
+ return false
+}
+// rewriteValuePPC64_OpMax64F rewrites a Max64F value into the PPC64 XSMAXJDP
+// op when buildcfg.GOPPC64 >= 9. It reports whether v was rewritten; when the
+// condition does not hold, v is left unchanged and false is returned.
+func rewriteValuePPC64_OpMax64F(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (Max64F x y)
+ // cond: buildcfg.GOPPC64 >= 9
+ // result: (XSMAXJDP x y)
+ // The loop body runs at most once: break abandons the match and falls
+ // through to the final return false.
+ for {
+ x := v_0
+ y := v_1
+ if !(buildcfg.GOPPC64 >= 9) {
+ break
+ }
+ v.reset(OpPPC64XSMAXJDP)
+ v.AddArg2(x, y)
+ return true
+ }
+ return false
+}
+// rewriteValuePPC64_OpMin32F rewrites a Min32F value into the PPC64 XSMINJDP
+// op when buildcfg.GOPPC64 >= 9. It reports whether v was rewritten; when the
+// condition does not hold, v is left unchanged and false is returned.
+// rewriteValuePPC64_OpMin64F emits the same target op.
+func rewriteValuePPC64_OpMin32F(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (Min32F x y)
+ // cond: buildcfg.GOPPC64 >= 9
+ // result: (XSMINJDP x y)
+ // The loop body runs at most once: break abandons the match and falls
+ // through to the final return false.
+ for {
+ x := v_0
+ y := v_1
+ if !(buildcfg.GOPPC64 >= 9) {
+ break
+ }
+ v.reset(OpPPC64XSMINJDP)
+ v.AddArg2(x, y)
+ return true
+ }
+ return false
+}
+// rewriteValuePPC64_OpMin64F rewrites a Min64F value into the PPC64 XSMINJDP
+// op when buildcfg.GOPPC64 >= 9. It reports whether v was rewritten; when the
+// condition does not hold, v is left unchanged and false is returned.
+func rewriteValuePPC64_OpMin64F(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (Min64F x y)
+ // cond: buildcfg.GOPPC64 >= 9
+ // result: (XSMINJDP x y)
+ // The loop body runs at most once: break abandons the match and falls
+ // through to the final return false.
+ for {
+ x := v_0
+ y := v_1
+ if !(buildcfg.GOPPC64 >= 9) {
+ break
+ }
+ v.reset(OpPPC64XSMINJDP)
+ v.AddArg2(x, y)
+ return true
+ }
+ return false
+}
func rewriteValuePPC64_OpMod16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// string comparisons during walk, not ssagen.
if typ.IsFloat() {
+ hasIntrinsic := false
switch Arch.LinkArch.Family {
case sys.AMD64, sys.ARM64, sys.RISCV64:
+ hasIntrinsic = true
+ case sys.PPC64:
+ hasIntrinsic = buildcfg.GOPPC64 >= 9
+ }
+
+ if hasIntrinsic {
var op ssa.Op
switch {
case typ.Kind() == types.TFLOAT64 && n.Op() == ir.OMIN:
AXVCVSXWSP
AXVCVUXDSP
AXVCVUXWSP
+ AXSMAXJDP
+ AXSMINJDP
ALASTAOUT // The last instruction in this list. Also the first opcode generated by ppc64map.
// aliases
"XVCVSXWSP",
"XVCVUXDSP",
"XVCVUXWSP",
+ "XSMAXJDP",
+ "XSMINJDP",
"LASTAOUT",
}
{as: AMTVSRD, a1: C_REG, a6: C_FREG, type_: 104, size: 4},
{as: AMTVSRDD, a1: C_REG, a2: C_REG, a6: C_VSREG, type_: 104, size: 4},
- /* VSX logical */
- {as: AXXLAND, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4}, /* vsx and, xx3-form */
- {as: AXXLOR, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4}, /* vsx or, xx3-form */
+ /* VSX xx3-form */
+ {as: AXXLAND, a1: C_FREG, a2: C_FREG, a6: C_FREG, type_: 90, size: 4}, /* vsx xx3-form (FPR usage) */
+ {as: AXXLAND, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4}, /* vsx xx3-form */
/* VSX select */
{as: AXXSEL, a1: C_VSREG, a2: C_VSREG, a3: C_VSREG, a6: C_VSREG, type_: 91, size: 4}, /* vsx select, xx4-form */
opset(AMTVSRWZ, r0)
opset(AMTVSRWS, r0)
- case AXXLAND: /* xxland, xxlandc, xxleqv, xxlnand */
+ case AXXLAND:
opset(AXXLANDC, r0)
opset(AXXLEQV, r0)
opset(AXXLNAND, r0)
-
- case AXXLOR: /* xxlorc, xxlnor, xxlor, xxlxor */
opset(AXXLORC, r0)
opset(AXXLNOR, r0)
opset(AXXLORQ, r0)
opset(AXXLXOR, r0)
+ opset(AXXLOR, r0)
+ opset(AXSMAXJDP, r0)
+ opset(AXSMINJDP, r0)
case AXXSEL: /* xxsel */
opset(AXXSEL, r0)
return OPVXX3(60, 146, 0) /* xxlor - v2.06 */
case AXXLXOR:
return OPVXX3(60, 154, 0) /* xxlxor - v2.06 */
+ case AXSMINJDP:
+ return OPVXX3(60, 152, 0) /* xsminjdp - v3.0 */
+ case AXSMAXJDP:
+ return OPVXX3(60, 144, 0) /* xsmaxjdp - v3.0 */
case AXXSEL:
return OPVXX4(60, 3, 0) /* xxsel - v2.06 */
// amd64:"MINSD"
// arm64:"FMIND"
// riscv64:"FMIN"
+ // ppc64/power9:"XSMINJDP"
+ // ppc64/power10:"XSMINJDP"
return min(a, b)
}
// amd64:"MINSD"
// arm64:"FMAXD"
// riscv64:"FMAX"
+ // ppc64/power9:"XSMAXJDP"
+ // ppc64/power10:"XSMAXJDP"
return max(a, b)
}
// amd64:"MINSS"
// arm64:"FMINS"
// riscv64:"FMINS"
+ // ppc64/power9:"XSMINJDP"
+ // ppc64/power10:"XSMINJDP"
return min(a, b)
}
// amd64:"MINSS"
// arm64:"FMAXS"
// riscv64:"FMAXS"
+ // ppc64/power9:"XSMAXJDP"
+ // ppc64/power10:"XSMAXJDP"
return max(a, b)
}