This can be done efficiently with few instructions.
This also adds MULHDUCC for further codegen improvement.
Change-Id: I06320ba4383a679341b911a237a360ef07b19168
Reviewed-on: https://go-review.googlesource.com/c/go/+/605975
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Archana Ravindar <aravinda@redhat.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
p.To.Reg = r
case ssa.OpPPC64ADDCC, ssa.OpPPC64ANDCC, ssa.OpPPC64SUBCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC, ssa.OpPPC64NORCC,
- ssa.OpPPC64ANDNCC:
+ ssa.OpPPC64ANDNCC, ssa.OpPPC64MULHDUCC:
r1 := v.Args[0].Reg()
r2 := v.Args[1].Reg()
p := s.Prog(v.Op.Asm())
(Mul(32|16|8) ...) => (MULLW ...)
(Select0 (Mul64uhilo x y)) => (MULHDU x y)
(Select1 (Mul64uhilo x y)) => (MULLD x y)
+(Select0 (Mul64uover x y)) => (MULLD x y)
+(Select1 (Mul64uover x y)) => (SETBCR [2] (CMPconst [0] (MULHDU <x.Type> x y)))
(Div64 [false] x y) => (DIVD x y)
(Div64u ...) => (DIVDU ...)
{name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
{name: "MADDLD", argLength: 3, reg: gp31, asm: "MADDLD", typ: "Int64"}, // (arg0*arg1)+arg2 (signed 64-bit)
- {name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true}, // (arg0 * arg1) >> 64, signed
- {name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true}, // (arg0 * arg1) >> 32, signed
- {name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true}, // (arg0 * arg1) >> 64, unsigned
- {name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true}, // (arg0 * arg1) >> 32, unsigned
+ {name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true}, // (arg0 * arg1) >> 64, signed
+ {name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true}, // (arg0 * arg1) >> 32, signed
+ {name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true}, // (arg0 * arg1) >> 64, unsigned
+ {name: "MULHDUCC", argLength: 2, reg: gp21, asm: "MULHDUCC", commutative: true, typ: "(Int64,Flags)"}, // (arg0 * arg1) >> 64, unsigned, sets CC
+ {name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true}, // (arg0 * arg1) >> 32, unsigned
{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true}, // arg0*arg1
{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1
// 2. Rewrite (CMPconst [0] (Select0 (OpCC ...))) into (Select1 (OpCC...))
// Note: to minimize potentially expensive regeneration of CC opcodes during the flagalloc pass, only rewrite if
// both ops are in the same block.
-(CMPconst [0] z:((ADD|AND|ANDN|OR|SUB|NOR|XOR) x y)) && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
+(CMPconst [0] z:((ADD|AND|ANDN|OR|SUB|NOR|XOR|MULHDU) x y)) && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
(CMPconst [0] z:((NEG|CNTLZD|RLDICL) x)) && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
// Note: ADDCCconst only assembles to 1 instruction for int16 constants.
(CMPconst [0] z:(ADDconst [c] x)) && int64(int16(c)) == c && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
(CMPconst [0] z:(ANDconst [c] x)) && int64(uint16(c)) == c && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
// And finally, fixup the flag user.
-(CMPconst <t> [0] (Select0 z:((ADD|AND|ANDN|OR|SUB|NOR|XOR)CC x y))) => (Select1 <t> z)
+(CMPconst <t> [0] (Select0 z:((ADD|AND|ANDN|OR|SUB|NOR|XOR|MULHDU)CC x y))) => (Select1 <t> z)
(CMPconst <t> [0] (Select0 z:((ADDCCconst|ANDCCconst|NEGCC|CNTLZDCC|RLDICLCC) y))) => (Select1 <t> z)
// After trying to convert ANDconst to ANDCCconst above, if the CC result is not needed, try to avoid using
OpPPC64MULHD
OpPPC64MULHW
OpPPC64MULHDU
+ OpPPC64MULHDUCC
OpPPC64MULHWU
OpPPC64FMUL
OpPPC64FMULS
},
},
},
+ {
+ name: "MULHDUCC",
+ argLen: 2,
+ commutative: true,
+ asm: ppc64.AMULHDUCC,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ outputs: []outputInfo{
+ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ },
+ },
{
name: "MULHWU",
argLen: 2,
OpPPC64ADD: OpPPC64ADDCC,
OpPPC64ADDconst: OpPPC64ADDCCconst,
OpPPC64AND: OpPPC64ANDCC,
- OpPPC64ANDconst: OpPPC64ANDCCconst,
OpPPC64ANDN: OpPPC64ANDNCC,
+ OpPPC64ANDconst: OpPPC64ANDCCconst,
OpPPC64CNTLZD: OpPPC64CNTLZDCC,
+ OpPPC64MULHDU: OpPPC64MULHDUCC,
+ OpPPC64NEG: OpPPC64NEGCC,
+ OpPPC64NOR: OpPPC64NORCC,
OpPPC64OR: OpPPC64ORCC,
OpPPC64RLDICL: OpPPC64RLDICLCC,
OpPPC64SUB: OpPPC64SUBCC,
- OpPPC64NEG: OpPPC64NEGCC,
- OpPPC64NOR: OpPPC64NORCC,
OpPPC64XOR: OpPPC64XORCC,
}
b := op.Block
v.AddArg2(x, y)
return true
}
+ // match: (Select0 (Mul64uover x y))
+ // result: (MULLD x y)
+ for {
+ if v_0.Op != OpMul64uover {
+ break
+ }
+ y := v_0.Args[1]
+ x := v_0.Args[0]
+ v.reset(OpPPC64MULLD)
+ v.AddArg2(x, y)
+ return true
+ }
// match: (Select0 (Add64carry x y c))
// result: (Select0 <typ.UInt64> (ADDE x y (Select1 <typ.UInt64> (ADDCconst c [-1]))))
for {
v.AddArg2(x, y)
return true
}
+ // match: (Select1 (Mul64uover x y))
+ // result: (SETBCR [2] (CMPconst [0] (MULHDU <x.Type> x y)))
+ for {
+ if v_0.Op != OpMul64uover {
+ break
+ }
+ y := v_0.Args[1]
+ x := v_0.Args[0]
+ v.reset(OpPPC64SETBCR)
+ v.AuxInt = int32ToAuxInt(2)
+ v0 := b.NewValue0(v.Pos, OpPPC64CMPconst, types.TypeFlags)
+ v0.AuxInt = int64ToAuxInt(0)
+ v1 := b.NewValue0(v.Pos, OpPPC64MULHDU, x.Type)
+ v1.AddArg2(x, y)
+ v0.AddArg(v1)
+ v.AddArg(v0)
+ return true
+ }
// match: (Select1 (Add64carry x y c))
// result: (ADDZEzero (Select1 <typ.UInt64> (ADDE x y (Select1 <typ.UInt64> (ADDCconst c [-1])))))
for {
v.AddArg(convertPPC64OpToOpCC(z))
return true
}
+ // match: (CMPconst [0] z:(MULHDU x y))
+ // cond: v.Block == z.Block
+ // result: (CMPconst [0] convertPPC64OpToOpCC(z))
+ for {
+ if auxIntToInt64(v.AuxInt) != 0 {
+ break
+ }
+ z := v_0
+ if z.Op != OpPPC64MULHDU {
+ break
+ }
+ if !(v.Block == z.Block) {
+ break
+ }
+ v.reset(OpPPC64CMPconst)
+ v.AuxInt = int64ToAuxInt(0)
+ v.AddArg(convertPPC64OpToOpCC(z))
+ return true
+ }
// match: (CMPconst [0] z:(NEG x))
// cond: v.Block == z.Block
// result: (CMPconst [0] convertPPC64OpToOpCC(z))
v.AddArg(z)
return true
}
+ // match: (CMPconst <t> [0] (Select0 z:(MULHDUCC x y)))
+ // result: (Select1 <t> z)
+ for {
+ t := v.Type
+ if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpSelect0 {
+ break
+ }
+ z := v_0.Args[0]
+ if z.Op != OpPPC64MULHDUCC {
+ break
+ }
+ v.reset(OpSelect1)
+ v.Type = t
+ v.AddArg(z)
+ return true
+ }
// match: (CMPconst <t> [0] (Select0 z:(ADDCCconst y)))
// result: (Select1 <t> z)
for {
}
return s.newValue2(ssa.OpMul64uover, types.NewTuple(types.Types[types.TUINT], types.Types[types.TUINT]), args[0], args[1])
},
- sys.AMD64, sys.I386, sys.Loong64, sys.MIPS64, sys.RISCV64, sys.ARM64)
+ sys.AMD64, sys.I386, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.ARM64)
add("runtime", "KeepAlive",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
data := s.newValue1(ssa.OpIData, s.f.Config.Types.BytePtr, args[0])
{"ppc64", "internal/runtime/atomic", "Xchguintptr"}: struct{}{},
{"ppc64", "internal/runtime/math", "Add64"}: struct{}{},
{"ppc64", "internal/runtime/math", "Mul64"}: struct{}{},
+ {"ppc64", "internal/runtime/math", "MulUintptr"}: struct{}{},
{"ppc64", "internal/runtime/sys", "Len64"}: struct{}{},
{"ppc64", "internal/runtime/sys", "Len8"}: struct{}{},
{"ppc64", "internal/runtime/sys", "OnesCount64"}: struct{}{},
{"ppc64le", "internal/runtime/atomic", "Xchguintptr"}: struct{}{},
{"ppc64le", "internal/runtime/math", "Add64"}: struct{}{},
{"ppc64le", "internal/runtime/math", "Mul64"}: struct{}{},
+ {"ppc64le", "internal/runtime/math", "MulUintptr"}: struct{}{},
{"ppc64le", "internal/runtime/sys", "Len64"}: struct{}{},
{"ppc64le", "internal/runtime/sys", "Len8"}: struct{}{},
{"ppc64le", "internal/runtime/sys", "OnesCount64"}: struct{}{},
x[12] = uint64(c)
}
+ // ppc64x:"MULHDUCC",^"MULHDU"
+ hi, _ := bits.Mul64(x[13], x[14])
+ if hi != 0 {
+ x[14] = hi
+ }
+
}
func constantWrite(b bool, p *bool) {