}
}
+// hasHMUL64 reports whether the target architecture (Ctxt.Arch.Family) supports
+// 64-bit signed and unsigned high multiplication (OHMUL).
+func hasHMUL64() bool {
+ switch Ctxt.Arch.Family {
+ case sys.AMD64, sys.S390X:
+ return true
+ case sys.ARM, sys.ARM64, sys.I386, sys.MIPS64, sys.PPC64:
+ return false
+ }
+ Fatalf("unknown architecture") // family appears in neither list above
+ return false // needed to compile; presumably unreachable if Fatalf does not return (confirm)
+}
+
+// hasRROTC64 reports whether the target architecture (Ctxt.Arch.Family) supports
+// 64-bit rotate through carry instructions (ORROTC).
+func hasRROTC64() bool {
+ switch Ctxt.Arch.Family {
+ case sys.AMD64:
+ return true
+ case sys.ARM, sys.ARM64, sys.I386, sys.MIPS64, sys.PPC64, sys.S390X:
+ return false
+ }
+ Fatalf("unknown architecture") // family appears in neither list above
+ return false // needed to compile; presumably unreachable if Fatalf does not return (confirm)
+}
+
// generate division according to op, one of:
// res = nl / nr
// res = nl % nr
func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
var w int
- // TODO(rsc): arm64 needs to support the relevant instructions
- // in peep and optoas in order to enable this.
- // TODO(rsc): ppc64 needs to support the relevant instructions
- // in peep and optoas in order to enable this.
- if nr.Op != OLITERAL || Ctxt.Arch.Family == sys.MIPS64 || Ctxt.Arch.Family == sys.ARM64 || Ctxt.Arch.Family == sys.PPC64 {
+ // Architectures need to support 64-bit high multiplications
+ // (OHMUL) in order to perform divide by constant optimizations.
+ if nr.Op != OLITERAL || !hasHMUL64() {
goto longdiv
}
w = int(nl.Type.Width * 8)
// Front end handled 32-bit division. We only need to handle 64-bit.
- // try to do division by multiply by (2^w)/d
- // see hacker's delight chapter 10
+ // Try to do division by multiplying by (2^w)/d.
+ // See Hacker's Delight, chapter 10.
switch Simtype[nl.Type.Etype] {
default:
goto longdiv
if m.Bad != 0 {
break
}
+
+ // In order to add the numerator we need to be able to
+ // avoid overflow. This is done by shifting the result of the
+ // addition right by 1 and inserting the carry bit into
+ // the MSB. For now this needs the RROTC instruction.
+ // TODO(mundaym): Hacker's Delight 2nd ed. chapter 10 proposes
+ // an alternative sequence of instructions for architectures
+ // that do not have a shift right with carry instruction.
+ if m.Ua != 0 && !hasRROTC64() {
+ goto longdiv
+ }
if op == OMOD {
goto longmod
}
Thearch.Cgen_hmul(&n1, &n2, &n3)
if m.Ua != 0 {
- // need to add numerator accounting for overflow
+ // Need to add numerator accounting for overflow.
Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3)
Nodconst(&n2, nl.Type, 1)
Thearch.Cgen_hmul(&n1, &n2, &n3)
if m.Sm < 0 {
- // need to add numerator
+ // Need to add numerator (cannot overflow).
Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3)
}
Thearch.Gins(Thearch.Optoas(OSUB, nl.Type), &n1, &n3) // added
if m.Sd < 0 {
- // this could probably be removed
- // by factoring it into the multiplier
+ // This could probably be removed by factoring it into
+ // the multiplier.
Thearch.Gins(Thearch.Optoas(OMINUS, nl.Type), nil, &n3)
}
goto longdiv
- // division and mod using (slow) hardware instruction
+ // Division and mod using (slow) hardware instruction.
longdiv:
Thearch.Dodiv(op, nl, nr, res)
return
- // mod using formula A%B = A-(A/B*B) but
- // we know that there is a fast algorithm for A/B
+ // Mod using formula A%B = A-(A/B*B) but
+ // we know that there is a fast algorithm for A/B.
longmod:
var n1 Node
Regalloc(&n1, nl.Type, res)
Regalloc(&n2, nl.Type, nil)
cgen_div(ODIV, &n1, nr, &n2)
a := Thearch.Optoas(OMUL, nl.Type)
- if w == 8 {
- // use 2-operand 16-bit multiply
- // because there is no 2-operand 8-bit multiply
- a = Thearch.Optoas(OMUL, Types[TINT16]) // XXX was IMULW
- }
if !Smallintconst(nr) {
var n3 Node