cmd/compile/internal/gc: refactor cgen_div

author Michael Munday <munday@ca.ibm.com>

Mon, 11 Apr 2016 01:58:37 +0000 (21:58 -0400)

committer Matthew Dempsky <mdempsky@google.com>

Mon, 11 Apr 2016 05:19:13 +0000 (05:19 +0000)
author Michael Munday <munday@ca.ibm.com>
Mon, 11 Apr 2016 01:58:37 +0000 (21:58 -0400)
committer Matthew Dempsky <mdempsky@google.com>
Mon, 11 Apr 2016 05:19:13 +0000 (05:19 +0000)
diff --git a/src/cmd/compile/internal/gc/cgen.go b/src/cmd/compile/internal/gc/cgen.go

index a9cedf7cfc2e3f80a2c53af95cb072611f123b04..eacbc30f87c23e0dd294e31c89ada7f14548a7a0 100644 (file)
--- a/src/cmd/compile/internal/gc/cgen.go
+++ b/src/cmd/compile/internal/gc/cgen.go
@@ -2622,24 +2622,48 @@ func cgen_ret(n *Node) {
         }
  }
  
+// hasHMUL64 reports whether the architecture supports 64-bit
+// signed and unsigned high multiplication (OHMUL).
+func hasHMUL64() bool {
+       switch Ctxt.Arch.Family {
+       case sys.AMD64, sys.S390X:
+               return true
+       case sys.ARM, sys.ARM64, sys.I386, sys.MIPS64, sys.PPC64:
+               return false
+       }
+       Fatalf("unknown architecture")
+       return false
+}
+
+// hasRROTC64 reports whether the architecture supports 64-bit
+// rotate through carry instructions (ORROTC).
+func hasRROTC64() bool {
+       switch Ctxt.Arch.Family {
+       case sys.AMD64:
+               return true
+       case sys.ARM, sys.ARM64, sys.I386, sys.MIPS64, sys.PPC64, sys.S390X:
+               return false
+       }
+       Fatalf("unknown architecture")
+       return false
+}
+
  // generate division according to op, one of:
  //     res = nl / nr
  //     res = nl % nr
  func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
         var w int
  
-       // TODO(rsc): arm64 needs to support the relevant instructions
-       // in peep and optoas in order to enable this.
-       // TODO(rsc): ppc64 needs to support the relevant instructions
-       // in peep and optoas in order to enable this.
-       if nr.Op != OLITERAL || Ctxt.Arch.Family == sys.MIPS64 || Ctxt.Arch.Family == sys.ARM64 || Ctxt.Arch.Family == sys.PPC64 {
+       // Architectures need to support 64-bit high multiplications
+       // (OHMUL) in order to perform divide by constant optimizations.
+       if nr.Op != OLITERAL || !hasHMUL64() {
                 goto longdiv
         }
         w = int(nl.Type.Width * 8)
  
         // Front end handled 32-bit division. We only need to handle 64-bit.
-       // try to do division by multiply by (2^w)/d
-       // see hacker's delight chapter 10
+       // Try to do division using multiplication: (2^w)/d.
+       // See Hacker's Delight, chapter 10.
         switch Simtype[nl.Type.Etype] {
         default:
                 goto longdiv
@@ -2652,6 +2676,17 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
                 if m.Bad != 0 {
                         break
                 }
+
+               // In order to add the numerator we need to be able to
+               // avoid overflow. This is done by shifting the result of the
+               // addition right by 1 and inserting the carry bit into
+               // the MSB. For now this needs the RROTC instruction.
+               // TODO(mundaym): Hacker's Delight 2nd ed. chapter 10 proposes
+               // an alternative sequence of instructions for architectures
+               // that do not have a shift right with carry instruction.
+               if m.Ua != 0 && !hasRROTC64() {
+                       goto longdiv
+               }
                 if op == OMOD {
                         goto longmod
                 }
@@ -2665,7 +2700,7 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
                 Thearch.Cgen_hmul(&n1, &n2, &n3)
  
                 if m.Ua != 0 {
-                       // need to add numerator accounting for overflow
+                       // Need to add numerator accounting for overflow.
                         Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3)
  
                         Nodconst(&n2, nl.Type, 1)
@@ -2703,7 +2738,7 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
                 Thearch.Cgen_hmul(&n1, &n2, &n3)
  
                 if m.Sm < 0 {
-                       // need to add numerator
+                       // Need to add numerator (cannot overflow).
                         Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3)
                 }
  
@@ -2716,8 +2751,8 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
                 Thearch.Gins(Thearch.Optoas(OSUB, nl.Type), &n1, &n3) // added
  
                 if m.Sd < 0 {
-                       // this could probably be removed
-                       // by factoring it into the multiplier
+                       // This could probably be removed by factoring it into
+                       // the multiplier.
                         Thearch.Gins(Thearch.Optoas(OMINUS, nl.Type), nil, &n3)
                 }
  
@@ -2729,14 +2764,14 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
  
         goto longdiv
  
-       // division and mod using (slow) hardware instruction
+       // Division and mod using (slow) hardware instruction.
  longdiv:
         Thearch.Dodiv(op, nl, nr, res)
  
         return
  
-       // mod using formula A%B = A-(A/B*B) but
-       // we know that there is a fast algorithm for A/B
+       // Mod using formula A%B = A-(A/B*B) but
+       // we know that there is a fast algorithm for A/B.
  longmod:
         var n1 Node
         Regalloc(&n1, nl.Type, res)
@@ -2746,11 +2781,6 @@ longmod:
         Regalloc(&n2, nl.Type, nil)
         cgen_div(ODIV, &n1, nr, &n2)
         a := Thearch.Optoas(OMUL, nl.Type)
-       if w == 8 {
-               // use 2-operand 16-bit multiply
-               // because there is no 2-operand 8-bit multiply
-               a = Thearch.Optoas(OMUL, Types[TINT16]) // XXX was IMULW
-       }
  
         if !Smallintconst(nr) {
                 var n3 Node
author	Michael Munday <munday@ca.ibm.com>
	Mon, 11 Apr 2016 01:58:37 +0000 (21:58 -0400)
committer	Matthew Dempsky <mdempsky@google.com>
	Mon, 11 Apr 2016 05:19:13 +0000 (05:19 +0000)