]> Cypherpunks repositories - gostls13.git/commitdiff
more work on divide by constant.
authorKen Thompson <ken@golang.org>
Sat, 8 Aug 2009 03:21:04 +0000 (20:21 -0700)
committerKen Thompson <ken@golang.org>
Sat, 8 Aug 2009 03:21:04 +0000 (20:21 -0700)
no real change -- disabled because of bugs

R=rsc
OCL=32923
CL=32923

src/cmd/6g/gg.h
src/cmd/6g/ggen.c
src/cmd/6g/gsubr.c

index f9f50cc9d421899389ac04bd89e191854099b8a1..6ba975bad26e5400be65dc9b03ba4eeca667c668 100644 (file)
@@ -42,6 +42,23 @@ struct       Prog
        void*   reg;            // pointer to containing Reg struct
 };
 
+typedef        struct  Magic Magic;
+struct Magic
+{
+       int     w;      // input for both - width
+       int     s;      // output for both - shift
+       int     bad;    // output for both - unexpected failure
+
+       // magic multiplier for signed literal divisors
+       int64   sd;     // input - literal divisor
+       int64   sm;     // output - multiplier
+
+       // magic multiplier for unsigned literal divisors
+       uint64  ud;     // input - literal divisor
+       uint64  um;     // output - multiplier
+       int     ua;     // output - adder
+};
+
 EXTERN Biobuf* bout;
 EXTERN int32   dynloc;
 EXTERN uchar   reg[D_NONE];
@@ -127,6 +144,8 @@ void        afunclit(Addr*);
 void   datagostring(Strlit*, Addr*);
 int    powtwo(Node*);
 Type*  tounsigned(Type*);
+void   smagic(Magic*);
+void   umagic(Magic*);
 
 /*
  * obj.c
index 4e71f75e5cbd7c8787e97d1ba56ad92018d2fd17..b0fa9c725537e1d0eca0c20b61fcdbdaf0d345b7 100644 (file)
@@ -525,6 +525,36 @@ dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)
                gmove(dx, res);
 }
 
+static void
+savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
+{
+       int r;
+
+       r = reg[dr];
+
+       nodreg(x, types[TINT64], dr);
+       regalloc(x, t, x);
+
+       // save current ax and dx if they are live
+       // and not the destination
+       memset(oldx, 0, sizeof *oldx);
+       if(r > 0 && !samereg(x, res)) {
+               regalloc(oldx, t, N);
+               gmove(x, oldx);
+       }
+}
+
+static void
+restx(Node *x, Node *oldx)
+{
+       regfree(x);
+
+       if(oldx->op != 0) {
+               gmove(oldx, x);
+               regfree(oldx);
+       }
+}
+
 /*
  * generate division according to op, one of:
  *     res = nl / nr
@@ -533,18 +563,20 @@ dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)
 void
 cgen_div(int op, Node *nl, Node *nr, Node *res)
 {
-       Node ax, dx, oldax, olddx, n1, n2, n3;
-       int rax, rdx, n, w;
+       Node ax, dx, cx, oldax, olddx, oldcx;
+       Node n1, n2, n3, savl, savr;
+       int n, w, s;
+       Magic m;
 
        if(nl->ullman >= UINF) {
-               tempname(&n1, nl->type);
-               cgen(nl, &n1);
-               nl = &n1;
+               tempname(&savl, nl->type);
+               cgen(nl, &savl);
+               nl = &savl;
        }
        if(nr->ullman >= UINF) {
-               tempname(&n2, nr->type);
-               cgen(nr, &n2);
-               nr = &n2;
+               tempname(&savr, nr->type);
+               cgen(nr, &savr);
+               nr = &savr;
        }
 
        if(nr->op != OLITERAL)
@@ -552,8 +584,14 @@ cgen_div(int op, Node *nl, Node *nr, Node *res)
 
        // special cases of mod/div
        // by a constant
-       n = powtwo(nr);
        w = nl->type->width*8;
+       s = 0;
+       n = powtwo(nr);
+       if(n >= 1000) {
+               // negative power of 2
+               s = 1;
+               n -= 1000;
+       }
 
        if(n+1 >= w) {
                // just sign bit
@@ -571,97 +609,132 @@ cgen_div(int op, Node *nl, Node *nr, Node *res)
        switch(n) {
        case 0:
                // divide by 1
-               cgen(nl, res);
-               return;
-       case 1:
-               // divide by 2
                regalloc(&n1, nl->type, res);
                cgen(nl, &n1);
-               if(issigned[nl->type->etype]) {
-                       // develop -1 iff nl is negative
-                       regalloc(&n2, nl->type, N);
-                       gmove(&n1, &n2);
-                       nodconst(&n3, nl->type, w-1);
-                       gins(optoas(ORSH, nl->type), &n3, &n2);
-                       gins(optoas(OSUB, nl->type), &n2, &n1);
-                       regfree(&n2);
-               }
-               nodconst(&n2, nl->type, n);
-               gins(optoas(ORSH, nl->type), &n2, &n1);
+               if(s)
+                       gins(optoas(OMINUS, nl->type), N, &n1);
                gmove(&n1, res);
                regfree(&n1);
                return;
+       case 1:
+               // divide by 2
+               regalloc(&n1, nl->type, res);
+               cgen(nl, &n1);
+               if(!issigned[nl->type->etype])
+                       break;
+
+               // develop -1 iff nl is negative
+               regalloc(&n2, nl->type, N);
+               gmove(&n1, &n2);
+               nodconst(&n3, nl->type, w-1);
+               gins(optoas(ORSH, nl->type), &n3, &n2);
+               gins(optoas(OSUB, nl->type), &n2, &n1);
+               regfree(&n2);
+               break;
        default:
                regalloc(&n1, nl->type, res);
                cgen(nl, &n1);
-               if(issigned[nl->type->etype]) {
-                       // develop (2^k)-1 iff nl is negative
-                       regalloc(&n2, nl->type, N);
-                       gmove(&n1, &n2);
-                       nodconst(&n3, nl->type, w-1);
-                       gins(optoas(ORSH, nl->type), &n3, &n2);
-                       nodconst(&n3, nl->type, w-n);
-                       gins(optoas(ORSH, tounsigned(nl->type)), &n3, &n2);
-                       gins(optoas(OADD, nl->type), &n2, &n1);
-                       regfree(&n2);
-               }
-               nodconst(&n2, nl->type, n);
-               gins(optoas(ORSH, nl->type), &n2, &n1);
-               gmove(&n1, res);
-               regfree(&n1);
+               if(!issigned[nl->type->etype])
+                       break;
+
+               // develop (2^k)-1 iff nl is negative
+               regalloc(&n2, nl->type, N);
+               gmove(&n1, &n2);
+               nodconst(&n3, nl->type, w-1);
+               gins(optoas(ORSH, nl->type), &n3, &n2);
+               nodconst(&n3, nl->type, w-n);
+               gins(optoas(ORSH, tounsigned(nl->type)), &n3, &n2);
+               gins(optoas(OADD, nl->type), &n2, &n1);
+               regfree(&n2);
+               break;
        }
+       nodconst(&n2, nl->type, n);
+       gins(optoas(ORSH, nl->type), &n2, &n1);
+       if(s)
+               gins(optoas(OMINUS, nl->type), N, &n1);
+       gmove(&n1, res);
+       regfree(&n1);
        return;
 
 divbymul:
+goto longdiv;
        switch(simtype[nl->type->etype]) {
        default:
                goto longdiv;
 
-       case TINT32:
+       case TUINT16:
        case TUINT32:
-       case TINT64:
        case TUINT64:
+               m.w = w;
+               m.ud = mpgetfix(nr->val.u.xval);
+               umagic(&m);
+               if(m.bad)
+                       break;
+               if(op == OMOD) {
+                       // todo
+                       break;
+               }
+               if(m.ua != 0) {
+                       // todo fixup
+                       break;
+               }
                break;
-       }
-       // todo
-       goto longdiv;
 
-longdiv:
-       rax = reg[D_AX];
-       rdx = reg[D_DX];
+       case TINT16:
+       case TINT32:
+       case TINT64:
+               m.w = w;
+               m.sd = mpgetfix(nr->val.u.xval);
+               smagic(&m);
+               if(m.bad)
+                       break;
+               if(op == OMOD) {
+                       // todo
+                       break;
+               }
+               if(m.sm < 0) {
+                       // todo fixup
+                       break;
+               }
 
-       nodreg(&ax, types[TINT64], D_AX);
-       nodreg(&dx, types[TINT64], D_DX);
-       regalloc(&ax, nl->type, &ax);
-       regalloc(&dx, nl->type, &dx);
+               savex(D_AX, &ax, &oldax, res, nl->type);
+               savex(D_DX, &dx, &olddx, res, nl->type);
+               savex(D_CX, &cx, &oldcx, res, nl->type);
 
-       // save current ax and dx if they are live
-       // and not the destination
-       memset(&oldax, 0, sizeof oldax);
-       memset(&olddx, 0, sizeof olddx);
-       if(rax > 0 && !samereg(&ax, res)) {
-               regalloc(&oldax, nl->type, N);
-               gmove(&ax, &oldax);
-       }
-       if(rdx > 0 && !samereg(&dx, res)) {
-               regalloc(&olddx, nl->type, N);
-               gmove(&dx, &olddx);
-       }
+               regalloc(&n1, nl->type, N);
+               cgen(nl, &n1);                          // num -> reg(n1)
 
-       dodiv(op, nl, nr, res, &ax, &dx);
+               nodconst(&n2, nl->type, m.sm);
+               gmove(&n2, &ax);                        // const->ax
 
-       regfree(&ax);
-       regfree(&dx);
+               gins(optoas(OMUL, nl->type), &n1, N);   // imul reg
 
-       if(oldax.op != 0) {
-               gmove(&oldax, &ax);
-               regfree(&oldax);
-       }
-       if(olddx.op != 0) {
-               gmove(&olddx, &dx);
-               regfree(&olddx);
+               nodconst(&n2, nl->type, m.s);
+               gins(optoas(ORSH, nl->type), &n2, &dx); // shift dx
+
+               nodconst(&n2, nl->type, w-1);
+               gins(optoas(ORSH, nl->type), &n2, &n1); // -1 iff num is neg
+               gins(optoas(OSUB, nl->type), &n1, &dx); // added
+
+               if(m.sd < 0)
+                       gins(optoas(OMINUS, nl->type), N, &dx);
+
+               regfree(&n1);
+               gmove(&dx, res);
+
+               restx(&ax, &oldax);
+               restx(&dx, &olddx);
+               restx(&cx, &oldcx);
+               return;
        }
+       goto longdiv;
 
+longdiv:
+       savex(D_AX, &ax, &oldax, res, nl->type);
+       savex(D_DX, &dx, &olddx, res, nl->type);
+       dodiv(op, nl, nr, res, &ax, &dx);
+       restx(&ax, &oldax);
+       restx(&dx, &olddx);
 }
 
 /*
index 136a8d5efe3d2d5d3972a0b88a3a094b8900bfeb..c9d7980ebc0feabb4d18d0e0807e56bf228db0f6 100644 (file)
@@ -1881,6 +1881,17 @@ powtwo(Node *n)
                b = b<<1;
        }
 
+       if(!issigned[n->type->etype])
+               goto no;
+
+       v = -v;
+       b = 1ULL;
+       for(i=0; i<64; i++) {
+               if(b == v)
+                       return i+1000;
+               b = b<<1;
+       }
+
 no:
        return -1;
 }
@@ -1894,6 +1905,7 @@ tounsigned(Type *t)
        switch(t->etype) {
        default:
                print("tounsigned: unknown type %T\n", t);
+               t = T;
                break;
        case TINT:
                t = types[TUINT];
@@ -1913,3 +1925,186 @@ tounsigned(Type *t)
        }
        return t;
 }
+
+void
+smagic(Magic *m)
+{
+       int p;
+       uint64 ad, anc, delta, q1, r1, q2, r2, t, two31;
+       uint64 mask;
+
+       m->bad = 0;
+       switch(m->w) {
+       default:
+               m->bad = 1;
+               return;
+       case 8:
+               mask = 0xffLL;
+               break;
+       case 16:
+               mask = 0xffffLL;
+               break;
+       case 32:
+               mask = 0xffffffffLL;
+               break;
+       case 64:
+               mask = 0xffffffffffffffffLL;
+               break;
+       }
+       two31 = mask ^ (mask>>1);
+
+       p = m->w-1;
+       ad = m->sd;
+       if(m->sd < 0)
+               ad = -m->sd;
+
+       // bad denominators
+       if(ad == 0 || ad == 1 || ad == two31) {
+               m->bad = 1;
+               return;
+       }
+
+       t = two31;
+       ad &= mask;
+
+       anc = t - 1 - t%ad;
+       anc &= mask;
+
+       q1 = two31/anc;
+       r1 = two31 - q1*anc;
+       q1 &= mask;
+       r1 &= mask;
+
+       q2 = two31/ad;
+       r2 = two31 - q2*ad;
+       q2 &= mask;
+       r2 &= mask;
+
+       for(;;) {
+               p++;
+               q1 <<= 1;
+               r1 <<= 1;
+               q1 &= mask;
+               r1 &= mask;
+               if(r1 >= anc) {
+                       q1++;
+                       r1 -= anc;
+                       q1 &= mask;
+                       r1 &= mask;
+               }
+
+               q2 <<= 1;
+               r2 <<= 1;
+               q2 &= mask;
+               r2 &= mask;
+               if(r2 >= ad) {
+                       q2++;
+                       r2 -= ad;
+                       q2 &= mask;
+                       r2 &= mask;
+               }
+
+               delta = ad - r2;
+               delta &= mask;
+               if(q1 < delta || (q1 == delta && r1 == 0)) {
+                       continue;
+               }
+               break;
+       }
+
+       m->sm = q2+1;
+       m->s = p-m->w;
+}
+
+void
+umagic(Magic *m)
+{
+       int p;
+       uint64 nc, delta, q1, r1, q2, r2, two31;
+       uint64 mask;
+
+       m->bad = 0;
+       m->ua = 0;
+
+       switch(m->w) {
+       default:
+               m->bad = 1;
+               return;
+       case 8:
+               mask = 0xffLL;
+               break;
+       case 16:
+               mask = 0xffffLL;
+               break;
+       case 32:
+               mask = 0xffffffffLL;
+               break;
+       case 64:
+               mask = 0xffffffffffffffffLL;
+               break;
+       }
+       two31 = mask ^ (mask>>1);
+
+       m->ud &= mask;
+       if(m->ud == 0 || m->ud == two31) {
+               m->bad = 1;
+               return;
+       }
+       nc = mask - (-m->ud&mask)%m->ud;
+       p = m->w-1;
+
+       q1 = two31/nc;
+       r1 = two31 - q1*nc;
+       q1 &= mask;
+       r1 &= mask;
+
+       q2 = (two31-1) / m->ud;
+       r2 = (two31-1) - q2*m->ud;
+       q2 &= mask;
+       r2 &= mask;
+
+       for(;;) {
+               p++;
+               if(r1 >= nc-r1) {
+                       q1 <<= 1;
+                       q1++;
+                       r1 <<= 1;
+                       r1 -= nc;
+               } else {
+                       q1 <<= 1;
+                       r1 <<= 1;
+               }
+               q1 &= mask;
+               r1 &= mask;
+               if(r2+1 >= m->ud-r2) {
+                       if(q2 >= two31-1) {
+                               m->ua = 1;
+                       }
+                       q2 <<= 1;
+                       q2++;
+                       r2 <<= 1;
+                       r2++;
+                       r2 -= m->ud;
+               } else {
+                       if(q2 >= two31) {
+                               m->ua = 1;
+                       }
+                       q2 <<= 1;
+                       r2 <<= 1;
+                       r2++;
+               }
+               q2 &= mask;
+               r2 &= mask;
+
+               delta = m->ud - 1 - r2;
+               delta &= mask;
+
+               if(p < m->w+m->w)
+               if(q1 < delta || (q1 == delta && r1 == 0)) {
+                       continue;
+               }
+               break;
+       }
+       m->um = q2+1;
+       m->s = p-m->w;
+}