]> Cypherpunks repositories - gostls13.git/commitdiff
better 64-bit handling in 8g.
authorRuss Cox <rsc@golang.org>
Thu, 28 May 2009 22:48:47 +0000 (15:48 -0700)
committerRuss Cox <rsc@golang.org>
Thu, 28 May 2009 22:48:47 +0000 (15:48 -0700)
fewer moves, fewer stupid LEALs.
powser1 runs (with evaln commented out).
beginnings of floating point.

R=ken
OCL=29540
CL=29543

src/cmd/6g/gsubr.c
src/cmd/8c/list.c
src/cmd/8g/cgen.c
src/cmd/8g/gg.h
src/cmd/8g/ggen.c
src/cmd/8g/gsubr.c
src/cmd/gc/const.c

index d8bd0767f08044696f1539c2ba65028dff55827c..f9b092039c9d6c20fb33cf4827483c1888d87848 100644 (file)
@@ -505,7 +505,7 @@ gmove(Node *f, Node *t)
 
        switch(CASE(ft, tt)) {
        default:
-               fatal("gmove %T -> %T", f, t);
+               fatal("gmove %lT -> %lT", f->type, t->type);
 
        /*
         * integer copy and truncate
index ec5ac9d60f87857d83e7ecdf178fcfbe4dfaa3d1..c2ce5b2951243c093686b2b3de0295ca318b85a1 100644 (file)
@@ -57,7 +57,7 @@ Bconv(Fmt *fp)
                if(str[0])
                        strcat(str, " ");
                if(var[i].sym == S) {
-                       sprint(ss, "$%ld", var[i].offset);
+                       sprint(ss, "$%d", var[i].offset);
                        s = ss;
                } else
                        s = var[i].sym->name;
@@ -108,7 +108,7 @@ Dconv(Fmt *fp)
        i = a->type;
        if(i >= D_INDIR) {
                if(a->offset)
-                       sprint(str, "%ld(%R)", a->offset, i-D_INDIR);
+                       sprint(str, "%d(%R)", a->offset, i-D_INDIR);
                else
                        sprint(str, "(%R)", i-D_INDIR);
                goto brk;
@@ -117,7 +117,7 @@ Dconv(Fmt *fp)
 
        default:
                if(a->offset)
-                       sprint(str, "$%ld,%R", a->offset, i);
+                       sprint(str, "$%d,%R", a->offset, i);
                else
                        sprint(str, "%R", i);
                break;
@@ -127,35 +127,35 @@ Dconv(Fmt *fp)
                break;
 
        case D_BRANCH:
-               sprint(str, "%ld(PC)", a->offset-pc);
+               sprint(str, "%d(PC)", a->offset-pc);
                break;
 
        case D_EXTERN:
-               sprint(str, "%s+%ld(SB)", a->sym->name, a->offset);
+               sprint(str, "%s+%d(SB)", a->sym->name, a->offset);
                break;
 
        case D_STATIC:
-               sprint(str, "%s<>+%ld(SB)", a->sym->name,
+               sprint(str, "%s<>+%d(SB)", a->sym->name,
                        a->offset);
                break;
 
        case D_AUTO:
-               sprint(str, "%s+%ld(SP)", a->sym->name, a->offset);
+               sprint(str, "%s+%d(SP)", a->sym->name, a->offset);
                break;
 
        case D_PARAM:
                if(a->sym)
-                       sprint(str, "%s+%ld(FP)", a->sym->name, a->offset);
+                       sprint(str, "%s+%d(FP)", a->sym->name, a->offset);
                else
-                       sprint(str, "%ld(FP)", a->offset);
+                       sprint(str, "%d(FP)", a->offset);
                break;
 
        case D_CONST:
-               sprint(str, "$%ld", a->offset);
+               sprint(str, "$%d", a->offset);
                break;
 
        case D_CONST2:
-               sprint(str, "$%ld-%ld", a->offset, a->offset2);
+               sprint(str, "$%d-%d", a->offset, a->offset2);
                break;
 
        case D_FCONST:
@@ -185,7 +185,7 @@ conv:
 
 char*  regstr[] =
 {
-       "AL",   /*[D_AL]*/      
+       "AL",   /*[D_AL]*/
        "CL",
        "DL",
        "BL",
index 609d900b0489da83a3313e7c22523f8803febdeb..faa81d3300d5a0b3f238267325d4884dbfd819ee 100644 (file)
@@ -27,6 +27,42 @@ is64(Type *t)
        return 0;
 }
 
+int
+noconv(Type *t1, Type *t2)
+{
+       int e1, e2;
+
+       e1 = simtype[t1->etype];
+       e2 = simtype[t2->etype];
+
+       switch(e1) {
+       case TINT8:
+       case TUINT8:
+               return e2 == TINT8 || e2 == TUINT8;
+
+       case TINT16:
+       case TUINT16:
+               return e2 == TINT16 || e2 == TUINT16;
+
+       case TINT32:
+       case TUINT32:
+       case TPTR32:
+               return e2 == TINT32 || e2 == TUINT32 || e2 == TPTR32;
+
+       case TINT64:
+       case TUINT64:
+       case TPTR64:
+               return e2 == TINT64 || e2 == TUINT64 || e2 == TPTR64;
+
+       case TFLOAT32:
+               return e2 == TFLOAT32;
+
+       case TFLOAT64:
+               return e2 == TFLOAT64;
+       }
+       return 0;
+}
+
 /*
  * generate:
  *     res = n;
@@ -38,7 +74,7 @@ is64(Type *t)
 void
 cgen(Node *n, Node *res)
 {
-       Node *nl, *nr, *r, n1, n2, rr;
+       Node *nl, *nr, *r, n1, n2, rr, f0, f1;
        Prog *p1, *p2, *p3;
        int a;
 
@@ -65,13 +101,13 @@ cgen(Node *n, Node *res)
                sgen(n, res, n->type->width);
                return;
        }
-       
+
        // if both are addressable, move
        if(n->addable && res->addable) {
                gmove(n, res);
                return;
        }
-       
+
        // if both are not addressable, use a temporary.
        if(!n->addable && !res->addable) {
                tempalloc(&n1, n->type);
@@ -96,7 +132,7 @@ cgen(Node *n, Node *res)
        // 64-bit ops are hard on 32-bit machine.
        if(is64(n->type) && cancgen64(n, res))
                return;
-       
+
        // use ullman to pick operand to eval first.
        nl = n->left;
        nr = n->right;
@@ -112,12 +148,15 @@ cgen(Node *n, Node *res)
                return;
        }
 
+       if(isfloat[n->type->etype] && isfloat[nl->type->etype])
+               goto flt;
+
        switch(n->op) {
        default:
                dump("cgen", n);
                fatal("cgen %O", n->op);
                break;
-       
+
        // these call bgen to get a bool value
        case OOROR:
        case OANDAND:
@@ -162,7 +201,7 @@ cgen(Node *n, Node *res)
                goto abop;
 
        case OCONV:
-               if(eqtype(n->type, nl->type)) {
+               if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) {
                        cgen(nl, res);
                        break;
                }
@@ -236,7 +275,7 @@ cgen(Node *n, Node *res)
        case OADDR:
                agen(nl, res);
                break;
-       
+
        case OCALLMETH:
                cgen_callmeth(n, 0);
                cgen_callret(n, res);
@@ -303,6 +342,29 @@ uop:       // unary
        gmove(&n1, res);
        tempfree(&n1);
        return;
+
+flt:   // floating-point.  387 (not SSE2) to interoperate with 6c
+       nodreg(&f0, n->type, D_F0);
+       nodreg(&f1, n->type, D_F0+1);
+       if(nl->ullman >= nr->ullman) {
+               cgen(nl, &f0);
+               if(nr->addable)
+                       gins(foptoas(n->op, n->type, 0), nr, &f0);
+               else {
+                       cgen(nr, &f0);
+                       gins(foptoas(n->op, n->type, Fpop), &f0, &f1);
+               }
+       } else {
+               cgen(nr, &f0);
+               if(nl->addable)
+                       gins(foptoas(n->op, n->type, Frev), nl, &f0);
+               else {
+                       cgen(nl, &f0);
+                       gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1);
+               }
+       }
+       gmove(&f0, res);
+       return;
 }
 
 /*
@@ -334,21 +396,21 @@ agen(Node *n, Node *res)
                regfree(&n1);
                return;
        }
-       
+
        // let's compute
        nl = n->left;
        nr = n->right;
-       
+
        switch(n->op) {
        default:
                fatal("agen %O", n->op);
-       
+
        case OCONV:
                if(!eqtype(n->type, nl->type))
                        fatal("agen: non-trivial OCONV");
                agen(nl, res);
                break;
-       
+
        case OCALLMETH:
                cgen_callmeth(n, 0);
                cgen_aret(n, res);
@@ -506,11 +568,11 @@ agen(Node *n, Node *res)
                        gins(optoas(OADD, types[tptr]), &n1, res);
                }
                break;
-       
+
        case OIND:
                cgen(nl, res);
                break;
-       
+
        case ODOT:
                t = nl->type;
                agen(nl, res);
@@ -719,7 +781,7 @@ bgen(Node *n, int true, Prog *to)
                        regfree(&n1);
                        break;
                }
-               
+
                if(is64(nr->type)) {
                        if(!nl->addable) {
                                tempalloc(&n1, nl->type);
@@ -916,7 +978,8 @@ sgen(Node *n, Node *res, int w)
 static int
 cancgen64(Node *n, Node *res)
 {
-       Node adr1, adr2, t1, t2, r1, r2, r3, r4, r5, nod, *l, *r;
+       Node t1, t2, ax, dx, cx, ex, fx, zero, *l, *r;
+       Node lo1, lo2, hi1, hi2;
        Prog *p1, *p2;
 
        if(n->op == OCALL)
@@ -936,14 +999,13 @@ cancgen64(Node *n, Node *res)
                return 1;
 
        case OMINUS:
+               nodconst(&zero, types[TINT32], 0);
                cgen(n->left, res);
-               gins(ANEGL, N, res);
-               res->xoffset += 4;
-               regalloc(&nod, types[TINT32], N);
-               gins(AXORL, &nod, &nod);
-               gins(ASBBL, res, &nod);
-               gins(AMOVL, &nod, res);
-               regfree(&nod);
+               split64(res, &lo1, &hi1);
+               gins(ANEGL, N, &lo1);
+               gins(AADCL, &zero, &hi1);
+               gins(ANEGL, N, &hi1);
+               splitclean();
                return 1;
 
        case OADD:
@@ -951,7 +1013,7 @@ cancgen64(Node *n, Node *res)
        case OMUL:
                break;
        }
-       
+
        l = n->left;
        r = n->right;
        if(!l->addable) {
@@ -963,97 +1025,73 @@ cancgen64(Node *n, Node *res)
                tempalloc(&t2, r->type);
                cgen(r, &t2);
                r = &t2;
-       }               
+       }
 
        // Setup for binary operation.
-       tempalloc(&adr1, types[TPTR32]);
-       agen(l, &adr1);         
-       tempalloc(&adr2, types[TPTR32]);
-       agen(r, &adr2);
+       split64(l, &lo1, &hi1);
+       split64(r, &lo2, &hi2);
 
-       nodreg(&r1, types[TPTR32], D_AX);
-       nodreg(&r2, types[TPTR32], D_DX);
-       nodreg(&r3, types[TPTR32], D_CX);
+       nodreg(&ax, types[TPTR32], D_AX);
+       nodreg(&cx, types[TPTR32], D_CX);
+       nodreg(&dx, types[TPTR32], D_DX);
 
+       // Do op.  Leave result in DX:AX.
        switch(n->op) {
        case OADD:
+               gins(AMOVL, &lo1, &ax);
+               gins(AMOVL, &hi1, &dx);
+               gins(AADDL, &lo2, &ax);
+               gins(AADCL, &hi2, &dx);
+               break;
+
        case OSUB:
-               gmove(&adr1, &r3);
-               r3.op = OINDREG;
-               r3.xoffset = 0;
-               gins(AMOVL, &r3, &r1);
-               r3.xoffset = 4;
-               gins(AMOVL, &r3, &r2);
-               
-               r3.xoffset = 0;
-               r3.op = OREGISTER;
-               gmove(&adr2, &r3);
-               r3.op = OINDREG;
-               if(n->op == OADD)
-                       gins(AADDL, &r3, &r1);
-               else
-                       gins(ASUBL, &r3, &r1);
-               r3.xoffset = 4;
-               if(n->op == OADD)
-                       gins(AADCL, &r3, &r2);
-               else
-                       gins(ASBBL, &r3, &r2);
+               gins(AMOVL, &lo1, &ax);
+               gins(AMOVL, &hi1, &dx);
+               gins(ASUBL, &lo2, &ax);
+               gins(ASBBL, &hi2, &dx);
                break;
 
-       case OMUL:      
-               regalloc(&r4, types[TPTR32], N);
-               regalloc(&r5, types[TPTR32], N);
-               
-               // load args into r2:r1 and r4:r3.
-               // leave result in r2:r1 (DX:AX)
-               gmove(&adr1, &r5);
-               r5.op = OINDREG;
-               r5.xoffset = 0;
-               gmove(&r5, &r1);
-               r5.xoffset = 4;
-               gmove(&r5, &r2);
-               r5.xoffset = 0;
-               r5.op = OREGISTER;
-               gmove(&adr2, &r5);
-               r5.op = OINDREG;
-               gmove(&r5, &r3);
-               r5.xoffset = 4;
-               gmove(&r5, &r4);
-               r5.xoffset = 0;
-               r5.op = OREGISTER;
-
-               // if r2|r4 == 0, use one 32 x 32 -> 64 unsigned multiply
-               gmove(&r2, &r5);
-               gins(AORL, &r4, &r5);
+       case OMUL:
+               // let's call the next two EX and FX.
+               regalloc(&ex, types[TPTR32], N);
+               regalloc(&fx, types[TPTR32], N);
+
+               // load args into DX:AX and EX:CX.
+               gins(AMOVL, &lo1, &ax);
+               gins(AMOVL, &hi1, &dx);
+               gins(AMOVL, &lo2, &cx);
+               gins(AMOVL, &hi2, &ex);
+
+               // if DX and EX are zero, use 32 x 32 -> 64 unsigned multiply.
+               gins(AMOVL, &dx, &fx);
+               gins(AORL, &ex, &fx);
                p1 = gbranch(AJNE, T);
-               gins(AMULL, &r3, N);    // AX (=r1) is implied
+               gins(AMULL, &cx, N);    // implicit &ax
                p2 = gbranch(AJMP, T);
                patch(p1, pc);
-       
-               // full 64x64 -> 64, from 32 x 32 -> 64.
-               gins(AIMULL, &r3, &r2);
-               gins(AMOVL, &r1, &r5);
-               gins(AIMULL, &r4, &r5);
-               gins(AADDL, &r2, &r5);
-               gins(AMOVL, &r3, &r2);
-               gins(AMULL, &r2, N);    // AX (=r1) is implied
-               gins(AADDL, &r5, &r2);
+
+               // full 64x64 -> 64, from 32x32 -> 64.
+               gins(AIMULL, &cx, &dx);
+               gins(AMOVL, &ax, &fx);
+               gins(AIMULL, &ex, &fx);
+               gins(AADDL, &dx, &fx);
+               gins(AMOVL, &cx, &dx);
+               gins(AMULL, &dx, N);    // implicit &ax
+               gins(AADDL, &fx, &dx);
                patch(p2, pc);
-               regfree(&r4);
-               regfree(&r5);
+
+               regfree(&ex);
+               regfree(&fx);
                break;
-       
        }
-       
-       tempfree(&adr2);
-       tempfree(&adr1);
-
-       // Store result.
-       gins(AMOVL, &r1, res);
-       res->xoffset += 4;
-       gins(AMOVL, &r2, res);
-       res->xoffset -= 4;
-       
+       splitclean();
+       splitclean();
+
+       split64(res, &lo1, &hi1);
+       gins(AMOVL, &ax, &lo1);
+       gins(AMOVL, &dx, &hi1);
+       splitclean();
+
        if(r == &t2)
                tempfree(&t2);
        if(l == &t1)
@@ -1068,47 +1106,23 @@ cancgen64(Node *n, Node *res)
 void
 cmp64(Node *nl, Node *nr, int op, Prog *to)
 {
-       int64 x;
-       Node adr1, adr2, rr;
-       Prog *br, *p;
+       Node lo1, hi1, lo2, hi2, rr;
+       Prog *br;
        Type *t;
-       
-       t = nr->type;
-       
-       memset(&adr1, 0, sizeof adr1);
-       memset(&adr2, 0, sizeof adr2);
-
-       regalloc(&adr1, types[TPTR32], N);
-       agen(nl, &adr1);
-       adr1.op = OINDREG;
-       nl = &adr1;
-       
-       x = 0;
-       if(nr->op == OLITERAL) {
-               if(!isconst(nr, CTINT))
-                       fatal("bad const in cmp64");
-               x = mpgetfix(nr->val.u.xval);
-       } else {
-               regalloc(&adr2, types[TPTR32], N);
-               agen(nr, &adr2);
-               adr2.op = OINDREG;
-               nr = &adr2;
-       }
-       
+
+       split64(nl, &lo1, &hi1);
+       split64(nr, &lo2, &hi2);
+
        // compare most significant word
-       nl->xoffset += 4;
-       if(nr->op == OLITERAL) {
-               p = gins(ACMPL, nl, nodintconst((uint32)(x>>32)));
-       } else {
-               regalloc(&rr, types[TUINT32], N);
-               nr->xoffset += 4;
-               gins(AMOVL, nr, &rr);
-               gins(ACMPL, nl, &rr);
-               nr->xoffset -= 4;
+       t = hi1.type;
+       if(nl->op == OLITERAL || nr->op == OLITERAL)
+               gins(ACMPL, &hi1, &hi2);
+       else {
+               regalloc(&rr, types[TINT32], N);
+               gins(AMOVL, &hi1, &rr);
+               gins(ACMPL, &rr, &hi2);
                regfree(&rr);
        }
-       nl->xoffset -= 4;
-
        br = P;
        switch(op) {
        default:
@@ -1149,39 +1163,28 @@ cmp64(Node *nl, Node *nr, int op, Prog *to)
                // L:
                patch(gbranch(optoas(OLT, t), T), to);
                br = gbranch(optoas(OGT, t), T);
-               break;  
+               break;
        }
 
        // compare least significant word
-       if(nr->op == OLITERAL) {
-               p = gins(ACMPL, nl, nodintconst((uint32)x));
-       } else {
-               regalloc(&rr, types[TUINT32], N);
-               gins(AMOVL, nr, &rr);
-               gins(ACMPL, nl, &rr);
+       t = lo1.type;
+       if(nl->op == OLITERAL || nr->op == OLITERAL)
+               gins(ACMPL, &lo1, &lo2);
+       else {
+               regalloc(&rr, types[TINT32], N);
+               gins(AMOVL, &lo1, &rr);
+               gins(ACMPL, &rr, &lo2);
                regfree(&rr);
        }
 
        // jump again
-       switch(op) {
-       default:
-               fatal("cmp64 %O %T", op, nr->type);
-       case OEQ:
-       case ONE:
-       case OGE:
-       case OGT:
-       case OLE:
-       case OLT:
-               patch(gbranch(optoas(op, t), T), to);
-               break;  
-       }
+       patch(gbranch(optoas(op, t), T), to);
 
        // point first branch down here if appropriate
        if(br != P)
                patch(br, pc);
 
-       regfree(&adr1);
-       if(nr == &adr2)
-               regfree(&adr2); 
+       splitclean();
+       splitclean();
 }
 
index d7a9851f408778ce8ed4211153f21bd33be5b90a..9943c2b607415dbca17168180481181ee7059f12 100644 (file)
@@ -43,6 +43,14 @@ struct       Prog
        void*   reg;            // pointer to containing Reg struct
 };
 
+// foptoas flags
+enum
+{
+       Frev = 1<<0,
+       Fpop = 1<<1,
+       Fpop2 = 1<<2,
+};
+
 EXTERN Biobuf* bout;
 EXTERN int32   dynloc;
 EXTERN uchar   reg[D_NONE];
@@ -114,6 +122,7 @@ Prog*       gop(int, Node*, Node*, Node*);
 void   setconst(Addr*, vlong);
 void   setaddr(Addr*, Node*);
 int    optoas(int, Type*);
+int    foptoas(int, Type*, int);
 void   ginit(void);
 void   gclean(void);
 void   regalloc(Node*, Type*, Node*);
@@ -131,7 +140,10 @@ Plist*     newplist(void);
 int    isfat(Type*);
 void   sudoclean(void);
 int    sudoaddable(int, Node*, Addr*);
+int    dotaddable(Node*, Node*);
 void   afunclit(Addr*);
+void   split64(Node*, Node*, Node*);
+void   splitclean(void);
 
 /*
  * list.c
index 47f21bcb6d61fb7115acbe71aeda6a52c0fd41cb..249c9fe8c773c1e5b201b14896cd52e202b8f040 100644 (file)
@@ -468,7 +468,7 @@ cgen_asop(Node *n)
 
 hard:
        if(nr->ullman > nl->ullman) {
-               regalloc(&n2, nr->type, N);
+               tempalloc(&n2, nr->type);
                cgen(nr, &n2);
                igen(nl, &n1, N);
        } else {
index 99c2b8af12860e284b931bf31bba0ba76cc6f594..6e82890d339764745816248bf305ee516bbaafb8 100755 (executable)
@@ -571,6 +571,81 @@ optoas(int op, Type *t)
        return a;
 }
 
+#define FCASE(a, b, c)  (((a)<<16)|((b)<<8)|(c))
+int
+foptoas(int op, Type *t, int flg)
+{
+       int et;
+
+       et = t->etype;
+
+       // clear Frev if unneeded
+       switch(op) {
+       case OADD:
+       case OMUL:
+               flg &= ~Frev;
+               break;
+       }
+
+       switch(FCASE(op, et, flg)) {
+       case FCASE(OADD, TFLOAT32, 0):
+               return AFADDF;
+       case FCASE(OADD, TFLOAT64, 0):
+               return AFADDD;
+       case FCASE(OADD, TFLOAT64, Fpop):
+               return AFADDDP;
+
+       case FCASE(OSUB, TFLOAT32, 0):
+               return AFSUBF;
+       case FCASE(OSUB, TFLOAT32, Frev):
+               return AFSUBRF;
+
+       case FCASE(OSUB, TFLOAT64, 0):
+               return AFSUBD;
+       case FCASE(OSUB, TFLOAT64, Frev):
+               return AFSUBRD;
+       case FCASE(OSUB, TFLOAT64, Fpop):
+               return AFSUBDP;
+       case FCASE(OSUB, TFLOAT64, Fpop|Frev):
+               return AFSUBRDP;
+
+       case FCASE(OMUL, TFLOAT32, 0):
+               return AFMULF;
+       case FCASE(OMUL, TFLOAT64, 0):
+               return AFMULD;
+       case FCASE(OMUL, TFLOAT64, Fpop):
+               return AFMULDP;
+
+       case FCASE(ODIV, TFLOAT32, 0):
+               return AFDIVF;
+       case FCASE(ODIV, TFLOAT32, Frev):
+               return AFDIVRF;
+
+       case FCASE(ODIV, TFLOAT64, 0):
+               return AFDIVD;
+       case FCASE(ODIV, TFLOAT64, Frev):
+               return AFDIVRD;
+       case FCASE(ODIV, TFLOAT64, Fpop):
+               return AFDIVDP;
+       case FCASE(ODIV, TFLOAT64, Fpop|Frev):
+               return AFDIVRDP;
+
+       case FCASE(OCMP, TFLOAT32, 0):
+               return AFCOMF;
+       case FCASE(OCMP, TFLOAT32, Fpop):
+               return AFCOMFP;
+       case FCASE(OCMP, TFLOAT64, 0):
+               return AFCOMD;
+       case FCASE(OCMP, TFLOAT64, Fpop):
+               return AFCOMDP;
+       case FCASE(OCMP, TFLOAT64, Fpop2):
+               return AFCOMDPP;
+       }
+
+       fatal("foptoas %O %T %#x", op, t, flg);
+       return 0;
+}
+
 static int     resvd[] =
 {
 //     D_DI,   // for movstring
@@ -600,6 +675,8 @@ ginit(void)
                reg[resvd[i]]++;
 }
 
+ulong regpc[D_NONE];
+
 void
 gclean(void)
 {
@@ -610,14 +687,12 @@ gclean(void)
 
        for(i=D_AX; i<=D_DI; i++)
                if(reg[i])
-                       yyerror("reg %R left allocated\n", i);
+                       yyerror("reg %R left allocated at %lux\n", i, regpc[i]);
        for(i=D_F0; i<=D_F7; i++)
                if(reg[i])
                        yyerror("reg %R left allocated\n", i);
 }
 
-ulong regpc[D_NONE];
-
 /*
  * allocate register of type t, leave in n.
  * if o != N, o is desired fixed register.
@@ -681,7 +756,7 @@ err:
 
 out:
        if(reg[i] == 0) {
-               regpc[i] = getcallerpc(&n);
+               regpc[i] = (ulong)__builtin_return_address(0);
                if(i == D_AX || i == D_CX || i == D_DX || i == D_SP) {
                        dump("regalloc-o", o);
                        fatal("regalloc %R", i);
@@ -837,168 +912,445 @@ gconreg(int as, vlong c, int reg)
        gins(as, &n1, &n2);
 }
 
+
 /*
- * generate move:
- *     t = f
- * f may be in memory,
- * t is known to be a 32-bit register.
+ * Is this node a memory operand?
+ */
+int
+ismem(Node *n)
+{
+       switch(n->op) {
+       case OINDREG:
+       case ONAME:
+       case OPARAM:
+               return 1;
+       }
+       return 0;
+}
+
+Node sclean[10];
+int nsclean;
+
+/*
+ * n is a 64-bit value.  fill in lo and hi to refer to its 32-bit halves.
  */
 void
-gload(Node *f, Node *t)
+split64(Node *n, Node *lo, Node *hi)
 {
-       int a, ft;
+       Node n1;
+       int64 i;
 
-       ft = simtype[f->type->etype];
+       if(!is64(n->type))
+               fatal("split64 %T", n->type);
 
-       switch(ft) {
+       sclean[nsclean].op = OEMPTY;
+       if(nsclean >= nelem(sclean))
+               fatal("split64 clean");
+       nsclean++;
+       switch(n->op) {
        default:
-               fatal("gload %T", f->type);
-       case TINT8:
-               a = AMOVBLSX;
-               if(isconst(f, CTINT) || isconst(f, CTBOOL))
-                       a = AMOVL;
-               break;
-       case TBOOL:
-       case TUINT8:
-               a = AMOVBLZX;
-               if(isconst(f, CTINT) || isconst(f, CTBOOL))
-                       a = AMOVL;
-               break;
-       case TINT16:
-               a = AMOVWLSX;
-               if(isconst(f, CTINT) || isconst(f, CTBOOL))
-                       a = AMOVL;
-               break;
-       case TUINT16:
-               a = AMOVWLZX;
-               if(isconst(f, CTINT))
-                       a = AMOVL;
-               break;
-       case TINT32:
-       case TUINT32:
-       case TPTR32:
-               a = AMOVL;
+               if(!dotaddable(n, &n1)) {
+                       igen(n, &n1, N);
+                       sclean[nsclean-1] = n1;
+               }
+               n = &n1;
+               // fall through
+       case ONAME:
+       case OINDREG:
+               *lo = *n;
+               *hi = *n;
+               lo->type = types[TUINT32];
+               if(n->type->etype == TINT64)
+                       hi->type = types[TINT32];
+               else
+                       hi->type = types[TUINT32];
+               hi->xoffset += 4;
                break;
-       case TINT64:
-       case TUINT64:
-               a = AMOVL;      // truncating
+
+       case OLITERAL:
+               convconst(&n1, n->type, &n->val);
+               i = mpgetfix(n1.val.u.xval);
+               nodconst(lo, types[TUINT32], (uint32)i);
+               i >>= 32;
+               if(n->type->etype == TINT64)
+                       nodconst(hi, types[TINT32], (int32)i);
+               else
+                       nodconst(hi, types[TUINT32], (uint32)i);
                break;
        }
+}
 
-       gins(a, f, t);
+void
+splitclean(void)
+{
+       if(nsclean <= 0)
+               fatal("splitclean");
+       nsclean--;
+       if(sclean[nsclean].op != OEMPTY)
+               regfree(&sclean[nsclean]);
 }
 
-/*
- * generate move:
- *     t = f
- * f is known to be a 32-bit register.
- * t may be in memory.
- */
 void
-gstore(Node *f, Node *t)
+gmove(Node *f, Node *t)
 {
        int a, ft, tt;
-       Node nod, adr;
+       Type *cvt;
+       Node r1, r2, flo, fhi, tlo, thi, con;
+
+       if(debug['M'])
+               print("gmove %N -> %N\n", f, t);
+
+       ft = simsimtype(f->type);
+       tt = simsimtype(t->type);
+       cvt = t->type;
+
+       // cannot have two memory operands;
+       // except 64-bit, which always copies via registers anyway.
+       if(ismem(f) && ismem(t) && !is64(f->type) && !is64(t->type))
+               goto hard;
+
+       // convert constant to desired type
+       if(f->op == OLITERAL) {
+               convconst(&con, t->type, &f->val);
+               f = &con;
+               ft = tt;        // so big switch will choose a simple mov
+
+               // some constants can't move directly to memory.
+               if(ismem(t)) {
+                       // float constants come from memory.
+                       if(isfloat[tt])
+                               goto hard;
+               }
+       }
 
-       ft = simtype[f->type->etype];
-       tt = simtype[t->type->etype];
+       // value -> value copy, only one memory operand.
+       // figure out the instruction to use.
+       // break out of switch for one-instruction gins.
+       // goto rdst for "destination must be register".
+       // goto hard for "convert to cvt type first".
+       // otherwise handle and return.
 
-       switch(tt) {
+       switch(CASE(ft, tt)) {
        default:
-               fatal("gstore %T", t->type);
-       case TINT8:
-       case TBOOL:
-       case TUINT8:
+               fatal("gmove %N -> %N", f, t);
+
+       /*
+        * integer copy and truncate
+        */
+       case CASE(TINT8, TINT8):        // same size
+       case CASE(TINT8, TUINT8):
+       case CASE(TUINT8, TINT8):
+       case CASE(TUINT8, TUINT8):
+       case CASE(TINT16, TINT8):       // truncate
+       case CASE(TUINT16, TINT8):
+       case CASE(TINT32, TINT8):
+       case CASE(TUINT32, TINT8):
+//     case CASE(TINT64, TINT8):
+//     case CASE(TUINT64, TINT8):
+       case CASE(TINT16, TUINT8):
+       case CASE(TUINT16, TUINT8):
+       case CASE(TINT32, TUINT8):
+       case CASE(TUINT32, TUINT8):
+//     case CASE(TINT64, TUINT8):
+//     case CASE(TUINT64, TUINT8):
                a = AMOVB;
                break;
-       case TINT16:
-       case TUINT16:
+
+       case CASE(TINT16, TINT16):      // same size
+       case CASE(TINT16, TUINT16):
+       case CASE(TUINT16, TINT16):
+       case CASE(TUINT16, TUINT16):
+       case CASE(TINT32, TINT16):      // truncate
+       case CASE(TUINT32, TINT16):
+//     case CASE(TINT64, TINT16):
+//     case CASE(TUINT64, TINT16):
+       case CASE(TINT32, TUINT16):
+       case CASE(TUINT32, TUINT16):
+//     case CASE(TINT64, TUINT16):
+//     case CASE(TUINT64, TUINT16):
                a = AMOVW;
                break;
-       case TINT32:
-       case TUINT32:
-       case TPTR32:
+
+       case CASE(TINT32, TINT32):      // same size
+       case CASE(TINT32, TUINT32):
+       case CASE(TUINT32, TINT32):
+       case CASE(TUINT32, TUINT32):
+//     case CASE(TINT64, TINT32):      // truncate
+//     case CASE(TUINT64, TINT32):
+//     case CASE(TINT64, TUINT32):
+//     case CASE(TUINT64, TUINT32):
                a = AMOVL;
                break;
-       case TINT64:
-       case TUINT64:
-               if(t->op == OREGISTER)
-                       fatal("gstore %T %O", t->type, t->op);
-               memset(&adr, 0, sizeof adr);
-               igen(t, &adr, N);
-               t = &adr;
-               t->xoffset += 4;
-               switch(ft) {
-               default:
-                       fatal("gstore %T -> %T", f, t);
-                       break;
-               case TINT32:
-                       nodreg(&nod, types[TINT32], D_AX);
-                       gins(AMOVL, f, &nod);
-                       gins(ACDQ, N, N);
-                       nodreg(&nod, types[TINT32], D_DX);
-                       gins(AMOVL, &nod, t);
-                       break;
-               case TUINT32:
-                       gins(AMOVL, nodintconst(0), t);
-                       break;
+
+       case CASE(TINT64, TINT64):      // same size
+       case CASE(TINT64, TUINT64):
+       case CASE(TUINT64, TINT64):
+       case CASE(TUINT64, TUINT64):
+               split64(f, &flo, &fhi);
+               split64(t, &tlo, &thi);
+               if(f->op == OLITERAL) {
+                       gins(AMOVL, &flo, &tlo);
+                       gins(AMOVL, &fhi, &thi);
+               } else {
+                       regalloc(&r1, types[TUINT32], N);
+                       regalloc(&r2, types[TUINT32], N);
+                       gins(AMOVL, &flo, &r1);
+                       gins(AMOVL, &fhi, &r2);
+                       gins(AMOVL, &r1, &tlo);
+                       gins(AMOVL, &r2, &thi);
+                       regfree(&r2);
+                       regfree(&r1);
                }
-               t->xoffset -= 4;
-               a = AMOVL;
-       }
+               splitclean();
+               splitclean();
+               return;
 
-       gins(a, f, t);
-       if(t == &adr)
-               regfree(&adr);
-}
+       /*
+        * integer up-conversions
+        */
+       case CASE(TINT8, TINT16):       // sign extend int8
+       case CASE(TINT8, TUINT16):
+               a = AMOVBWSX;
+               goto rdst;
+       case CASE(TINT8, TINT32):
+       case CASE(TINT8, TUINT32):
+               a = AMOVBLSX;
+               goto rdst;
+//     case CASE(TINT8, TINT64):
+//     case CASE(TINT8, TUINT64):
+//             a = AMOVBQSX;
+//             goto rdst;
+
+       case CASE(TUINT8, TINT16):      // zero extend uint8
+       case CASE(TUINT8, TUINT16):
+               a = AMOVBWZX;
+               goto rdst;
+       case CASE(TUINT8, TINT32):
+       case CASE(TUINT8, TUINT32):
+               a = AMOVBLZX;
+               goto rdst;
+//     case CASE(TUINT8, TINT64):
+//     case CASE(TUINT8, TUINT64):
+//             a = AMOVBQZX;
+//             goto rdst;
+
+       case CASE(TINT16, TINT32):      // sign extend int16
+       case CASE(TINT16, TUINT32):
+               a = AMOVWLSX;
+               goto rdst;
+//     case CASE(TINT16, TINT64):
+//     case CASE(TINT16, TUINT64):
+//             a = AMOVWQSX;
+//             goto rdst;
+
+       case CASE(TUINT16, TINT32):     // zero extend uint16
+       case CASE(TUINT16, TUINT32):
+               a = AMOVWLZX;
+               goto rdst;
+//     case CASE(TUINT16, TINT64):
+//     case CASE(TUINT16, TUINT64):
+//             a = AMOVWQZX;
+//             goto rdst;
+
+       case CASE(TINT32, TINT64):      // sign extend int32
+       case CASE(TINT32, TUINT64):
+               split64(t, &tlo, &thi);
+               nodreg(&flo, tlo.type, D_AX);
+               nodreg(&fhi, thi.type, D_DX);
+               gmove(f, &flo);
+               gins(ACDQ, N, N);
+               gins(AMOVL, &flo, &tlo);
+               gins(AMOVL, &fhi, &thi);
+               return;
 
-void
-gmove(Node *f, Node *t)
-{
-       int ft, tt, t64, a;
-       Node nod;
+//     case CASE(TUINT32, TINT64):     // zero extend uint32
+//     case CASE(TUINT32, TUINT64):
+//             // AMOVL into a register zeros the top of the register,
+//             // so this is not always necessary, but if we rely on AMOVL
+//             // the optimizer is almost certain to screw with us.
+//             a = AMOVLQZX;
+//             goto rdst;
 
-       ft = simtype[f->type->etype];
-       tt = simtype[t->type->etype];
+       /*
+       * float to integer
+       *
+       case CASE(TFLOAT32, TINT16):
+       case CASE(TFLOAT32, TINT32):
+       case CASE(TFLOAT32, TINT64):
+       case CASE(TFLOAT64, TINT16):
+       case CASE(TFLOAT64, TINT32):
+       case CASE(TFLOAT64, TINT64):
+               if(ft == TFLOAT32)
+                       gins(AFMOVF, f, &f0);
+               else
+                       gins(AFMOVD, f, &f0);
+               if(tt == TINT16)
+                       gins(AFMOVWP, &f0, t);
+               else if(tt == TINT32)
+                       gins(AFMOVLP, &f0, t);
+               else
+                       gins(AFMOVVP, &f0, t);
+               return;
 
-       a = AGOK;
+       case CASE(TFLOAT32, TINT8):
+       case CASE(TFLOAT32, TUINT16):
+       case CASE(TFLOAT32, TUINT8):
+       case CASE(TFLOAT64, TINT8):
+       case CASE(TFLOAT64, TUINT16):
+       case CASE(TFLOAT64, TUINT8):
+               // convert via int32.
+               cvt = types[TINT32];
+               goto hard;
+
+       case CASE(TFLOAT32, TUINT32):
+       case CASE(TFLOAT64, TUINT32):
+               // could potentially convert via int64.
+               cvt = types[TINT64];
+               goto hard;
+
+       case CASE(TFLOAT32, TUINT64):
+       case CASE(TFLOAT64, TUINT64):
+               if(ft == TFLOAT32)
+                       gins(AFMOVF, f, &f0);
+               else
+                       gins(AFMOVD, f, &f0);
+               // algorithm is:
+               //      if small enough, use native float64 -> int64 conversion.
+               //      otherwise, subtract 2^63, convert, and add it back.
+               bignodes();
+               regalloc(&r1, types[ft], N);
+               regalloc(&r2, types[ft], N);
+               gins(optoas(OCMP, f->type), &bigf, &r1);
+               p1 = gbranch(optoas(OLE, f->type), T);
+               gins(a, &r1, &r2);
+               p2 = gbranch(AJMP, T);
+               patch(p1, pc);
+               gins(optoas(OAS, f->type), &bigf, &r3);
+               gins(optoas(OSUB, f->type), &r3, &r1);
+               gins(a, &r1, &r2);
+               gins(AMOVQ, &bigi, &r4);
+               gins(AXORQ, &r4, &r2);
+               patch(p2, pc);
+               gmove(&r2, t);
+               regfree(&r4);
+               regfree(&r3);
+               regfree(&r2);
+               regfree(&r1);
+               fatal("lazy");
+               return;
+       */
+       /*
+        * integer to float
+        *
+       case CASE(TINT32, TFLOAT32):
+               a = ACVTSL2SS;
+               goto rdst;
+
+
+       case CASE(TINT32, TFLOAT64):
+               a = ACVTSL2SD;
+               goto rdst;
+
+       case CASE(TINT64, TFLOAT32):
+               a = ACVTSQ2SS;
+               goto rdst;
+
+       case CASE(TINT64, TFLOAT64):
+               a = ACVTSQ2SD;
+               goto rdst;
+
+       case CASE(TINT16, TFLOAT32):
+       case CASE(TINT16, TFLOAT64):
+       case CASE(TINT8, TFLOAT32):
+       case CASE(TINT8, TFLOAT64):
+       case CASE(TUINT16, TFLOAT32):
+       case CASE(TUINT16, TFLOAT64):
+       case CASE(TUINT8, TFLOAT32):
+       case CASE(TUINT8, TFLOAT64):
+               // convert via int32
+               cvt = types[TINT32];
+               goto hard;
+
+       case CASE(TUINT32, TFLOAT32):
+       case CASE(TUINT32, TFLOAT64):
+               // convert via int64.
+               cvt = types[TINT64];
+               goto hard;
+
+       case CASE(TUINT64, TFLOAT32):
+       case CASE(TUINT64, TFLOAT64):
+               // algorithm is:
+               //      if small enough, use native int64 -> uint64 conversion.
+               //      otherwise, halve (rounding to odd?), convert, and double.
+               a = ACVTSQ2SS;
+               if(tt == TFLOAT64)
+                       a = ACVTSQ2SD;
+               nodconst(&zero, types[TUINT64], 0);
+               nodconst(&one, types[TUINT64], 1);
+               regalloc(&r1, f->type, f);
+               regalloc(&r2, t->type, t);
+               regalloc(&r3, f->type, N);
+               regalloc(&r4, f->type, N);
+               gmove(f, &r1);
+               gins(ACMPQ, &r1, &zero);
+               p1 = gbranch(AJLT, T);
+               gins(a, &r1, &r2);
+               p2 = gbranch(AJMP, T);
+               patch(p1, pc);
+               gmove(&r1, &r3);
+               gins(ASHRQ, &one, &r3);
+               gmove(&r1, &r4);
+               gins(AANDL, &one, &r4);
+               gins(AORQ, &r4, &r3);
+               gins(a, &r3, &r2);
+               gins(optoas(OADD, t->type), &r2, &r2);
+               patch(p2, pc);
+               gmove(&r2, t);
+               regfree(&r4);
+               regfree(&r3);
+               regfree(&r2);
+               regfree(&r1);
+               return;
+       */
+       /*
+        * float to float
+        *
+       case CASE(TFLOAT32, TFLOAT32):
+               a = AMOVSS;
+               break;
 
-       t64 = 0;
-       if(tt == TINT64 || tt == TUINT64 || tt == TPTR64)
-               t64 = 1;
+       case CASE(TFLOAT64, TFLOAT64):
+               a = AMOVSD;
+               break;
 
-       if(debug['M'])
-               print("gop: %O %O[%E],%O[%E]\n", OAS,
-                       f->op, ft, t->op, tt);
-       if(isfloat[ft] && f->op == OCONST) {
-               fatal("fp");
-               /* TO DO: pick up special constants, possibly preloaded */
-       //F
-       /*
-               if(mpgetflt(f->val.u.fval) == 0.0) {
-                       regalloc(&nod, t->type, t);
-                       gins(AXORPD, &nod, &nod);
-                       gmove(&nod, t);
-                       regfree(&nod);
-                       return;
-               }
+       case CASE(TFLOAT32, TFLOAT64):
+               a = ACVTSS2SD;
+               goto rdst;
+
+       case CASE(TFLOAT64, TFLOAT32):
+               a = ACVTSD2SS;
+               goto rdst;
        */
        }
 
-       if(is64(types[ft]) && isconst(f, CTINT)) {
-               f->type = types[TINT32];        // XXX check constant value, choose correct type
-               ft = TINT32;
-       }
+       gins(a, f, t);
+       return;
 
-       if(is64(types[ft]) && is64(types[tt])) {
-               sgen(f, t, 8);
-               return;
-       }
+rdst:
+       // requires register destination
+       regalloc(&r1, t->type, t);
+       gins(a, f, &r1);
+       gmove(&r1, t);
+       regfree(&r1);
+       return;
 
-       regalloc(&nod, types[TINT32], t);
-       gload(f, &nod);
-       gstore(&nod, t);
-       regfree(&nod);
+hard:
+       // requires register intermediate
+       regalloc(&r1, cvt, t);
+       gmove(f, &r1);
+       gmove(&r1, t);
+       regfree(&r1);
+       return;
 }
 
 int
@@ -1025,9 +1377,13 @@ gins(int as, Node *f, Node *t)
 {
        Prog *p;
 
-       // generating AMOVL BX, BX is just dumb.
-       if(f != N && t != N && samaddr(f, t) && as == AMOVL)
-               return nil;
+       switch(as) {
+       case AMOVB:
+       case AMOVW:
+       case AMOVL:
+               if(f != N && t != N && samaddr(f, t))
+                       return nil;
+       }
 
        p = prog(as);
        if(f != N)
@@ -1173,6 +1529,25 @@ naddr(Node *n, Addr *a)
        }
 }
 
+int
+dotaddable(Node *n, Node *n1)
+{
+       int o, oary[10];
+       Node *nn;
+
+       if(n->op != ODOT)
+               return 0;
+
+       o = dotoffset(n, oary, &nn);
+       if(nn != N && nn->addable && o == 1 && oary[0] >= 0) {
+               *n1 = *nn;
+               n1->type = n->type;
+               n1->xoffset += oary[0];
+               return 1;
+       }
+       return 0;
+}
+
 void
 sudoclean(void)
 {
index 72cf684adfd847fa9c0dd90dec27b9e509ac30b1..d672ec9f09646707f4357b9a0f8e89c1bfefb0bd 100644 (file)
@@ -857,7 +857,7 @@ convconst(Node *con, Type *t, Val *val)
                con->val.u.xval = mal(sizeof *con->val.u.xval);
                switch(val->ctype) {
                default:
-                       fatal("convconst ctype=%d %lT", val->ctype, t->type);
+                       fatal("convconst ctype=%d %lT", val->ctype, t);
                case CTINT:
                        i = mpgetfix(val->u.xval);
                        break;