8g:

author Russ Cox <rsc@golang.org>

Wed, 3 Jun 2009 06:25:17 +0000 (23:25 -0700)

committer Russ Cox <rsc@golang.org>

Wed, 3 Jun 2009 06:25:17 +0000 (23:25 -0700)
author Russ Cox <rsc@golang.org>
Wed, 3 Jun 2009 06:25:17 +0000 (23:25 -0700)
committer Russ Cox <rsc@golang.org>
Wed, 3 Jun 2009 06:25:17 +0000 (23:25 -0700)
diff --git a/src/cmd/8g/cgen.c b/src/cmd/8g/cgen.c

index 911e004dc2abecd052ef2fc52838a018d7cb3c91..75c15cd23a4d0105e144d4cdd0f7757a4f916b71 100644 (file)
--- a/src/cmd/8g/cgen.c
+++ b/src/cmd/8g/cgen.c
@@ -7,58 +7,6 @@
  
  #include "gg.h"
  
-static int cancgen64(Node *n, Node *res);
-
-int
-is64(Type *t)
-{
-       if(t == T)
-               return 0;
-       switch(simtype[t->etype]) {
-       case TINT64:
-       case TUINT64:
-       case TPTR64:
-               return 1;
-       }
-       return 0;
-}
-
-int
-noconv(Type *t1, Type *t2)
-{
-       int e1, e2;
-
-       e1 = simtype[t1->etype];
-       e2 = simtype[t2->etype];
-
-       switch(e1) {
-       case TINT8:
-       case TUINT8:
-               return e2 == TINT8 || e2 == TUINT8;
-
-       case TINT16:
-       case TUINT16:
-               return e2 == TINT16 || e2 == TUINT16;
-
-       case TINT32:
-       case TUINT32:
-       case TPTR32:
-               return e2 == TINT32 || e2 == TUINT32 || e2 == TPTR32;
-
-       case TINT64:
-       case TUINT64:
-       case TPTR64:
-               return e2 == TINT64 || e2 == TUINT64 || e2 == TPTR64;
-
-       case TFLOAT32:
-               return e2 == TFLOAT32;
-
-       case TFLOAT64:
-               return e2 == TFLOAT64;
-       }
-       return 0;
-}
-
  /*
   * generate:
   *     res = n;
@@ -84,11 +32,16 @@ cgen(Node *n, Node *res)
         if(res == N || res->type == T)
                 fatal("cgen: res nil");
  
+       // static initializations
+       if(initflag && gen_as_init(n, res))
+               return;
+
         // function calls on both sides?  introduce temporary
         if(n->ullman >= UINF && res->ullman >= UINF) {
-               tempname(&n1, n->type);
+               tempalloc(&n1, n->type);
                 cgen(n, &n1);
                 cgen(&n1, res);
+               tempfree(&n1);
                 return;
         }
  
@@ -125,10 +78,6 @@ cgen(Node *n, Node *res)
         // otherwise, the result is addressable but n is not.
         // let's do some computation.
  
-       // 64-bit ops are hard on 32-bit machine.
-       if(is64(n->type) && cancgen64(n, res))
-               return;
-
         // use ullman to pick operand to eval first.
         nl = n->left;
         nr = n->right;
@@ -144,6 +93,25 @@ cgen(Node *n, Node *res)
                 return;
         }
  
+       // 64-bit ops are hard on 32-bit machine.
+       if(is64(n->type) || is64(res->type) || n->left != N && is64(n->left->type)) {
+               switch(n->op) {
+               // math goes to cgen64.
+               case OMINUS:
+               case OCOM:
+               case OADD:
+               case OSUB:
+               case OMUL:
+               case OLSH:
+               case ORSH:
+               case OAND:
+               case OOR:
+               case OXOR:
+                       cgen64(n, res);
+                       return;
+               }
+       }
+
         if(isfloat[n->type->etype] && isfloat[nl->type->etype])
                 goto flt;
  
@@ -178,6 +146,7 @@ cgen(Node *n, Node *res)
                 return;
  
         case OMINUS:
+       case OCOM:
                 a = optoas(n->op, nl->type);
                 goto uop;
  
@@ -218,8 +187,8 @@ cgen(Node *n, Node *res)
                 break;
  
         case OLEN:
-               if(istype(nl->type, TSTRING) || istype(nl->type, TMAP)) {
-                       // both string and map have len in the first 32-bit word.
+               if(istype(nl->type, TMAP)) {
+                       // map has len in the first 32-bit word.
                         // a zero pointer means zero length
                         tempalloc(&n1, types[tptr]);
                         cgen(nl, &n1);
@@ -243,7 +212,9 @@ cgen(Node *n, Node *res)
                         regfree(&n1);
                         break;
                 }
-               if(isslice(nl->type)) {
+               if(istype(nl->type, TSTRING) || isslice(nl->type)) {
+                       // both slice and string have len one pointer into the struct.
+                       // a zero pointer means zero length
                         igen(nl, &n1, res);
                         n1.op = OINDREG;
                         n1.type = types[TUINT32];
@@ -289,10 +260,6 @@ cgen(Node *n, Node *res)
  
         case OMOD:
         case ODIV:
-               if(isfloat[n->type->etype]) {
-                       a = optoas(n->op, nl->type);
-                       goto abop;
-               }
                 cgen_div(n->op, nl, nr, res);
                 break;
  
@@ -340,8 +307,19 @@ uop:       // unary
         return;
  
  flt:   // floating-point.  387 (not SSE2) to interoperate with 6c
-       nodreg(&f0, n->type, D_F0);
+       nodreg(&f0, nl->type, D_F0);
         nodreg(&f1, n->type, D_F0+1);
+       if(nr != N)
+               goto flt2;
+
+       // unary
+       cgen(nl, &f0);
+       if(n->op != OCONV)
+               gins(foptoas(n->op, n->type, 0), &f0, &f0);
+       gmove(&f0, res);
+       return;
+
+flt2:  // binary
         if(nl->ullman >= nr->ullman) {
                 cgen(nl, &f0);
                 if(nr->addable)
@@ -402,7 +380,7 @@ agen(Node *n, Node *res)
                 fatal("agen %O", n->op);
  
         case OCONV:
-               if(!eqtype(n->type, nl->type))
+               if(!cvttype(n->type, nl->type))
                         fatal("agen: non-trivial OCONV");
                 agen(nl, res);
                 break;
@@ -427,8 +405,11 @@ agen(Node *n, Node *res)
                 if(nr->addable) {
                         agenr(nl, &n3, res);
                         if(!isconst(nr, CTINT)) {
+                               tempalloc(&tmp, nr->type);
+                               cgen(nr, &tmp);
                                 regalloc(&n1, nr->type, N);
-                               cgen(nr, &n1);
+                               gmove(&tmp, &n1);
+                               tempfree(&tmp);
                         }
                 } else if(nl->addable) {
                         if(!isconst(nr, CTINT)) {
@@ -640,7 +621,7 @@ bgen(Node *n, int true, Prog *to)
  {
         int et, a;
         Node *nl, *nr, *r;
-       Node n1, n2, tmp;
+       Node n1, n2, tmp, t1, t2, ax;
         Prog *p1, *p2;
  
         if(debug['g']) {
@@ -778,6 +759,37 @@ bgen(Node *n, int true, Prog *to)
                         break;
                 }
  
+               if(isfloat[nr->type->etype]) {
+                       nodreg(&tmp, nr->type, D_F0);
+                       nodreg(&n2, nr->type, D_F0 + 1);
+                       nodreg(&ax, types[TUINT16], D_AX);
+                       et = simsimtype(nr->type);
+                       if(et == TFLOAT64) {
+                               // easy - do in FPU
+                               cgen(nr, &tmp);
+                               cgen(nl, &tmp);
+                               gins(AFUCOMPP, &tmp, &n2);
+                       } else {
+                               // NOTE(rsc): This is wrong.
+                               // It's right for comparison but presumably all the
+                               // other ops have the same problem.  We need to
+                               // figure out what the right solution is, besides
+                               // tell people to use float64.
+                               tempalloc(&t1, types[TFLOAT32]);
+                               tempalloc(&t2, types[TFLOAT32]);
+                               cgen(nr, &t1);
+                               cgen(nl, &t2);
+                               gmove(&t1, &tmp);
+                               gins(AFCOMFP, &t1, &tmp);
+                               tempfree(&t2);
+                               tempfree(&t1);
+                       }
+                       gins(AFSTSW, N, &ax);
+                       gins(ASAHF, N, N);
+                       patch(gbranch(optoas(brrev(a), nr->type), T), to);
+                       break;
+               }
+
                 if(is64(nr->type)) {
                         if(!nl->addable) {
                                 tempalloc(&n1, nl->type);
@@ -800,45 +812,43 @@ bgen(Node *n, int true, Prog *to)
                 a = optoas(a, nr->type);
  
                 if(nr->ullman >= UINF) {
-                       regalloc(&n1, nr->type, N);
-                       cgen(nr, &n1);
-
-                       tempname(&tmp, nr->type);
-                       gmove(&n1, &tmp);
-                       regfree(&n1);
+                       tempalloc(&tmp, nr->type);
+                       cgen(nr, &tmp);
  
-                       regalloc(&n1, nl->type, N);
+                       tempalloc(&n1, nl->type);
                         cgen(nl, &n1);
  
-                       regalloc(&n2, nr->type, &n2);
+                       regalloc(&n2, nr->type, N);
                         cgen(&tmp, &n2);
  
                         gins(optoas(OCMP, nr->type), &n1, &n2);
                         patch(gbranch(a, nr->type), to);
-
-                       regfree(&n1);
+                       tempfree(&n1);
+                       tempfree(&tmp);
                         regfree(&n2);
                         break;
                 }
  
-               regalloc(&n1, nl->type, N);
+               tempalloc(&n1, nl->type);
                 cgen(nl, &n1);
  
                 if(smallintconst(nr)) {
                         gins(optoas(OCMP, nr->type), &n1, nr);
                         patch(gbranch(a, nr->type), to);
-                       regfree(&n1);
+                       tempfree(&n1);
                         break;
                 }
  
+               tempalloc(&tmp, nr->type);
+               cgen(nr, &tmp);
                 regalloc(&n2, nr->type, N);
-               cgen(nr, &n2);
+               gmove(&tmp, &n2);
+               tempfree(&tmp);
  
                 gins(optoas(OCMP, nr->type), &n1, &n2);
                 patch(gbranch(a, nr->type), to);
-
-               regfree(&n1);
                 regfree(&n2);
+               tempfree(&n1);
                 break;
         }
  }
@@ -883,7 +893,7 @@ stkof(Node *n)
  void
  sgen(Node *n, Node *res, int w)
  {
-       Node nodl, nodr;
+       Node dst, src, tdst, tsrc;
         int32 c, q, odst, osrc;
  
         if(debug['g']) {
@@ -904,22 +914,29 @@ sgen(Node *n, Node *res, int w)
         osrc = stkof(n);
         odst = stkof(res);
  
-       // TODO(rsc): Should these be tempalloc instead?
-       nodreg(&nodl, types[tptr], D_DI);
-       nodreg(&nodr, types[tptr], D_SI);
-
-       if(n->ullman >= res->ullman) {
-               agen(n, &nodr);
-               agen(res, &nodl);
-       } else {
-               agen(res, &nodl);
-               agen(n, &nodr);
-       }
+       nodreg(&dst, types[tptr], D_DI);
+       nodreg(&src, types[tptr], D_SI);
+
+       tempalloc(&tsrc, types[tptr]);
+       tempalloc(&tdst, types[tptr]);
+       if(!n->addable)
+               agen(n, &tsrc);
+       if(!res->addable)
+               agen(res, &tdst);
+       if(n->addable)
+               agen(n, &src);
+       else
+               gmove(&tsrc, &src);
+       if(res->addable)
+               agen(res, &dst);
+       else
+               gmove(&tdst, &dst);
+       tempfree(&tdst);
+       tempfree(&tsrc);
  
         c = w % 4;      // bytes
         q = w / 4;      // doublewords
  
-       gins(ACLD, N, N);
         // if we are copying forward on the stack and
         // the src and dst overlap, then reverse direction
         if(osrc < odst && odst < osrc+w) {
@@ -949,6 +966,7 @@ sgen(Node *n, Node *res, int w)
                 // we leave with the flag clear
                 gins(ACLD, N, N);
         } else {
+               gins(ACLD, N, N);       // paranoia.  TODO(rsc): remove?
                 // normal direction
                 if(q >= 4) {
                         gconreg(AMOVL, q, D_CX);
@@ -966,34 +984,13 @@ sgen(Node *n, Node *res, int w)
         }
  }
  
-void
-nswap(Node *a, Node *b)
-{
-       Node t;
-
-       t = *a;
-       *a = *b;
-       *b = t;
-}
-
-Node*
-ncon(uint32 i)
-{
-       static Node n;
-
-       if(n.type == T)
-               nodconst(&n, types[TUINT32], 0);
-       mpmovecfix(n.val.u.xval, i);
-       return &n;
-}
-
  /*
   * attempt to generate 64-bit
   *     res = n
   * return 1 on success, 0 if op not handled.
   */
-static int
-cancgen64(Node *n, Node *res)
+void
+cgen64(Node *n, Node *res)
  {
         Node t1, t2, ax, dx, cx, ex, fx, *l, *r;
         Node lo1, lo2, lo3, hi1, hi2, hi3;
@@ -1001,8 +998,6 @@ cancgen64(Node *n, Node *res)
         uint64 v;
         uint32 lv, hv;
  
-       if(n->op == OCALL)
-               return 0;
         if(res->op != OINDREG && res->op != ONAME) {
                 dump("n", n);
                 dump("res", res);
@@ -1010,12 +1005,7 @@ cancgen64(Node *n, Node *res)
         }
         switch(n->op) {
         default:
-               return 0;
-
-       case ONAME:
-       case ODOT:
-               gmove(n, res);
-               return 1;
+               fatal("cgen64 %O", n->op);
  
         case OMINUS:
                 cgen(n->left, res);
@@ -1024,7 +1014,7 @@ cancgen64(Node *n, Node *res)
                 gins(AADCL, ncon(0), &hi1);
                 gins(ANEGL, N, &hi1);
                 splitclean();
-               return 1;
+               return;
  
         case OCOM:
                 cgen(n->left, res);
@@ -1032,7 +1022,7 @@ cancgen64(Node *n, Node *res)
                 gins(ANOTL, N, &lo1);
                 gins(ANOTL, N, &hi1);
                 splitclean();
-               return 1;
+               return;
  
         case OADD:
         case OSUB:
@@ -1408,7 +1398,6 @@ out:
                 tempfree(&t2);
         if(l == &t1)
                 tempfree(&t1);
-       return 1;
  }
  
  /*
diff --git a/src/cmd/8g/gg.h b/src/cmd/8g/gg.h

index ee9140b047443eca89ab1420d3f6af55fa7a2415..03f7aac6f691f63c79873acb10cdf05a671e3fce 100644 (file)
--- a/src/cmd/8g/gg.h
+++ b/src/cmd/8g/gg.h
@@ -2,7 +2,6 @@
  // Use of this source code is governed by a BSD-style
  // license that can be found in the LICENSE file.
  
-
  #include <u.h>
  #include <libc.h>
  
@@ -68,7 +67,7 @@ EXTERN        Node*   throwreturn;
  EXTERN int     maxstksize;
  
  /*
- * gen.c
+ * ggen.c
   */
  void   compile(Node*);
  void   proglist(void);
@@ -90,7 +89,7 @@ void  checklabels();
  void   ginscall(Node*, int);
  
  /*
- * cgen
+ * cgen.c
   */
  void   agen(Node*, Node*);
  void   agenr(Node *n, Node *a, Node *res);
@@ -103,10 +102,14 @@ Prog*     gins(int, Node*, Node*);
  int    samaddr(Node*, Node*);
  void   naddr(Node*, Addr*);
  void   cgen_aret(Node*, Node*);
-int    is64(Type*);
-void   cmp64(Node*, Node*, int, Prog*);
  Node*  ncon(uint32);
  
+/*
+ * cgen64.c
+ */
+void   cmp64(Node*, Node*, int, Prog*);
+void   cgen64(Node*, Node*);
+
  /*
   * gsubr.c
   */
@@ -133,9 +136,10 @@ void       tempfree(Node*);
  Node*  nodarg(Type*, int);
  void   nodreg(Node*, Type*, int);
  void   nodindreg(Node*, Type*, int);
-void   nodconst(Node*, Type*, vlong);
+void   nodconst(Node*, Type*, int64);
  void   gconreg(int, vlong, int);
  void   datagostring(Strlit*, Addr*);
+void   datastring(char*, int, Addr*);
  void   buildtxt(void);
  Plist* newplist(void);
  int    isfat(Type*);
@@ -145,6 +149,7 @@ int dotaddable(Node*, Node*);
  void   afunclit(Addr*);
  void   split64(Node*, Node*, Node*);
  void   splitclean(void);
+void   nswap(Node*, Node*);
  
  /*
   * list.c
diff --git a/src/cmd/8g/gsubr.c b/src/cmd/8g/gsubr.c

index 1d9e9967f6a2e264904cb6bee87621f328258691..4f30c606b9896da7cb64898b403581ce0efd2d91 100755 (executable)
--- a/src/cmd/8g/gsubr.c
+++ b/src/cmd/8g/gsubr.c
@@ -407,6 +407,22 @@ optoas(int op, Type *t)
                 a = ADECL;
                 break;
  
+       case CASE(OCOM, TINT8):
+       case CASE(OCOM, TUINT8):
+               a = ANOTB;
+               break;
+
+       case CASE(OCOM, TINT16):
+       case CASE(OCOM, TUINT16):
+               a = ANOTW;
+               break;
+
+       case CASE(OCOM, TINT32):
+       case CASE(OCOM, TUINT32):
+       case CASE(OCOM, TPTR32):
+               a = ANOTL;
+               break;
+
         case CASE(OMINUS, TINT8):
         case CASE(OMINUS, TUINT8):
                 a = ANEGB;
@@ -560,6 +576,10 @@ optoas(int op, Type *t)
                 a = ADIVL;
                 break;
  
+       case CASE(OEXTEND, TINT8):
+               a = ACBW;
+               break;
+
         case CASE(OEXTEND, TINT16):
                 a = ACWD;
                 break;
@@ -577,7 +597,13 @@ foptoas(int op, Type *t, int flg)
  {
         int et;
  
-       et = t->etype;
+       et = simtype[t->etype];
+
+       // If we need Fpop, it means we're working on
+       // two different floating-point registers, not memory.
+       // There the instruction only has a float64 form.
+       if(flg & Fpop)
+               et = TFLOAT64;
  
         // clear Frev if unneeded
         switch(op) {
@@ -655,6 +681,9 @@ static      int     resvd[] =
         D_CX,   // for shift
         D_DX,   // for divide
         D_SP,   // for stack
+
+       D_BL,   // because D_BX can be allocated
+       D_BH,
  };
  
  void
@@ -664,7 +693,7 @@ ginit(void)
  
         for(i=0; i<nelem(reg); i++)
                 reg[i] = 1;
-       for(i=D_AX; i<=D_DI; i++)
+       for(i=D_AL; i<=D_DI; i++)
                 reg[i] = 0;
  
         // TODO: Use MMX ?
@@ -685,7 +714,7 @@ gclean(void)
         for(i=0; i<nelem(resvd); i++)
                 reg[resvd[i]]--;
  
-       for(i=D_AX; i<=D_DI; i++)
+       for(i=D_AL; i<=D_DI; i++)
                 if(reg[i])
                         yyerror("reg %R left allocated at %lux\n", i, regpc[i]);
         for(i=D_F0; i<=D_F7; i++)
@@ -701,7 +730,7 @@ gclean(void)
  void
  regalloc(Node *n, Type *t, Node *o)
  {
-       int i, et;
+       int i, et, min, max;
  
         if(t == T)
                 fatal("regalloc: t nil");
@@ -710,6 +739,13 @@ regalloc(Node *n, Type *t, Node *o)
         switch(et) {
         case TINT8:
         case TUINT8:
+               // This is going to come back to bite us;
+               // we're not tracking tiny registers vs big ones.
+               // The hope is that because we use temporaries
+               // everywhere instead of registers, this will be okay.
+               min = D_AL;
+               max = D_BH;
+               goto try;
         case TINT16:
         case TUINT16:
         case TINT32:
@@ -719,17 +755,20 @@ regalloc(Node *n, Type *t, Node *o)
         case TPTR32:
         case TPTR64:
         case TBOOL:
+               min = D_AX;
+               max = D_DI;
+       try:
                 if(o != N && o->op == OREGISTER) {
                         i = o->val.u.reg;
-                       if(i >= D_AX && i <= D_DI)
+                       if(i >= D_AX && i <= max)
                                 goto out;
                 }
-               for(i=D_AX; i<=D_DI; i++)
+               for(i=min; i<=max; i++)
                         if(reg[i] == 0)
                                 goto out;
  
                 fprint(2, "registers allocated at\n");
-               for(i=D_AX; i<=D_DI; i++)
+               for(i=min; i<=max; i++)
                         fprint(2, "\t%R\t%#lux\n", i, regpc[i]);
                 yyerror("out of fixed registers");
                 goto err;
@@ -805,6 +844,7 @@ tempalloc(Node *n, Type *t)
         stksize += w;
         stksize = rnd(stksize, w);
         n->xoffset = -stksize;
+//print("tempalloc %d -> %d from %p\n", n->ostk, n->xoffset, __builtin_return_address(0));
         if(stksize > maxstksize)
                 maxstksize = stksize;
  }
@@ -812,6 +852,7 @@ tempalloc(Node *n, Type *t)
  void
  tempfree(Node *n)
  {
+//print("tempfree %d\n", n->xoffset);
         if(n->xoffset != -stksize)
                 fatal("tempfree %lld %d", -n->xoffset, stksize);
         stksize = n->ostk;
@@ -912,6 +953,33 @@ gconreg(int as, vlong c, int reg)
         gins(as, &n1, &n2);
  }
  
+/*
+ * swap node contents: exchange the complete Node structs *a and *b by value.
+ */
+void
+nswap(Node *a, Node *b)
+{
+       Node t;         // holds the original *a during the exchange
+
+       t = *a;
+       *a = *b;
+       *b = t;
+}
+
+/*
+ * return a pointer to a node for the constant i, typed types[TUINT32]; useful in calls to gins.
+ * the node is one shared static, overwritten by the next call, so never hold two results at once. NOTE(review): gmove passes -0x80, which wraps as uint32 - confirm intended.
+ */
+Node*
+ncon(uint32 i)
+{
+       static Node n;  // single node reused by every call
+
+       if(n.type == T) // first call: static storage is still zeroed, so set the node up once
+               nodconst(&n, types[TUINT32], 0);
+       mpmovecfix(n.val.u.xval, i);    // store i as the node's value
+       return &n;
+}
  
  /*
   * Is this node a memory operand?
@@ -954,9 +1022,17 @@ split64(Node *n, Node *lo, Node *hi)
                         sclean[nsclean-1] = n1;
                 }
                 n = &n1;
-               // fall through
+               goto common;
         case ONAME:
+               if(n->class == PPARAMREF) {
+                       cgen(n->heapaddr, &n1);
+                       sclean[nsclean-1] = n1;
+                       // continue at common using the node generated from heapaddr.
+                       n = &n1;
+               }
+               goto common;
         case OINDREG:
+       common:
                 *lo = *n;
                 *hi = *n;
                 lo->type = types[TUINT32];
@@ -990,12 +1066,44 @@ splitclean(void)
                 regfree(&sclean[nsclean]);
  }
  
+/*
+ * set up nodes representing fp constants: zerof (0), two63f (2^63) and two64f (2^64), all typed types[TFLOAT64]; only the first call does any work.
+ */
+Node zerof;
+Node two64f;
+Node two63f;
+
+void
+bignodes(void)
+{
+       static int did; // nonzero once the nodes have been built
+
+       if(did)
+               return;
+       did = 1;
+
+       two64f = *ncon(0);      // start from a copy of ncon's constant node, then retype it below
+       two64f.type = types[TFLOAT64];
+       two64f.val.ctype = CTFLT;
+       two64f.val.u.fval = mal(sizeof *two64f.val.u.fval);     // own fval storage for this node
+       mpmovecflt(two64f.val.u.fval, 18446744073709551616.);   // 2^64
+
+       two63f = two64f;        // copy type and ctype; fval is replaced on the next line so the nodes do not share it
+       two63f.val.u.fval = mal(sizeof *two63f.val.u.fval);
+       mpmovecflt(two63f.val.u.fval, 9223372036854775808.);    // 2^63
+
+       zerof = two64f; // same pattern: copy, then give zerof its own fval
+       zerof.val.u.fval = mal(sizeof *zerof.val.u.fval);
+       mpmovecflt(zerof.val.u.fval, 0);
+}
+
  void
  gmove(Node *f, Node *t)
  {
         int a, ft, tt;
         Type *cvt;
-       Node r1, r2, flo, fhi, tlo, thi, con;
+       Node r1, r2, t1, t2, flo, fhi, tlo, thi, con, f0, f1, ax, dx, cx;
+       Prog *p1, *p2, *p3;
  
         if(debug['M'])
                 print("gmove %N -> %N\n", f, t);
@@ -1004,16 +1112,19 @@ gmove(Node *f, Node *t)
         tt = simsimtype(t->type);
         cvt = t->type;
  
-       // cannot have two memory operands;
+       // cannot have two integer memory operands;
         // except 64-bit, which always copies via registers anyway.
-       if(ismem(f) && ismem(t) && !is64(f->type) && !is64(t->type))
+       if(isint[ft] && isint[tt] && !is64(f->type) && !is64(t->type) && ismem(f) && ismem(t))
                 goto hard;
  
         // convert constant to desired type
         if(f->op == OLITERAL) {
-               convconst(&con, t->type, &f->val);
+               if(tt == TFLOAT32)
+                       convconst(&con, types[TFLOAT64], &f->val);
+               else
+                       convconst(&con, t->type, &f->val);
                 f = &con;
-               ft = tt;        // so big switch will choose a simple mov
+               ft = simsimtype(con.type);
  
                 // some constants can't move directly to memory.
                 if(ismem(t)) {
@@ -1032,7 +1143,7 @@ gmove(Node *f, Node *t)
  
         switch(CASE(ft, tt)) {
         default:
-               fatal("gmove %N -> %N", f, t);
+               goto fatal;
  
         /*
          * integer copy and truncate
@@ -1057,10 +1168,9 @@ gmove(Node *f, Node *t)
         case CASE(TINT64, TUINT8):
         case CASE(TUINT64, TUINT8):
                 split64(f, &flo, &fhi);
-               regalloc(&r1, t->type, t);
+               nodreg(&r1, t->type, D_AX);
                 gins(AMOVB, &flo, &r1);
                 gins(AMOVB, &r1, t);
-               regfree(&r1);
                 splitclean();
                 return;
  
@@ -1080,10 +1190,9 @@ gmove(Node *f, Node *t)
         case CASE(TINT64, TUINT16):
         case CASE(TUINT64, TUINT16):
                 split64(f, &flo, &fhi);
-               regalloc(&r1, t->type, t);
+               nodreg(&r1, t->type, D_AX);
                 gins(AMOVW, &flo, &r1);
                 gins(AMOVW, &r1, t);
-               regfree(&r1);
                 splitclean();
                 return;
  
@@ -1099,10 +1208,9 @@ gmove(Node *f, Node *t)
         case CASE(TINT64, TUINT32):
         case CASE(TUINT64, TUINT32):
                 split64(f, &flo, &fhi);
-               regalloc(&r1, t->type, t);
+               nodreg(&r1, t->type, D_AX);
                 gins(AMOVL, &flo, &r1);
                 gins(AMOVL, &r1, t);
-               regfree(&r1);
                 splitclean();
                 return;
  
@@ -1116,14 +1224,12 @@ gmove(Node *f, Node *t)
                         gins(AMOVL, &flo, &tlo);
                         gins(AMOVL, &fhi, &thi);
                 } else {
-                       regalloc(&r1, types[TUINT32], N);
-                       regalloc(&r2, types[TUINT32], N);
+                       nodreg(&r1, t->type, D_AX);
+                       nodreg(&r2, t->type, D_DX);
                         gins(AMOVL, &flo, &r1);
                         gins(AMOVL, &fhi, &r2);
                         gins(AMOVL, &r1, &tlo);
                         gins(AMOVL, &r2, &thi);
-                       regfree(&r2);
-                       regfree(&r1);
                 }
                 splitclean();
                 splitclean();
@@ -1198,23 +1304,36 @@ gmove(Node *f, Node *t)
  
         /*
         * float to integer
-       *
+       */
         case CASE(TFLOAT32, TINT16):
         case CASE(TFLOAT32, TINT32):
         case CASE(TFLOAT32, TINT64):
         case CASE(TFLOAT64, TINT16):
         case CASE(TFLOAT64, TINT32):
         case CASE(TFLOAT64, TINT64):
+               if(t->op == OREGISTER)
+                       goto hardmem;
+               nodreg(&r1, types[ft], D_F0);
                 if(ft == TFLOAT32)
-                       gins(AFMOVF, f, &f0);
+                       gins(AFMOVF, f, &r1);
                 else
-                       gins(AFMOVD, f, &f0);
+                       gins(AFMOVD, f, &r1);
+
+               // set round to zero mode during conversion
+               tempalloc(&t1, types[TUINT16]);
+               tempalloc(&t2, types[TUINT16]);
+               gins(AFSTCW, N, &t1);
+               gins(AMOVW, ncon(0xf7f), &t2);
+               gins(AFLDCW, &t2, N);
                 if(tt == TINT16)
-                       gins(AFMOVWP, &f0, t);
+                       gins(AFMOVWP, &r1, t);
                 else if(tt == TINT32)
-                       gins(AFMOVLP, &f0, t);
+                       gins(AFMOVLP, &r1, t);
                 else
-                       gins(AFMOVVP, &f0, t);
+                       gins(AFMOVVP, &r1, t);
+               gins(AFLDCW, &t1, N);
+               tempfree(&t2);
+               tempfree(&t1);
                 return;
  
         case CASE(TFLOAT32, TINT8):
@@ -1224,139 +1343,249 @@ gmove(Node *f, Node *t)
         case CASE(TFLOAT64, TUINT16):
         case CASE(TFLOAT64, TUINT8):
                 // convert via int32.
-               cvt = types[TINT32];
-               goto hard;
+               tempalloc(&t1, types[TINT32]);
+               gmove(f, &t1);
+               switch(tt) {
+               default:
+                       fatal("gmove %T", t);
+               case TINT8:
+                       gins(ACMPL, &t1, ncon(-0x80));
+                       p1 = gbranch(optoas(OLT, types[TINT32]), T);
+                       gins(ACMPL, &t1, ncon(0x7f));
+                       p2 = gbranch(optoas(OGT, types[TINT32]), T);
+                       p3 = gbranch(AJMP, T);
+                       patch(p1, pc);
+                       patch(p2, pc);
+                       gmove(ncon(-0x80), &t1);
+                       patch(p3, pc);
+                       gmove(&t1, t);
+                       break;
+               case TUINT8:
+                       gins(ATESTL, ncon(0xffffff00), &t1);
+                       p1 = gbranch(AJEQ, T);
+                       gins(AMOVB, ncon(0), &t1);
+                       patch(p1, pc);
+                       gmove(&t1, t);
+                       break;
+               case TUINT16:
+                       gins(ATESTL, ncon(0xffff0000), &t1);
+                       p1 = gbranch(AJEQ, T);
+                       gins(AMOVW, ncon(0), &t1);
+                       patch(p1, pc);
+                       gmove(&t1, t);
+                       break;
+               }
+               tempfree(&t1);
+               return;
  
         case CASE(TFLOAT32, TUINT32):
         case CASE(TFLOAT64, TUINT32):
-               // could potentially convert via int64.
-               cvt = types[TINT64];
-               goto hard;
+               // convert via int64.
+               tempalloc(&t1, types[TINT64]);
+               gmove(f, &t1);
+               split64(&t1, &tlo, &thi);
+               gins(ACMPL, &thi, ncon(0));
+               p1 = gbranch(AJEQ, T);
+               gins(AMOVL, ncon(0), &tlo);
+               patch(p1, pc);
+               gmove(&tlo, t);
+               splitclean();
+               tempfree(&t1);
+               return;
  
         case CASE(TFLOAT32, TUINT64):
         case CASE(TFLOAT64, TUINT64):
+               bignodes();
+               nodreg(&f0, types[ft], D_F0);
+               nodreg(&f1, types[ft], D_F0 + 1);
+               nodreg(&ax, types[TUINT16], D_AX);
+
                 if(ft == TFLOAT32)
                         gins(AFMOVF, f, &f0);
                 else
                         gins(AFMOVD, f, &f0);
-               // algorithm is:
+
+               // if 0 > v { answer = 0 }
+               gmove(&zerof, &f0);
+               gins(AFUCOMP, &f0, &f1);
+               gins(AFSTSW, N, &ax);
+               gins(ASAHF, N, N);
+               p1 = gbranch(optoas(OGT, types[tt]), T);
+               // if 1<<64 <= v { answer = 0 too }
+               gmove(&two64f, &f0);
+               gins(AFUCOMP, &f0, &f1);
+               gins(AFSTSW, N, &ax);
+               gins(ASAHF, N, N);
+               p2 = gbranch(optoas(OGT, types[tt]), T);
+               patch(p1, pc);
+               gins(AFMOVVP, &f0, t);  // don't care about t, but will pop the stack
+               split64(t, &tlo, &thi);
+               gins(AMOVL, ncon(0), &tlo);
+               gins(AMOVL, ncon(0), &thi);
+               splitclean();
+               p1 = gbranch(AJMP, T);
+               patch(p2, pc);
+
+               // in range; algorithm is:
                 //      if small enough, use native float64 -> int64 conversion.
                 //      otherwise, subtract 2^63, convert, and add it back.
-               bignodes();
-               regalloc(&r1, types[ft], N);
-               regalloc(&r2, types[ft], N);
-               gins(optoas(OCMP, f->type), &bigf, &r1);
-               p1 = gbranch(optoas(OLE, f->type), T);
-               gins(a, &r1, &r2);
-               p2 = gbranch(AJMP, T);
-               patch(p1, pc);
-               gins(optoas(OAS, f->type), &bigf, &r3);
-               gins(optoas(OSUB, f->type), &r3, &r1);
-               gins(a, &r1, &r2);
-               gins(AMOVQ, &bigi, &r4);
-               gins(AXORQ, &r4, &r2);
+
+               // set round to zero mode during conversion
+               tempalloc(&t1, types[TUINT16]);
+               tempalloc(&t2, types[TUINT16]);
+               gins(AFSTCW, N, &t1);
+               gins(AMOVW, ncon(0xf7f), &t2);
+               gins(AFLDCW, &t2, N);
+               tempfree(&t2);
+
+               // actual work
+               gmove(&two63f, &f0);
+               gins(AFUCOMP, &f0, &f1);
+               gins(AFSTSW, N, &ax);
+               gins(ASAHF, N, N);
+               p2 = gbranch(optoas(OLE, types[tt]), T);
+               gins(AFMOVVP, &f0, t);
+               p3 = gbranch(AJMP, T);
                 patch(p2, pc);
-               gmove(&r2, t);
-               regfree(&r4);
-               regfree(&r3);
-               regfree(&r2);
-               regfree(&r1);
-               fatal("lazy");
+               gmove(&two63f, &f0);
+               gins(AFSUBDP, &f0, &f1);
+               gins(AFMOVVP, &f0, t);
+               split64(t, &tlo, &thi);
+               gins(AXORL, ncon(0x80000000), &thi);    // + 2^63
+               patch(p3, pc);
+               patch(p1, pc);
+               splitclean();
+
+               // restore rounding mode
+               gins(AFLDCW, &t1, N);
+               tempfree(&t1);
                 return;
-       */
+
         /*
          * integer to float
-        *
+        */
+       case CASE(TINT16, TFLOAT32):
+       case CASE(TINT16, TFLOAT64):
         case CASE(TINT32, TFLOAT32):
-               a = ACVTSL2SS;
-               goto rdst;
-
-
         case CASE(TINT32, TFLOAT64):
-               a = ACVTSL2SD;
-               goto rdst;
-
         case CASE(TINT64, TFLOAT32):
-               a = ACVTSQ2SS;
-               goto rdst;
-
         case CASE(TINT64, TFLOAT64):
-               a = ACVTSQ2SD;
-               goto rdst;
+               if(t->op != OREGISTER)
+                       goto hard;
+               if(f->op == OREGISTER) {
+                       cvt = f->type;
+                       goto hardmem;
+               }
+               switch(ft) {
+               case TINT16:
+                       a = AFMOVW;
+                       break;
+               case TINT32:
+                       a = AFMOVL;
+                       break;
+               default:
+                       a = AFMOVV;
+                       break;
+               }
+               break;
  
-       case CASE(TINT16, TFLOAT32):
-       case CASE(TINT16, TFLOAT64):
         case CASE(TINT8, TFLOAT32):
         case CASE(TINT8, TFLOAT64):
         case CASE(TUINT16, TFLOAT32):
         case CASE(TUINT16, TFLOAT64):
         case CASE(TUINT8, TFLOAT32):
         case CASE(TUINT8, TFLOAT64):
-               // convert via int32
+               // convert via int32 memory
                 cvt = types[TINT32];
-               goto hard;
+               goto hardmem;
  
         case CASE(TUINT32, TFLOAT32):
         case CASE(TUINT32, TFLOAT64):
-               // convert via int64.
+               // convert via int64 memory
                 cvt = types[TINT64];
-               goto hard;
+               goto hardmem;
  
         case CASE(TUINT64, TFLOAT32):
         case CASE(TUINT64, TFLOAT64):
                 // algorithm is:
                 //      if small enough, use native int64 -> uint64 conversion.
                 //      otherwise, halve (rounding to odd?), convert, and double.
-               a = ACVTSQ2SS;
-               if(tt == TFLOAT64)
-                       a = ACVTSQ2SD;
-               nodconst(&zero, types[TUINT64], 0);
-               nodconst(&one, types[TUINT64], 1);
-               regalloc(&r1, f->type, f);
-               regalloc(&r2, t->type, t);
-               regalloc(&r3, f->type, N);
-               regalloc(&r4, f->type, N);
-               gmove(f, &r1);
-               gins(ACMPQ, &r1, &zero);
+               nodreg(&ax, types[TUINT32], D_AX);
+               nodreg(&dx, types[TUINT32], D_DX);
+               nodreg(&cx, types[TUINT32], D_CX);
+               tempalloc(&t1, f->type);
+               split64(&t1, &tlo, &thi);
+               gmove(f, &t1);
+               gins(ACMPL, &thi, ncon(0));
                 p1 = gbranch(AJLT, T);
-               gins(a, &r1, &r2);
+               // native
+               t1.type = types[TINT64];
+               gmove(&t1, t);
                 p2 = gbranch(AJMP, T);
+               // simulated
                 patch(p1, pc);
-               gmove(&r1, &r3);
-               gins(ASHRQ, &one, &r3);
-               gmove(&r1, &r4);
-               gins(AANDL, &one, &r4);
-               gins(AORQ, &r4, &r3);
-               gins(a, &r3, &r2);
-               gins(optoas(OADD, t->type), &r2, &r2);
+               gmove(&tlo, &ax);
+               gmove(&thi, &dx);
+               p1 = gins(ASHRL, ncon(1), &ax);
+               p1->from.index = D_DX;  // double-width shift DX -> AX
+               p1->from.scale = 0;
+               gins(ASETCC, N, &cx);
+               gins(AORB, &cx, &ax);
+               gins(ASHRL, ncon(1), &dx);
+               gmove(&dx, &thi);
+               gmove(&ax, &tlo);
+               nodreg(&r1, types[tt], D_F0);
+               nodreg(&r2, types[tt], D_F0 + 1);
+               gmove(&t1, &r1);        // t1.type is TINT64 now, set above
+               gins(AFMOVD, &r1, &r1);
+               gins(AFADDDP, &r1, &r2);
+               gmove(&r1, t);
                 patch(p2, pc);
-               gmove(&r2, t);
-               regfree(&r4);
-               regfree(&r3);
-               regfree(&r2);
-               regfree(&r1);
+               splitclean();
+               tempfree(&t1);
                 return;
-       */
+
         /*
          * float to float
          */
         case CASE(TFLOAT32, TFLOAT32):
-               a = AFMOVF;
-               break;
-
         case CASE(TFLOAT64, TFLOAT64):
-               a = AFMOVD;
+               // The way the code generator uses floating-point
+               // registers, a move from F0 to F0 is intended as a no-op.
+               // On the x86, it's not: it pushes a second copy of F0
+               // on the floating point stack.  So toss it away here.
+               // Also, F0 is the *only* register we ever evaluate
+               // into, so we should only see register/register as F0/F0.
+               if(f->op == OREGISTER && t->op == OREGISTER) {
+                       if(f->val.u.reg != D_F0 || t->val.u.reg != D_F0)
+                               goto fatal;
+                       return;
+               }
+               if(ismem(f) && ismem(t))
+                       goto hard;
+               a = AFMOVF;
+               if(ft == TFLOAT64)
+                       a = AFMOVD;
+               if(ismem(t)) {
+                       a = AFMOVFP;
+                       if(ft == TFLOAT64)
+                               a = AFMOVDP;
+               }
                 break;
  
-       /*
         case CASE(TFLOAT32, TFLOAT64):
-               a = ACVTSS2SD;
-               goto rdst;
+               if(f->op == OREGISTER)
+                       gins(AFMOVD, f, t);
+               else
+                       gins(AFMOVF, f, t);
+               return;
  
         case CASE(TFLOAT64, TFLOAT32):
-               a = ACVTSD2SS;
-               goto rdst;
-       */
+               if(f->op == OREGISTER)
+                       gins(AFMOVF, f, t);
+               else
+                       gins(AFMOVD, f, t);
+               return;
         }
  
         gins(a, f, t);
@@ -1377,6 +1606,18 @@ hard:
         gmove(&r1, t);
         regfree(&r1);
         return;
+
+hardmem:
+       // requires memory intermediate
+       tempalloc(&r1, cvt);
+       gmove(f, &r1);
+       gmove(&r1, t);
+       tempfree(&r1);
+       return;
+
+fatal:
+       // should not happen
+       fatal("gmove %N -> %N", f, t);
  }
  
  int
author	Russ Cox <rsc@golang.org>
	Wed, 3 Jun 2009 06:25:17 +0000 (23:25 -0700)
committer	Russ Cox <rsc@golang.org>
	Wed, 3 Jun 2009 06:25:17 +0000 (23:25 -0700)
src/cmd/8g/cgen.c		patch \| blob \| history
src/cmd/8g/gg.h		patch \| blob \| history
src/cmd/8g/gsubr.c		patch \| blob \| history