#include "gg.h"
-static int cancgen64(Node *n, Node *res);
-
-int
-is64(Type *t)
-{
- if(t == T)
- return 0;
- switch(simtype[t->etype]) {
- case TINT64:
- case TUINT64:
- case TPTR64:
- return 1;
- }
- return 0;
-}
-
-int
-noconv(Type *t1, Type *t2)
-{
- int e1, e2;
-
- e1 = simtype[t1->etype];
- e2 = simtype[t2->etype];
-
- switch(e1) {
- case TINT8:
- case TUINT8:
- return e2 == TINT8 || e2 == TUINT8;
-
- case TINT16:
- case TUINT16:
- return e2 == TINT16 || e2 == TUINT16;
-
- case TINT32:
- case TUINT32:
- case TPTR32:
- return e2 == TINT32 || e2 == TUINT32 || e2 == TPTR32;
-
- case TINT64:
- case TUINT64:
- case TPTR64:
- return e2 == TINT64 || e2 == TUINT64 || e2 == TPTR64;
-
- case TFLOAT32:
- return e2 == TFLOAT32;
-
- case TFLOAT64:
- return e2 == TFLOAT64;
- }
- return 0;
-}
-
/*
* generate:
* res = n;
if(res == N || res->type == T)
fatal("cgen: res nil");
+ // static initializations
+ if(initflag && gen_as_init(n, res))
+ return;
+
// function calls on both sides? introduce temporary
if(n->ullman >= UINF && res->ullman >= UINF) {
- tempname(&n1, n->type);
+ tempalloc(&n1, n->type);
cgen(n, &n1);
cgen(&n1, res);
+ tempfree(&n1);
return;
}
// otherwise, the result is addressable but n is not.
// let's do some computation.
- // 64-bit ops are hard on 32-bit machine.
- if(is64(n->type) && cancgen64(n, res))
- return;
-
// use ullman to pick operand to eval first.
nl = n->left;
nr = n->right;
return;
}
+ // 64-bit ops are hard on 32-bit machine.
+ if(is64(n->type) || is64(res->type) || n->left != N && is64(n->left->type)) {
+ switch(n->op) {
+ // math goes to cgen64.
+ case OMINUS:
+ case OCOM:
+ case OADD:
+ case OSUB:
+ case OMUL:
+ case OLSH:
+ case ORSH:
+ case OAND:
+ case OOR:
+ case OXOR:
+ cgen64(n, res);
+ return;
+ }
+ }
+
if(isfloat[n->type->etype] && isfloat[nl->type->etype])
goto flt;
return;
case OMINUS:
+ case OCOM:
a = optoas(n->op, nl->type);
goto uop;
break;
case OLEN:
- if(istype(nl->type, TSTRING) || istype(nl->type, TMAP)) {
- // both string and map have len in the first 32-bit word.
+ if(istype(nl->type, TMAP)) {
+ // map has len in the first 32-bit word.
// a zero pointer means zero length
tempalloc(&n1, types[tptr]);
cgen(nl, &n1);
regfree(&n1);
break;
}
- if(isslice(nl->type)) {
+ if(istype(nl->type, TSTRING) || isslice(nl->type)) {
+ // both slice and string have len one pointer into the struct.
+ // a zero pointer means zero length
igen(nl, &n1, res);
n1.op = OINDREG;
n1.type = types[TUINT32];
case OMOD:
case ODIV:
- if(isfloat[n->type->etype]) {
- a = optoas(n->op, nl->type);
- goto abop;
- }
cgen_div(n->op, nl, nr, res);
break;
return;
flt: // floating-point. 387 (not SSE2) to interoperate with 6c
- nodreg(&f0, n->type, D_F0);
+ nodreg(&f0, nl->type, D_F0);
nodreg(&f1, n->type, D_F0+1);
+ if(nr != N)
+ goto flt2;
+
+ // unary
+ cgen(nl, &f0);
+ if(n->op != OCONV)
+ gins(foptoas(n->op, n->type, 0), &f0, &f0);
+ gmove(&f0, res);
+ return;
+
+flt2: // binary
if(nl->ullman >= nr->ullman) {
cgen(nl, &f0);
if(nr->addable)
fatal("agen %O", n->op);
case OCONV:
- if(!eqtype(n->type, nl->type))
+ if(!cvttype(n->type, nl->type))
fatal("agen: non-trivial OCONV");
agen(nl, res);
break;
if(nr->addable) {
agenr(nl, &n3, res);
if(!isconst(nr, CTINT)) {
+ tempalloc(&tmp, nr->type);
+ cgen(nr, &tmp);
regalloc(&n1, nr->type, N);
- cgen(nr, &n1);
+ gmove(&tmp, &n1);
+ tempfree(&tmp);
}
} else if(nl->addable) {
if(!isconst(nr, CTINT)) {
{
int et, a;
Node *nl, *nr, *r;
- Node n1, n2, tmp;
+ Node n1, n2, tmp, t1, t2, ax;
Prog *p1, *p2;
if(debug['g']) {
break;
}
+ if(isfloat[nr->type->etype]) {
+ // floating-point compare: get nl into F0, compare it
+ // against nr, then copy the FPU status word into the
+ // CPU flags (FSTSW AX; SAHF) and branch on those.
+ nodreg(&tmp, nr->type, D_F0);
+ nodreg(&n2, nr->type, D_F0 + 1);
+ nodreg(&ax, types[TUINT16], D_AX);
+ et = simsimtype(nr->type);
+ if(et == TFLOAT64) {
+ // easy - do in FPU
+ // nr is evaluated first, so after both cgens
+ // F0 holds nl and F1 holds nr.
+ cgen(nr, &tmp);
+ cgen(nl, &tmp);
+ gins(AFUCOMPP, &tmp, &n2);
+ } else {
+ // NOTE(rsc): This is wrong.
+ // It's right for comparison but presumably all the
+ // other ops have the same problem. We need to
+ // figure out what the right solution is, besides
+ // tell people to use float64.
+ tempalloc(&t1, types[TFLOAT32]);
+ tempalloc(&t2, types[TFLOAT32]);
+ cgen(nr, &t1);
+ cgen(nl, &t2);
+ // load nl (t2) into F0 and compare it with nr (t1),
+ // the same operand order as the float64 case.
+ // loading t1 here would compare nr with itself.
+ gmove(&t2, &tmp);
+ gins(AFCOMFP, &t1, &tmp);
+ tempfree(&t2);
+ tempfree(&t1);
+ }
+ gins(AFSTSW, N, &ax);
+ gins(ASAHF, N, N);
+ patch(gbranch(optoas(brrev(a), nr->type), T), to);
+ break;
+ }
+
if(is64(nr->type)) {
if(!nl->addable) {
tempalloc(&n1, nl->type);
a = optoas(a, nr->type);
if(nr->ullman >= UINF) {
- regalloc(&n1, nr->type, N);
- cgen(nr, &n1);
-
- tempname(&tmp, nr->type);
- gmove(&n1, &tmp);
- regfree(&n1);
+ tempalloc(&tmp, nr->type);
+ cgen(nr, &tmp);
- regalloc(&n1, nl->type, N);
+ tempalloc(&n1, nl->type);
cgen(nl, &n1);
- regalloc(&n2, nr->type, &n2);
+ regalloc(&n2, nr->type, N);
cgen(&tmp, &n2);
gins(optoas(OCMP, nr->type), &n1, &n2);
patch(gbranch(a, nr->type), to);
-
- regfree(&n1);
+ tempfree(&n1);
+ tempfree(&tmp);
regfree(&n2);
break;
}
- regalloc(&n1, nl->type, N);
+ tempalloc(&n1, nl->type);
cgen(nl, &n1);
if(smallintconst(nr)) {
gins(optoas(OCMP, nr->type), &n1, nr);
patch(gbranch(a, nr->type), to);
- regfree(&n1);
+ tempfree(&n1);
break;
}
+ tempalloc(&tmp, nr->type);
+ cgen(nr, &tmp);
regalloc(&n2, nr->type, N);
- cgen(nr, &n2);
+ gmove(&tmp, &n2);
+ tempfree(&tmp);
gins(optoas(OCMP, nr->type), &n1, &n2);
patch(gbranch(a, nr->type), to);
-
- regfree(&n1);
regfree(&n2);
+ tempfree(&n1);
break;
}
}
void
sgen(Node *n, Node *res, int w)
{
- Node nodl, nodr;
+ Node dst, src, tdst, tsrc;
int32 c, q, odst, osrc;
if(debug['g']) {
osrc = stkof(n);
odst = stkof(res);
- // TODO(rsc): Should these be tempalloc instead?
- nodreg(&nodl, types[tptr], D_DI);
- nodreg(&nodr, types[tptr], D_SI);
-
- if(n->ullman >= res->ullman) {
- agen(n, &nodr);
- agen(res, &nodl);
- } else {
- agen(res, &nodl);
- agen(n, &nodr);
- }
+ nodreg(&dst, types[tptr], D_DI);
+ nodreg(&src, types[tptr], D_SI);
+
+ tempalloc(&tsrc, types[tptr]);
+ tempalloc(&tdst, types[tptr]);
+ if(!n->addable)
+ agen(n, &tsrc);
+ if(!res->addable)
+ agen(res, &tdst);
+ if(n->addable)
+ agen(n, &src);
+ else
+ gmove(&tsrc, &src);
+ if(res->addable)
+ agen(res, &dst);
+ else
+ gmove(&tdst, &dst);
+ tempfree(&tdst);
+ tempfree(&tsrc);
c = w % 4; // bytes
q = w / 4; // doublewords
- gins(ACLD, N, N);
// if we are copying forward on the stack and
// the src and dst overlap, then reverse direction
if(osrc < odst && odst < osrc+w) {
// we leave with the flag clear
gins(ACLD, N, N);
} else {
+ gins(ACLD, N, N); // paranoia. TODO(rsc): remove?
// normal direction
if(q >= 4) {
gconreg(AMOVL, q, D_CX);
}
}
-void
-nswap(Node *a, Node *b)
-{
- Node t;
-
- t = *a;
- *a = *b;
- *b = t;
-}
-
-Node*
-ncon(uint32 i)
-{
- static Node n;
-
- if(n.type == T)
- nodconst(&n, types[TUINT32], 0);
- mpmovecfix(n.val.u.xval, i);
- return &n;
-}
-
/*
* generate 64-bit
* res = n
* for the arithmetic ops handled in the switch below.
* any other op is a fatal error.
*/
-static int
-cancgen64(Node *n, Node *res)
+void
+cgen64(Node *n, Node *res)
{
Node t1, t2, ax, dx, cx, ex, fx, *l, *r;
Node lo1, lo2, lo3, hi1, hi2, hi3;
uint64 v;
uint32 lv, hv;
- if(n->op == OCALL)
- return 0;
if(res->op != OINDREG && res->op != ONAME) {
dump("n", n);
dump("res", res);
}
switch(n->op) {
default:
- return 0;
-
- case ONAME:
- case ODOT:
- gmove(n, res);
- return 1;
+ fatal("cgen64 %O", n->op);
case OMINUS:
cgen(n->left, res);
gins(AADCL, ncon(0), &hi1);
gins(ANEGL, N, &hi1);
splitclean();
- return 1;
+ return;
case OCOM:
cgen(n->left, res);
gins(ANOTL, N, &lo1);
gins(ANOTL, N, &hi1);
splitclean();
- return 1;
+ return;
case OADD:
case OSUB:
tempfree(&t2);
if(l == &t1)
tempfree(&t1);
- return 1;
}
/*
a = ADECL;
break;
+ case CASE(OCOM, TINT8):
+ case CASE(OCOM, TUINT8):
+ a = ANOTB;
+ break;
+
+ case CASE(OCOM, TINT16):
+ case CASE(OCOM, TUINT16):
+ a = ANOTW;
+ break;
+
+ case CASE(OCOM, TINT32):
+ case CASE(OCOM, TUINT32):
+ case CASE(OCOM, TPTR32):
+ a = ANOTL;
+ break;
+
case CASE(OMINUS, TINT8):
case CASE(OMINUS, TUINT8):
a = ANEGB;
a = ADIVL;
break;
+ case CASE(OEXTEND, TINT8):
+ a = ACBW;
+ break;
+
case CASE(OEXTEND, TINT16):
a = ACWD;
break;
{
int et;
- et = t->etype;
+ et = simtype[t->etype];
+
+ // If we need Fpop, it means we're working on
+ // two different floating-point registers, not memory.
+ // There the instruction only has a float64 form.
+ if(flg & Fpop)
+ et = TFLOAT64;
// clear Frev if unneeded
switch(op) {
D_CX, // for shift
D_DX, // for divide
D_SP, // for stack
+
+ D_BL, // because D_BX can be allocated
+ D_BH,
};
void
for(i=0; i<nelem(reg); i++)
reg[i] = 1;
- for(i=D_AX; i<=D_DI; i++)
+ for(i=D_AL; i<=D_DI; i++)
reg[i] = 0;
// TODO: Use MMX ?
for(i=0; i<nelem(resvd); i++)
reg[resvd[i]]--;
- for(i=D_AX; i<=D_DI; i++)
+ for(i=D_AL; i<=D_DI; i++)
if(reg[i])
yyerror("reg %R left allocated at %lux\n", i, regpc[i]);
for(i=D_F0; i<=D_F7; i++)
void
regalloc(Node *n, Type *t, Node *o)
{
- int i, et;
+ int i, et, min, max;
if(t == T)
fatal("regalloc: t nil");
switch(et) {
case TINT8:
case TUINT8:
+ // This is going to come back to bite us;
+ // we're not tracking tiny registers vs big ones.
+ // The hope is that because we use temporaries
+ // everywhere instead of registers, this will be okay.
+ min = D_AL;
+ max = D_BH;
+ goto try;
case TINT16:
case TUINT16:
case TINT32:
case TPTR32:
case TPTR64:
case TBOOL:
+ min = D_AX;
+ max = D_DI;
+ try:
if(o != N && o->op == OREGISTER) {
i = o->val.u.reg;
- if(i >= D_AX && i <= D_DI)
+ // honor the caller's register hint only when it lies in the
+ // min..max range chosen above for this type, the same range
+ // the search loop below uses. a fixed D_AX lower bound would
+ // not follow min for the byte-register case.
+ if(i >= min && i <= max)
goto out;
}
- for(i=D_AX; i<=D_DI; i++)
+ for(i=min; i<=max; i++)
if(reg[i] == 0)
goto out;
fprint(2, "registers allocated at\n");
- for(i=D_AX; i<=D_DI; i++)
+ for(i=min; i<=max; i++)
fprint(2, "\t%R\t%#lux\n", i, regpc[i]);
yyerror("out of fixed registers");
goto err;
stksize += w;
stksize = rnd(stksize, w);
n->xoffset = -stksize;
+//print("tempalloc %d -> %d from %p\n", n->ostk, n->xoffset, __builtin_return_address(0));
if(stksize > maxstksize)
maxstksize = stksize;
}
void
tempfree(Node *n)
{
+//print("tempfree %d\n", n->xoffset);
if(n->xoffset != -stksize)
fatal("tempfree %lld %d", -n->xoffset, stksize);
stksize = n->ostk;
gins(as, &n1, &n2);
}
+/*
+ * swap node contents:
+ * exchange the Node structures that a and b point to by
+ * whole-struct assignment through a local temporary.
+ * the pointers themselves are unchanged; afterwards the node
+ * at a holds what the node at b held, and vice versa.
+ */
+void
+nswap(Node *a, Node *b)
+{
+ Node t;
+
+ t = *a;
+ *a = *b;
+ *b = t;
+}
+
+/*
+ * return constant i node.
+ * the result points at a single function-static Node: it is
+ * set up with nodconst as a types[TUINT32] constant the first
+ * time through (while its type is still T), and every call
+ * stores i into its value with mpmovecfix.
+ * overwritten by next call, but useful in calls to gins.
+ * a pointer kept from an earlier call sees the value passed
+ * to the most recent call.
+ */
+Node*
+ncon(uint32 i)
+{
+ static Node n;
+
+ if(n.type == T)
+ nodconst(&n, types[TUINT32], 0);
+ mpmovecfix(n.val.u.xval, i);
+ return &n;
+}
/*
* Is this node a memory operand?
sclean[nsclean-1] = n1;
}
n = &n1;
- // fall through
+ goto common;
case ONAME:
+ if(n->class == PPARAMREF) {
+ cgen(n->heapaddr, &n1);
+ sclean[nsclean-1] = n1;
+ // fall through.
+ n = &n1;
+ }
+ goto common;
case OINDREG:
+ common:
*lo = *n;
*hi = *n;
lo->type = types[TUINT32];
regfree(&sclean[nsclean]);
}
+/*
+ * set up nodes representing fp constants
+ * zerof, two64f and two63f (0, 2^64 and 2^63).
+ * bignodes does the work on its first call only; a static
+ * flag turns later calls into no-ops.
+ * two64f starts as a copy of the ncon(0) node, retyped to
+ * types[TFLOAT64] with ctype CTFLT; two63f and zerof start
+ * as copies of two64f. each node is given its own
+ * mal-allocated fval, so the three values do not share storage.
+ */
+Node zerof;
+Node two64f;
+Node two63f;
+
+void
+bignodes(void)
+{
+ static int did;
+
+ if(did)
+ return;
+ did = 1;
+
+ two64f = *ncon(0);
+ two64f.type = types[TFLOAT64];
+ two64f.val.ctype = CTFLT;
+ two64f.val.u.fval = mal(sizeof *two64f.val.u.fval);
+ mpmovecflt(two64f.val.u.fval, 18446744073709551616.);
+
+ two63f = two64f;
+ two63f.val.u.fval = mal(sizeof *two63f.val.u.fval);
+ mpmovecflt(two63f.val.u.fval, 9223372036854775808.);
+
+ zerof = two64f;
+ zerof.val.u.fval = mal(sizeof *zerof.val.u.fval);
+ mpmovecflt(zerof.val.u.fval, 0);
+}
+
void
gmove(Node *f, Node *t)
{
int a, ft, tt;
Type *cvt;
- Node r1, r2, flo, fhi, tlo, thi, con;
+ Node r1, r2, t1, t2, flo, fhi, tlo, thi, con, f0, f1, ax, dx, cx;
+ Prog *p1, *p2, *p3;
if(debug['M'])
print("gmove %N -> %N\n", f, t);
tt = simsimtype(t->type);
cvt = t->type;
- // cannot have two memory operands;
+ // cannot have two integer memory operands;
// except 64-bit, which always copies via registers anyway.
- if(ismem(f) && ismem(t) && !is64(f->type) && !is64(t->type))
+ if(isint[ft] && isint[tt] && !is64(f->type) && !is64(t->type) && ismem(f) && ismem(t))
goto hard;
// convert constant to desired type
if(f->op == OLITERAL) {
- convconst(&con, t->type, &f->val);
+ if(tt == TFLOAT32)
+ convconst(&con, types[TFLOAT64], &f->val);
+ else
+ convconst(&con, t->type, &f->val);
f = &con;
- ft = tt; // so big switch will choose a simple mov
+ ft = simsimtype(con.type);
// some constants can't move directly to memory.
if(ismem(t)) {
switch(CASE(ft, tt)) {
default:
- fatal("gmove %N -> %N", f, t);
+ goto fatal;
/*
* integer copy and truncate
case CASE(TINT64, TUINT8):
case CASE(TUINT64, TUINT8):
split64(f, &flo, &fhi);
- regalloc(&r1, t->type, t);
+ nodreg(&r1, t->type, D_AX);
gins(AMOVB, &flo, &r1);
gins(AMOVB, &r1, t);
- regfree(&r1);
splitclean();
return;
case CASE(TINT64, TUINT16):
case CASE(TUINT64, TUINT16):
split64(f, &flo, &fhi);
- regalloc(&r1, t->type, t);
+ nodreg(&r1, t->type, D_AX);
gins(AMOVW, &flo, &r1);
gins(AMOVW, &r1, t);
- regfree(&r1);
splitclean();
return;
case CASE(TINT64, TUINT32):
case CASE(TUINT64, TUINT32):
split64(f, &flo, &fhi);
- regalloc(&r1, t->type, t);
+ nodreg(&r1, t->type, D_AX);
gins(AMOVL, &flo, &r1);
gins(AMOVL, &r1, t);
- regfree(&r1);
splitclean();
return;
gins(AMOVL, &flo, &tlo);
gins(AMOVL, &fhi, &thi);
} else {
- regalloc(&r1, types[TUINT32], N);
- regalloc(&r2, types[TUINT32], N);
+ nodreg(&r1, t->type, D_AX);
+ nodreg(&r2, t->type, D_DX);
gins(AMOVL, &flo, &r1);
gins(AMOVL, &fhi, &r2);
gins(AMOVL, &r1, &tlo);
gins(AMOVL, &r2, &thi);
- regfree(&r2);
- regfree(&r1);
}
splitclean();
splitclean();
/*
* float to integer
- *
+ */
case CASE(TFLOAT32, TINT16):
case CASE(TFLOAT32, TINT32):
case CASE(TFLOAT32, TINT64):
case CASE(TFLOAT64, TINT16):
case CASE(TFLOAT64, TINT32):
case CASE(TFLOAT64, TINT64):
+ if(t->op == OREGISTER)
+ goto hardmem;
+ nodreg(&r1, types[ft], D_F0);
if(ft == TFLOAT32)
- gins(AFMOVF, f, &f0);
+ gins(AFMOVF, f, &r1);
else
- gins(AFMOVD, f, &f0);
+ gins(AFMOVD, f, &r1);
+
+ // set round to zero mode during conversion
+ tempalloc(&t1, types[TUINT16]);
+ tempalloc(&t2, types[TUINT16]);
+ gins(AFSTCW, N, &t1);
+ gins(AMOVW, ncon(0xf7f), &t2);
+ gins(AFLDCW, &t2, N);
if(tt == TINT16)
- gins(AFMOVWP, &f0, t);
+ gins(AFMOVWP, &r1, t);
else if(tt == TINT32)
- gins(AFMOVLP, &f0, t);
+ gins(AFMOVLP, &r1, t);
else
- gins(AFMOVVP, &f0, t);
+ gins(AFMOVVP, &r1, t);
+ gins(AFLDCW, &t1, N);
+ tempfree(&t2);
+ tempfree(&t1);
return;
case CASE(TFLOAT32, TINT8):
case CASE(TFLOAT64, TUINT16):
case CASE(TFLOAT64, TUINT8):
// convert via int32.
- cvt = types[TINT32];
- goto hard;
+ tempalloc(&t1, types[TINT32]);
+ gmove(f, &t1);
+ switch(tt) {
+ default:
+ fatal("gmove %T", t);
+ case TINT8:
+ gins(ACMPL, &t1, ncon(-0x80));
+ p1 = gbranch(optoas(OLT, types[TINT32]), T);
+ gins(ACMPL, &t1, ncon(0x7f));
+ p2 = gbranch(optoas(OGT, types[TINT32]), T);
+ p3 = gbranch(AJMP, T);
+ patch(p1, pc);
+ patch(p2, pc);
+ gmove(ncon(-0x80), &t1);
+ patch(p3, pc);
+ gmove(&t1, t);
+ break;
+ case TUINT8:
+ gins(ATESTL, ncon(0xffffff00), &t1);
+ p1 = gbranch(AJEQ, T);
+ gins(AMOVB, ncon(0), &t1);
+ patch(p1, pc);
+ gmove(&t1, t);
+ break;
+ case TUINT16:
+ gins(ATESTL, ncon(0xffff0000), &t1);
+ p1 = gbranch(AJEQ, T);
+ gins(AMOVW, ncon(0), &t1);
+ patch(p1, pc);
+ gmove(&t1, t);
+ break;
+ }
+ tempfree(&t1);
+ return;
case CASE(TFLOAT32, TUINT32):
case CASE(TFLOAT64, TUINT32):
- // could potentially convert via int64.
- cvt = types[TINT64];
- goto hard;
+ // convert via int64.
+ tempalloc(&t1, types[TINT64]);
+ gmove(f, &t1);
+ split64(&t1, &tlo, &thi);
+ gins(ACMPL, &thi, ncon(0));
+ p1 = gbranch(AJEQ, T);
+ gins(AMOVL, ncon(0), &tlo);
+ patch(p1, pc);
+ gmove(&tlo, t);
+ splitclean();
+ tempfree(&t1);
+ return;
case CASE(TFLOAT32, TUINT64):
case CASE(TFLOAT64, TUINT64):
+ bignodes();
+ nodreg(&f0, types[ft], D_F0);
+ nodreg(&f1, types[ft], D_F0 + 1);
+ nodreg(&ax, types[TUINT16], D_AX);
+
if(ft == TFLOAT32)
gins(AFMOVF, f, &f0);
else
gins(AFMOVD, f, &f0);
- // algorithm is:
+
+ // if 0 > v { answer = 0 }
+ gmove(&zerof, &f0);
+ gins(AFUCOMP, &f0, &f1);
+ gins(AFSTSW, N, &ax);
+ gins(ASAHF, N, N);
+ p1 = gbranch(optoas(OGT, types[tt]), T);
+ // if 1<<64 <= v { answer = 0 too }
+ gmove(&two64f, &f0);
+ gins(AFUCOMP, &f0, &f1);
+ gins(AFSTSW, N, &ax);
+ gins(ASAHF, N, N);
+ p2 = gbranch(optoas(OGT, types[tt]), T);
+ patch(p1, pc);
+ gins(AFMOVVP, &f0, t); // don't care about t, but will pop the stack
+ split64(t, &tlo, &thi);
+ gins(AMOVL, ncon(0), &tlo);
+ gins(AMOVL, ncon(0), &thi);
+ splitclean();
+ p1 = gbranch(AJMP, T);
+ patch(p2, pc);
+
+ // in range; algorithm is:
// if small enough, use native float64 -> int64 conversion.
// otherwise, subtract 2^63, convert, and add it back.
- bignodes();
- regalloc(&r1, types[ft], N);
- regalloc(&r2, types[ft], N);
- gins(optoas(OCMP, f->type), &bigf, &r1);
- p1 = gbranch(optoas(OLE, f->type), T);
- gins(a, &r1, &r2);
- p2 = gbranch(AJMP, T);
- patch(p1, pc);
- gins(optoas(OAS, f->type), &bigf, &r3);
- gins(optoas(OSUB, f->type), &r3, &r1);
- gins(a, &r1, &r2);
- gins(AMOVQ, &bigi, &r4);
- gins(AXORQ, &r4, &r2);
+
+ // set round to zero mode during conversion
+ tempalloc(&t1, types[TUINT16]);
+ tempalloc(&t2, types[TUINT16]);
+ gins(AFSTCW, N, &t1);
+ gins(AMOVW, ncon(0xf7f), &t2);
+ gins(AFLDCW, &t2, N);
+ tempfree(&t2);
+
+ // actual work
+ gmove(&two63f, &f0);
+ gins(AFUCOMP, &f0, &f1);
+ gins(AFSTSW, N, &ax);
+ gins(ASAHF, N, N);
+ p2 = gbranch(optoas(OLE, types[tt]), T);
+ gins(AFMOVVP, &f0, t);
+ p3 = gbranch(AJMP, T);
patch(p2, pc);
- gmove(&r2, t);
- regfree(&r4);
- regfree(&r3);
- regfree(&r2);
- regfree(&r1);
- fatal("lazy");
+ gmove(&two63f, &f0);
+ gins(AFSUBDP, &f0, &f1);
+ gins(AFMOVVP, &f0, t);
+ split64(t, &tlo, &thi);
+ gins(AXORL, ncon(0x80000000), &thi); // + 2^63
+ patch(p3, pc);
+ splitclean();
+
+ // restore rounding mode
+ gins(AFLDCW, &t1, N);
+ tempfree(&t1);
+
+ // the out-of-range (answer = 0) path jumps here, past the
+ // restore: it never executed the FSTCW that saved the control
+ // word into t1, so it must not run the FLDCW that reloads it.
+ patch(p1, pc);
return;
- */
+
/*
* integer to float
- *
+ */
+ case CASE(TINT16, TFLOAT32):
+ case CASE(TINT16, TFLOAT64):
case CASE(TINT32, TFLOAT32):
- a = ACVTSL2SS;
- goto rdst;
-
-
case CASE(TINT32, TFLOAT64):
- a = ACVTSL2SD;
- goto rdst;
-
case CASE(TINT64, TFLOAT32):
- a = ACVTSQ2SS;
- goto rdst;
-
case CASE(TINT64, TFLOAT64):
- a = ACVTSQ2SD;
- goto rdst;
+ if(t->op != OREGISTER)
+ goto hard;
+ if(f->op == OREGISTER) {
+ cvt = f->type;
+ goto hardmem;
+ }
+ switch(ft) {
+ case TINT16:
+ a = AFMOVW;
+ break;
+ case TINT32:
+ a = AFMOVL;
+ break;
+ default:
+ a = AFMOVV;
+ break;
+ }
+ break;
- case CASE(TINT16, TFLOAT32):
- case CASE(TINT16, TFLOAT64):
case CASE(TINT8, TFLOAT32):
case CASE(TINT8, TFLOAT64):
case CASE(TUINT16, TFLOAT32):
case CASE(TUINT16, TFLOAT64):
case CASE(TUINT8, TFLOAT32):
case CASE(TUINT8, TFLOAT64):
- // convert via int32
+ // convert via int32 memory
cvt = types[TINT32];
- goto hard;
+ goto hardmem;
case CASE(TUINT32, TFLOAT32):
case CASE(TUINT32, TFLOAT64):
- // convert via int64.
+ // convert via int64 memory
cvt = types[TINT64];
- goto hard;
+ goto hardmem;
case CASE(TUINT64, TFLOAT32):
case CASE(TUINT64, TFLOAT64):
// algorithm is:
// if small enough, use native int64 -> uint64 conversion.
// otherwise, halve (rounding to odd?), convert, and double.
- a = ACVTSQ2SS;
- if(tt == TFLOAT64)
- a = ACVTSQ2SD;
- nodconst(&zero, types[TUINT64], 0);
- nodconst(&one, types[TUINT64], 1);
- regalloc(&r1, f->type, f);
- regalloc(&r2, t->type, t);
- regalloc(&r3, f->type, N);
- regalloc(&r4, f->type, N);
- gmove(f, &r1);
- gins(ACMPQ, &r1, &zero);
+ nodreg(&ax, types[TUINT32], D_AX);
+ nodreg(&dx, types[TUINT32], D_DX);
+ nodreg(&cx, types[TUINT32], D_CX);
+ tempalloc(&t1, f->type);
+ split64(&t1, &tlo, &thi);
+ gmove(f, &t1);
+ gins(ACMPL, &thi, ncon(0));
p1 = gbranch(AJLT, T);
- gins(a, &r1, &r2);
+ // native
+ t1.type = types[TINT64];
+ gmove(&t1, t);
p2 = gbranch(AJMP, T);
+ // simulated
patch(p1, pc);
- gmove(&r1, &r3);
- gins(ASHRQ, &one, &r3);
- gmove(&r1, &r4);
- gins(AANDL, &one, &r4);
- gins(AORQ, &r4, &r3);
- gins(a, &r3, &r2);
- gins(optoas(OADD, t->type), &r2, &r2);
+ gmove(&tlo, &ax);
+ gmove(&thi, &dx);
+ p1 = gins(ASHRL, ncon(1), &ax);
+ p1->from.index = D_DX; // double-width shift DX -> AX
+ p1->from.scale = 0;
+ gins(ASETCC, N, &cx);
+ gins(AORB, &cx, &ax);
+ gins(ASHRL, ncon(1), &dx);
+ gmove(&dx, &thi);
+ gmove(&ax, &tlo);
+ nodreg(&r1, types[tt], D_F0);
+ nodreg(&r2, types[tt], D_F0 + 1);
+ gmove(&t1, &r1); // t1.type is TINT64 now, set above
+ gins(AFMOVD, &r1, &r1);
+ gins(AFADDDP, &r1, &r2);
+ gmove(&r1, t);
patch(p2, pc);
- gmove(&r2, t);
- regfree(&r4);
- regfree(&r3);
- regfree(&r2);
- regfree(&r1);
+ splitclean();
+ tempfree(&t1);
return;
- */
+
/*
* float to float
*/
case CASE(TFLOAT32, TFLOAT32):
- a = AFMOVF;
- break;
-
case CASE(TFLOAT64, TFLOAT64):
- a = AFMOVD;
+ // The way the code generator uses floating-point
+ // registers, a move from F0 to F0 is intended as a no-op.
+ // On the x86, it's not: it pushes a second copy of F0
+ // on the floating point stack. So toss it away here.
+ // Also, F0 is the *only* register we ever evaluate
+ // into, so we should only see register/register as F0/F0.
+ if(f->op == OREGISTER && t->op == OREGISTER) {
+ if(f->val.u.reg != D_F0 || t->val.u.reg != D_F0)
+ goto fatal;
+ return;
+ }
+ if(ismem(f) && ismem(t))
+ goto hard;
+ a = AFMOVF;
+ if(ft == TFLOAT64)
+ a = AFMOVD;
+ if(ismem(t)) {
+ a = AFMOVFP;
+ if(ft == TFLOAT64)
+ a = AFMOVDP;
+ }
break;
- /*
case CASE(TFLOAT32, TFLOAT64):
- a = ACVTSS2SD;
- goto rdst;
+ if(f->op == OREGISTER)
+ gins(AFMOVD, f, t);
+ else
+ gins(AFMOVF, f, t);
+ return;
case CASE(TFLOAT64, TFLOAT32):
- a = ACVTSD2SS;
- goto rdst;
- */
+ if(f->op == OREGISTER)
+ gins(AFMOVF, f, t);
+ else
+ gins(AFMOVD, f, t);
+ return;
}
gins(a, f, t);
gmove(&r1, t);
regfree(&r1);
return;
+
+hardmem:
+ // requires memory intermediate
+ tempalloc(&r1, cvt);
+ gmove(f, &r1);
+ gmove(&r1, t);
+ tempfree(&r1);
+ return;
+
+fatal:
+ // should not happen
+ fatal("gmove %N -> %N", f, t);
}
int