From: Russ Cox Date: Wed, 3 Jun 2009 06:25:17 +0000 (-0700) Subject: 8g: X-Git-Tag: weekly.2009-11-06~1481 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=a00bfb5b49bb73825d3993894d51d466e2776dc9;p=gostls13.git 8g: * floating point -> integer conversions. x86 defines that overflow/underflow results in 1<<15, 1<<31, 1<<63 for int16, int32, int64. when building the unsigned conversions out of the native signed ones, 8g turns overflow/underflow into zero. the spec does not say what should happen. * many tiny bug fixes. can run a large number of files from go/test now, and can fmt.Printf. * struggling with byte register allocation and float32 computation. R=ken OCL=29642 CL=29811 --- diff --git a/src/cmd/8g/cgen.c b/src/cmd/8g/cgen.c index 911e004dc2..75c15cd23a 100644 --- a/src/cmd/8g/cgen.c +++ b/src/cmd/8g/cgen.c @@ -7,58 +7,6 @@ #include "gg.h" -static int cancgen64(Node *n, Node *res); - -int -is64(Type *t) -{ - if(t == T) - return 0; - switch(simtype[t->etype]) { - case TINT64: - case TUINT64: - case TPTR64: - return 1; - } - return 0; -} - -int -noconv(Type *t1, Type *t2) -{ - int e1, e2; - - e1 = simtype[t1->etype]; - e2 = simtype[t2->etype]; - - switch(e1) { - case TINT8: - case TUINT8: - return e2 == TINT8 || e2 == TUINT8; - - case TINT16: - case TUINT16: - return e2 == TINT16 || e2 == TUINT16; - - case TINT32: - case TUINT32: - case TPTR32: - return e2 == TINT32 || e2 == TUINT32 || e2 == TPTR32; - - case TINT64: - case TUINT64: - case TPTR64: - return e2 == TINT64 || e2 == TUINT64 || e2 == TPTR64; - - case TFLOAT32: - return e2 == TFLOAT32; - - case TFLOAT64: - return e2 == TFLOAT64; - } - return 0; -} - /* * generate: * res = n; @@ -84,11 +32,16 @@ cgen(Node *n, Node *res) if(res == N || res->type == T) fatal("cgen: res nil"); + // static initializations + if(initflag && gen_as_init(n, res)) + return; + // function calls on both sides? introduce temporary if(n->ullman >= UINF && res->ullman >= UINF) { - tempname(&n1, n->type); + tempalloc(&n1, n->type); cgen(n, &n1); cgen(&n1, res); + tempfree(&n1); return; } @@ -125,10 +78,6 @@ cgen(Node *n, Node *res) // otherwise, the result is addressable but n is not. // let's do some computation. - // 64-bit ops are hard on 32-bit machine. - if(is64(n->type) && cancgen64(n, res)) - return; - // use ullman to pick operand to eval first. nl = n->left; nr = n->right; @@ -144,6 +93,25 @@ cgen(Node *n, Node *res) return; } + // 64-bit ops are hard on 32-bit machine. + if(is64(n->type) || is64(res->type) || n->left != N && is64(n->left->type)) { + switch(n->op) { + // math goes to cgen64. + case OMINUS: + case OCOM: + case OADD: + case OSUB: + case OMUL: + case OLSH: + case ORSH: + case OAND: + case OOR: + case OXOR: + cgen64(n, res); + return; + } + } + if(isfloat[n->type->etype] && isfloat[nl->type->etype]) goto flt; @@ -178,6 +146,7 @@ cgen(Node *n, Node *res) return; case OMINUS: + case OCOM: a = optoas(n->op, nl->type); goto uop; @@ -218,8 +187,8 @@ cgen(Node *n, Node *res) break; case OLEN: - if(istype(nl->type, TSTRING) || istype(nl->type, TMAP)) { - // both string and map have len in the first 32-bit word. + if(istype(nl->type, TMAP)) { + // map has len in the first 32-bit word. // a zero pointer means zero length tempalloc(&n1, types[tptr]); cgen(nl, &n1); @@ -243,7 +212,9 @@ cgen(Node *n, Node *res) regfree(&n1); break; } - if(isslice(nl->type)) { + if(istype(nl->type, TSTRING) || isslice(nl->type)) { + // both slice and string have len one pointer into the struct. + // a zero pointer means zero length igen(nl, &n1, res); n1.op = OINDREG; n1.type = types[TUINT32]; @@ -289,10 +260,6 @@ cgen(Node *n, Node *res) case OMOD: case ODIV: - if(isfloat[n->type->etype]) { - a = optoas(n->op, nl->type); - goto abop; - } cgen_div(n->op, nl, nr, res); break; @@ -340,8 +307,19 @@ uop: // unary return; flt: // floating-point. 387 (not SSE2) to interoperate with 6c - nodreg(&f0, n->type, D_F0); + nodreg(&f0, nl->type, D_F0); nodreg(&f1, n->type, D_F0+1); + if(nr != N) + goto flt2; + + // unary + cgen(nl, &f0); + if(n->op != OCONV) + gins(foptoas(n->op, n->type, 0), &f0, &f0); + gmove(&f0, res); + return; + +flt2: // binary if(nl->ullman >= nr->ullman) { cgen(nl, &f0); if(nr->addable) @@ -402,7 +380,7 @@ agen(Node *n, Node *res) fatal("agen %O", n->op); case OCONV: - if(!eqtype(n->type, nl->type)) + if(!cvttype(n->type, nl->type)) fatal("agen: non-trivial OCONV"); agen(nl, res); break; @@ -427,8 +405,11 @@ agen(Node *n, Node *res) if(nr->addable) { agenr(nl, &n3, res); if(!isconst(nr, CTINT)) { + tempalloc(&tmp, nr->type); + cgen(nr, &tmp); regalloc(&n1, nr->type, N); - cgen(nr, &n1); + gmove(&tmp, &n1); + tempfree(&tmp); } } else if(nl->addable) { if(!isconst(nr, CTINT)) { @@ -640,7 +621,7 @@ bgen(Node *n, int true, Prog *to) { int et, a; Node *nl, *nr, *r; - Node n1, n2, tmp; + Node n1, n2, tmp, t1, t2, ax; Prog *p1, *p2; if(debug['g']) { @@ -778,6 +759,37 @@ bgen(Node *n, int true, Prog *to) break; } + if(isfloat[nr->type->etype]) { + nodreg(&tmp, nr->type, D_F0); + nodreg(&n2, nr->type, D_F0 + 1); + nodreg(&ax, types[TUINT16], D_AX); + et = simsimtype(nr->type); + if(et == TFLOAT64) { + // easy - do in FPU + cgen(nr, &tmp); + cgen(nl, &tmp); + gins(AFUCOMPP, &tmp, &n2); + } else { + // NOTE(rsc): This is wrong. + // It's right for comparison but presumably all the + // other ops have the same problem. We need to + // figure out what the right solution is, besides + // tell people to use float64. + tempalloc(&t1, types[TFLOAT32]); + tempalloc(&t2, types[TFLOAT32]); + cgen(nr, &t1); + cgen(nl, &t2); + gmove(&t1, &tmp); + gins(AFCOMFP, &t1, &tmp); + tempfree(&t2); + tempfree(&t1); + } + gins(AFSTSW, N, &ax); + gins(ASAHF, N, N); + patch(gbranch(optoas(brrev(a), nr->type), T), to); + break; + } + if(is64(nr->type)) { if(!nl->addable) { tempalloc(&n1, nl->type); @@ -800,45 +812,43 @@ bgen(Node *n, int true, Prog *to) a = optoas(a, nr->type); if(nr->ullman >= UINF) { - regalloc(&n1, nr->type, N); - cgen(nr, &n1); - - tempname(&tmp, nr->type); - gmove(&n1, &tmp); - regfree(&n1); + tempalloc(&tmp, nr->type); + cgen(nr, &tmp); - regalloc(&n1, nl->type, N); + tempalloc(&n1, nl->type); cgen(nl, &n1); - regalloc(&n2, nr->type, &n2); + regalloc(&n2, nr->type, N); cgen(&tmp, &n2); gins(optoas(OCMP, nr->type), &n1, &n2); patch(gbranch(a, nr->type), to); - - regfree(&n1); + tempfree(&n1); + tempfree(&tmp); regfree(&n2); break; } - regalloc(&n1, nl->type, N); + tempalloc(&n1, nl->type); cgen(nl, &n1); if(smallintconst(nr)) { gins(optoas(OCMP, nr->type), &n1, nr); patch(gbranch(a, nr->type), to); - regfree(&n1); + tempfree(&n1); break; } + tempalloc(&tmp, nr->type); + cgen(nr, &tmp); regalloc(&n2, nr->type, N); - cgen(nr, &n2); + gmove(&tmp, &n2); + tempfree(&tmp); gins(optoas(OCMP, nr->type), &n1, &n2); patch(gbranch(a, nr->type), to); - - regfree(&n1); regfree(&n2); + tempfree(&n1); break; } } @@ -883,7 +893,7 @@ stkof(Node *n) void sgen(Node *n, Node *res, int w) { - Node nodl, nodr; + Node dst, src, tdst, tsrc; int32 c, q, odst, osrc; if(debug['g']) { @@ -904,22 +914,29 @@ sgen(Node *n, Node *res, int w) osrc = stkof(n); odst = stkof(res); - // TODO(rsc): Should these be tempalloc instead? - nodreg(&nodl, types[tptr], D_DI); - nodreg(&nodr, types[tptr], D_SI); - - if(n->ullman >= res->ullman) { - agen(n, &nodr); - agen(res, &nodl); - } else { - agen(res, &nodl); - agen(n, &nodr); - } + nodreg(&dst, types[tptr], D_DI); + nodreg(&src, types[tptr], D_SI); + + tempalloc(&tsrc, types[tptr]); + tempalloc(&tdst, types[tptr]); + if(!n->addable) + agen(n, &tsrc); + if(!res->addable) + agen(res, &tdst); + if(n->addable) + agen(n, &src); + else + gmove(&tsrc, &src); + if(res->addable) + agen(res, &dst); + else + gmove(&tdst, &dst); + tempfree(&tdst); + tempfree(&tsrc); c = w % 4; // bytes q = w / 4; // doublewords - gins(ACLD, N, N); // if we are copying forward on the stack and // the src and dst overlap, then reverse direction if(osrc < odst && odst < osrc+w) { @@ -949,6 +966,7 @@ sgen(Node *n, Node *res, int w) // we leave with the flag clear gins(ACLD, N, N); } else { + gins(ACLD, N, N); // paranoia. TODO(rsc): remove? // normal direction if(q >= 4) { gconreg(AMOVL, q, D_CX); @@ -966,34 +984,13 @@ sgen(Node *n, Node *res, int w) } } -void -nswap(Node *a, Node *b) -{ - Node t; - - t = *a; - *a = *b; - *b = t; -} - -Node* -ncon(uint32 i) -{ - static Node n; - - if(n.type == T) - nodconst(&n, types[TUINT32], 0); - mpmovecfix(n.val.u.xval, i); - return &n; -} - /* * attempt to generate 64-bit * res = n * return 1 on success, 0 if op not handled. */ -static int -cancgen64(Node *n, Node *res) +void +cgen64(Node *n, Node *res) { Node t1, t2, ax, dx, cx, ex, fx, *l, *r; Node lo1, lo2, lo3, hi1, hi2, hi3; @@ -1001,8 +998,6 @@ cancgen64(Node *n, Node *res) uint64 v; uint32 lv, hv; - if(n->op == OCALL) - return 0; if(res->op != OINDREG && res->op != ONAME) { dump("n", n); dump("res", res); @@ -1010,12 +1005,7 @@ cancgen64(Node *n, Node *res) } switch(n->op) { default: - return 0; - - case ONAME: - case ODOT: - gmove(n, res); - return 1; + fatal("cgen64 %O", n->op); case OMINUS: cgen(n->left, res); @@ -1024,7 +1014,7 @@ cancgen64(Node *n, Node *res) gins(AADCL, ncon(0), &hi1); gins(ANEGL, N, &hi1); splitclean(); - return 1; + return; case OCOM: cgen(n->left, res); @@ -1032,7 +1022,7 @@ cancgen64(Node *n, Node *res) gins(ANOTL, N, &lo1); gins(ANOTL, N, &hi1); splitclean(); - return 1; + return; case OADD: case OSUB: @@ -1408,7 +1398,6 @@ out: tempfree(&t2); if(l == &t1) tempfree(&t1); - return 1; } /* diff --git a/src/cmd/8g/gg.h b/src/cmd/8g/gg.h index ee9140b047..03f7aac6f6 100644 --- a/src/cmd/8g/gg.h +++ b/src/cmd/8g/gg.h @@ -2,7 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. - #include #include @@ -68,7 +67,7 @@ EXTERN Node* throwreturn; EXTERN int maxstksize; /* - * gen.c + * ggen.c */ void compile(Node*); void proglist(void); @@ -90,7 +89,7 @@ void checklabels(); void ginscall(Node*, int); /* - * cgen + * cgen.c */ void agen(Node*, Node*); void agenr(Node *n, Node *a, Node *res); @@ -103,10 +102,14 @@ Prog* gins(int, Node*, Node*); int samaddr(Node*, Node*); void naddr(Node*, Addr*); void cgen_aret(Node*, Node*); -int is64(Type*); -void cmp64(Node*, Node*, int, Prog*); Node* ncon(uint32); +/* + * cgen64.c + */ +void cmp64(Node*, Node*, int, Prog*); +void cgen64(Node*, Node*); + /* * gsubr.c */ @@ -133,9 +136,10 @@ void tempfree(Node*); Node* nodarg(Type*, int); void nodreg(Node*, Type*, int); void nodindreg(Node*, Type*, int); -void nodconst(Node*, Type*, vlong); +void nodconst(Node*, Type*, int64); void gconreg(int, vlong, int); void datagostring(Strlit*, Addr*); +void datastring(char*, int, Addr*); void buildtxt(void); Plist* newplist(void); int isfat(Type*); @@ -145,6 +149,7 @@ int dotaddable(Node*, Node*); void afunclit(Addr*); void split64(Node*, Node*, Node*); void splitclean(void); +void nswap(Node*, Node*); /* * list.c diff --git a/src/cmd/8g/gsubr.c b/src/cmd/8g/gsubr.c index 1d9e9967f6..4f30c606b9 100755 --- a/src/cmd/8g/gsubr.c +++ b/src/cmd/8g/gsubr.c @@ -407,6 +407,22 @@ optoas(int op, Type *t) a = ADECL; break; + case CASE(OCOM, TINT8): + case CASE(OCOM, TUINT8): + a = ANOTB; + break; + + case CASE(OCOM, TINT16): + case CASE(OCOM, TUINT16): + a = ANOTW; + break; + + case CASE(OCOM, TINT32): + case CASE(OCOM, TUINT32): + case CASE(OCOM, TPTR32): + a = ANOTL; + break; + case CASE(OMINUS, TINT8): case CASE(OMINUS, TUINT8): a = ANEGB; @@ -560,6 +576,10 @@ optoas(int op, Type *t) a = ADIVL; break; + case CASE(OEXTEND, TINT8): + a = ACBW; + break; + case CASE(OEXTEND, TINT16): a = ACWD; break; @@ -577,7 +597,13 @@ foptoas(int op, Type *t, int flg) { int et; - et = t->etype; + et = simtype[t->etype]; + + // If we need Fpop, it means we're working on + // two different floating-point registers, not memory. + // There the instruction only has a float64 form. + if(flg & Fpop) + et = TFLOAT64; // clear Frev if unneeded switch(op) { @@ -655,6 +681,9 @@ static int resvd[] = D_CX, // for shift D_DX, // for divide D_SP, // for stack + + D_BL, // because D_BX can be allocated + D_BH, }; void @@ -664,7 +693,7 @@ ginit(void) for(i=0; iop == OREGISTER) { i = o->val.u.reg; - if(i >= D_AX && i <= D_DI) + if(i >= D_AX && i <= max) goto out; } - for(i=D_AX; i<=D_DI; i++) + for(i=min; i<=max; i++) if(reg[i] == 0) goto out; fprint(2, "registers allocated at\n"); - for(i=D_AX; i<=D_DI; i++) + for(i=min; i<=max; i++) fprint(2, "\t%R\t%#lux\n", i, regpc[i]); yyerror("out of fixed registers"); goto err; @@ -805,6 +844,7 @@ tempalloc(Node *n, Type *t) stksize += w; stksize = rnd(stksize, w); n->xoffset = -stksize; +//print("tempalloc %d -> %d from %p\n", n->ostk, n->xoffset, __builtin_return_address(0)); if(stksize > maxstksize) maxstksize = stksize; } @@ -812,6 +852,7 @@ tempalloc(Node *n, Type *t) void tempfree(Node *n) { +//print("tempfree %d\n", n->xoffset); if(n->xoffset != -stksize) fatal("tempfree %lld %d", -n->xoffset, stksize); stksize = n->ostk; @@ -912,6 +953,33 @@ gconreg(int as, vlong c, int reg) gins(as, &n1, &n2); } +/* + * swap node contents + */ +void +nswap(Node *a, Node *b) +{ + Node t; + + t = *a; + *a = *b; + *b = t; +} + +/* + * return constant i node. + * overwritten by next call, but useful in calls to gins. + */ +Node* +ncon(uint32 i) +{ + static Node n; + + if(n.type == T) + nodconst(&n, types[TUINT32], 0); + mpmovecfix(n.val.u.xval, i); + return &n; +} /* * Is this node a memory operand? @@ -954,9 +1022,17 @@ split64(Node *n, Node *lo, Node *hi) sclean[nsclean-1] = n1; } n = &n1; - // fall through + goto common; case ONAME: + if(n->class == PPARAMREF) { + cgen(n->heapaddr, &n1); + sclean[nsclean-1] = n1; + // fall through. + n = &n1; + } + goto common; case OINDREG: + common: *lo = *n; *hi = *n; lo->type = types[TUINT32]; @@ -990,12 +1066,44 @@ splitclean(void) regfree(&sclean[nsclean]); } +/* + * set up nodes representing fp constants + */ +Node zerof; +Node two64f; +Node two63f; + +void +bignodes(void) +{ + static int did; + + if(did) + return; + did = 1; + + two64f = *ncon(0); + two64f.type = types[TFLOAT64]; + two64f.val.ctype = CTFLT; + two64f.val.u.fval = mal(sizeof *two64f.val.u.fval); + mpmovecflt(two64f.val.u.fval, 18446744073709551616.); + + two63f = two64f; + two63f.val.u.fval = mal(sizeof *two63f.val.u.fval); + mpmovecflt(two63f.val.u.fval, 9223372036854775808.); + + zerof = two64f; + zerof.val.u.fval = mal(sizeof *zerof.val.u.fval); + mpmovecflt(zerof.val.u.fval, 0); +} + void gmove(Node *f, Node *t) { int a, ft, tt; Type *cvt; - Node r1, r2, flo, fhi, tlo, thi, con; + Node r1, r2, t1, t2, flo, fhi, tlo, thi, con, f0, f1, ax, dx, cx; + Prog *p1, *p2, *p3; if(debug['M']) print("gmove %N -> %N\n", f, t); @@ -1004,16 +1112,19 @@ gmove(Node *f, Node *t) tt = simsimtype(t->type); cvt = t->type; - // cannot have two memory operands; + // cannot have two integer memory operands; // except 64-bit, which always copies via registers anyway. - if(ismem(f) && ismem(t) && !is64(f->type) && !is64(t->type)) + if(isint[ft] && isint[tt] && !is64(f->type) && !is64(t->type) && ismem(f) && ismem(t)) goto hard; // convert constant to desired type if(f->op == OLITERAL) { - convconst(&con, t->type, &f->val); + if(tt == TFLOAT32) + convconst(&con, types[TFLOAT64], &f->val); + else + convconst(&con, t->type, &f->val); f = &con; - ft = tt; // so big switch will choose a simple mov + ft = simsimtype(con.type); // some constants can't move directly to memory. if(ismem(t)) { @@ -1032,7 +1143,7 @@ gmove(Node *f, Node *t) switch(CASE(ft, tt)) { default: - fatal("gmove %N -> %N", f, t); + goto fatal; /* * integer copy and truncate @@ -1057,10 +1168,9 @@ gmove(Node *f, Node *t) case CASE(TINT64, TUINT8): case CASE(TUINT64, TUINT8): split64(f, &flo, &fhi); - regalloc(&r1, t->type, t); + nodreg(&r1, t->type, D_AX); gins(AMOVB, &flo, &r1); gins(AMOVB, &r1, t); - regfree(&r1); splitclean(); return; @@ -1080,10 +1190,9 @@ gmove(Node *f, Node *t) case CASE(TINT64, TUINT16): case CASE(TUINT64, TUINT16): split64(f, &flo, &fhi); - regalloc(&r1, t->type, t); + nodreg(&r1, t->type, D_AX); gins(AMOVW, &flo, &r1); gins(AMOVW, &r1, t); - regfree(&r1); splitclean(); return; @@ -1099,10 +1208,9 @@ gmove(Node *f, Node *t) case CASE(TINT64, TUINT32): case CASE(TUINT64, TUINT32): split64(f, &flo, &fhi); - regalloc(&r1, t->type, t); + nodreg(&r1, t->type, D_AX); gins(AMOVL, &flo, &r1); gins(AMOVL, &r1, t); - regfree(&r1); splitclean(); return; @@ -1116,14 +1224,12 @@ gmove(Node *f, Node *t) gins(AMOVL, &flo, &tlo); gins(AMOVL, &fhi, &thi); } else { - regalloc(&r1, types[TUINT32], N); - regalloc(&r2, types[TUINT32], N); + nodreg(&r1, t->type, D_AX); + nodreg(&r2, t->type, D_DX); gins(AMOVL, &flo, &r1); gins(AMOVL, &fhi, &r2); gins(AMOVL, &r1, &tlo); gins(AMOVL, &r2, &thi); - regfree(&r2); - regfree(&r1); } splitclean(); splitclean(); @@ -1198,23 +1304,36 @@ gmove(Node *f, Node *t) /* * float to integer - * + */ case CASE(TFLOAT32, TINT16): case CASE(TFLOAT32, TINT32): case CASE(TFLOAT32, TINT64): case CASE(TFLOAT64, TINT16): case CASE(TFLOAT64, TINT32): case CASE(TFLOAT64, TINT64): + if(t->op == OREGISTER) + goto hardmem; + nodreg(&r1, types[ft], D_F0); if(ft == TFLOAT32) - gins(AFMOVF, f, &f0); + gins(AFMOVF, f, &r1); else - gins(AFMOVD, f, &f0); + gins(AFMOVD, f, &r1); + + // set round to zero mode during conversion + tempalloc(&t1, types[TUINT16]); + tempalloc(&t2, types[TUINT16]); + gins(AFSTCW, N, &t1); + gins(AMOVW, ncon(0xf7f), &t2); + gins(AFLDCW, &t2, N); if(tt == TINT16) - gins(AFMOVWP, &f0, t); + gins(AFMOVWP, &r1, t); else if(tt == TINT32) - gins(AFMOVLP, &f0, t); + gins(AFMOVLP, &r1, t); else - gins(AFMOVVP, &f0, t); + gins(AFMOVVP, &r1, t); + gins(AFLDCW, &t1, N); + tempfree(&t2); + tempfree(&t1); return; case CASE(TFLOAT32, TINT8): @@ -1224,139 +1343,249 @@ gmove(Node *f, Node *t) case CASE(TFLOAT64, TUINT16): case CASE(TFLOAT64, TUINT8): // convert via int32. - cvt = types[TINT32]; - goto hard; + tempalloc(&t1, types[TINT32]); + gmove(f, &t1); + switch(tt) { + default: + fatal("gmove %T", t); + case TINT8: + gins(ACMPL, &t1, ncon(-0x80)); + p1 = gbranch(optoas(OLT, types[TINT32]), T); + gins(ACMPL, &t1, ncon(0x7f)); + p2 = gbranch(optoas(OGT, types[TINT32]), T); + p3 = gbranch(AJMP, T); + patch(p1, pc); + patch(p2, pc); + gmove(ncon(-0x80), &t1); + patch(p3, pc); + gmove(&t1, t); + break; + case TUINT8: + gins(ATESTL, ncon(0xffffff00), &t1); + p1 = gbranch(AJEQ, T); + gins(AMOVB, ncon(0), &t1); + patch(p1, pc); + gmove(&t1, t); + break; + case TUINT16: + gins(ATESTL, ncon(0xffff0000), &t1); + p1 = gbranch(AJEQ, T); + gins(AMOVW, ncon(0), &t1); + patch(p1, pc); + gmove(&t1, t); + break; + } + tempfree(&t1); + return; case CASE(TFLOAT32, TUINT32): case CASE(TFLOAT64, TUINT32): - // could potentially convert via int64. - cvt = types[TINT64]; - goto hard; + // convert via int64. + tempalloc(&t1, types[TINT64]); + gmove(f, &t1); + split64(&t1, &tlo, &thi); + gins(ACMPL, &thi, ncon(0)); + p1 = gbranch(AJEQ, T); + gins(AMOVL, ncon(0), &tlo); + patch(p1, pc); + gmove(&tlo, t); + splitclean(); + tempfree(&t1); + return; case CASE(TFLOAT32, TUINT64): case CASE(TFLOAT64, TUINT64): + bignodes(); + nodreg(&f0, types[ft], D_F0); + nodreg(&f1, types[ft], D_F0 + 1); + nodreg(&ax, types[TUINT16], D_AX); + if(ft == TFLOAT32) gins(AFMOVF, f, &f0); else gins(AFMOVD, f, &f0); - // algorithm is: + + // if 0 > v { answer = 0 } + gmove(&zerof, &f0); + gins(AFUCOMP, &f0, &f1); + gins(AFSTSW, N, &ax); + gins(ASAHF, N, N); + p1 = gbranch(optoas(OGT, types[tt]), T); + // if 1<<64 <= v { answer = 0 too } + gmove(&two64f, &f0); + gins(AFUCOMP, &f0, &f1); + gins(AFSTSW, N, &ax); + gins(ASAHF, N, N); + p2 = gbranch(optoas(OGT, types[tt]), T); + patch(p1, pc); + gins(AFMOVVP, &f0, t); // don't care about t, but will pop the stack + split64(t, &tlo, &thi); + gins(AMOVL, ncon(0), &tlo); + gins(AMOVL, ncon(0), &thi); + splitclean(); + p1 = gbranch(AJMP, T); + patch(p2, pc); + + // in range; algorithm is: // if small enough, use native float64 -> int64 conversion. // otherwise, subtract 2^63, convert, and add it back. - bignodes(); - regalloc(&r1, types[ft], N); - regalloc(&r2, types[ft], N); - gins(optoas(OCMP, f->type), &bigf, &r1); - p1 = gbranch(optoas(OLE, f->type), T); - gins(a, &r1, &r2); - p2 = gbranch(AJMP, T); - patch(p1, pc); - gins(optoas(OAS, f->type), &bigf, &r3); - gins(optoas(OSUB, f->type), &r3, &r1); - gins(a, &r1, &r2); - gins(AMOVQ, &bigi, &r4); - gins(AXORQ, &r4, &r2); + + // set round to zero mode during conversion + tempalloc(&t1, types[TUINT16]); + tempalloc(&t2, types[TUINT16]); + gins(AFSTCW, N, &t1); + gins(AMOVW, ncon(0xf7f), &t2); + gins(AFLDCW, &t2, N); + tempfree(&t2); + + // actual work + gmove(&two63f, &f0); + gins(AFUCOMP, &f0, &f1); + gins(AFSTSW, N, &ax); + gins(ASAHF, N, N); + p2 = gbranch(optoas(OLE, types[tt]), T); + gins(AFMOVVP, &f0, t); + p3 = gbranch(AJMP, T); patch(p2, pc); - gmove(&r2, t); - regfree(&r4); - regfree(&r3); - regfree(&r2); - regfree(&r1); - fatal("lazy"); + gmove(&two63f, &f0); + gins(AFSUBDP, &f0, &f1); + gins(AFMOVVP, &f0, t); + split64(t, &tlo, &thi); + gins(AXORL, ncon(0x80000000), &thi); // + 2^63 + patch(p3, pc); + patch(p1, pc); + splitclean(); + + // restore rounding mode + gins(AFLDCW, &t1, N); + tempfree(&t1); return; - */ + /* * integer to float - * + */ + case CASE(TINT16, TFLOAT32): + case CASE(TINT16, TFLOAT64): case CASE(TINT32, TFLOAT32): - a = ACVTSL2SS; - goto rdst; - - case CASE(TINT32, TFLOAT64): - a = ACVTSL2SD; - goto rdst; - case CASE(TINT64, TFLOAT32): - a = ACVTSQ2SS; - goto rdst; - case CASE(TINT64, TFLOAT64): - a = ACVTSQ2SD; - goto rdst; + if(t->op != OREGISTER) + goto hard; + if(f->op == OREGISTER) { + cvt = f->type; + goto hardmem; + } + switch(ft) { + case TINT16: + a = AFMOVW; + break; + case TINT32: + a = AFMOVL; + break; + default: + a = AFMOVV; + break; + } + break; - case CASE(TINT16, TFLOAT32): - case CASE(TINT16, TFLOAT64): case CASE(TINT8, TFLOAT32): case CASE(TINT8, TFLOAT64): case CASE(TUINT16, TFLOAT32): case CASE(TUINT16, TFLOAT64): case CASE(TUINT8, TFLOAT32): case CASE(TUINT8, TFLOAT64): - // convert via int32 + // convert via int32 memory cvt = types[TINT32]; - goto hard; + goto hardmem; case CASE(TUINT32, TFLOAT32): case CASE(TUINT32, TFLOAT64): - // convert via int64. + // convert via int64 memory cvt = types[TINT64]; - goto hard; + goto hardmem; case CASE(TUINT64, TFLOAT32): case CASE(TUINT64, TFLOAT64): // algorithm is: // if small enough, use native int64 -> uint64 conversion. // otherwise, halve (rounding to odd?), convert, and double. - a = ACVTSQ2SS; - if(tt == TFLOAT64) - a = ACVTSQ2SD; - nodconst(&zero, types[TUINT64], 0); - nodconst(&one, types[TUINT64], 1); - regalloc(&r1, f->type, f); - regalloc(&r2, t->type, t); - regalloc(&r3, f->type, N); - regalloc(&r4, f->type, N); - gmove(f, &r1); - gins(ACMPQ, &r1, &zero); + nodreg(&ax, types[TUINT32], D_AX); + nodreg(&dx, types[TUINT32], D_DX); + nodreg(&cx, types[TUINT32], D_CX); + tempalloc(&t1, f->type); + split64(&t1, &tlo, &thi); + gmove(f, &t1); + gins(ACMPL, &thi, ncon(0)); p1 = gbranch(AJLT, T); - gins(a, &r1, &r2); + // native + t1.type = types[TINT64]; + gmove(&t1, t); p2 = gbranch(AJMP, T); + // simulated patch(p1, pc); - gmove(&r1, &r3); - gins(ASHRQ, &one, &r3); - gmove(&r1, &r4); - gins(AANDL, &one, &r4); - gins(AORQ, &r4, &r3); - gins(a, &r3, &r2); - gins(optoas(OADD, t->type), &r2, &r2); + gmove(&tlo, &ax); + gmove(&thi, &dx); + p1 = gins(ASHRL, ncon(1), &ax); + p1->from.index = D_DX; // double-width shift DX -> AX + p1->from.scale = 0; + gins(ASETCC, N, &cx); + gins(AORB, &cx, &ax); + gins(ASHRL, ncon(1), &dx); + gmove(&dx, &thi); + gmove(&ax, &tlo); + nodreg(&r1, types[tt], D_F0); + nodreg(&r2, types[tt], D_F0 + 1); + gmove(&t1, &r1); // t1.type is TINT64 now, set above + gins(AFMOVD, &r1, &r1); + gins(AFADDDP, &r1, &r2); + gmove(&r1, t); patch(p2, pc); - gmove(&r2, t); - regfree(&r4); - regfree(&r3); - regfree(&r2); - regfree(&r1); + splitclean(); + tempfree(&t1); return; - */ + /* * float to float */ case CASE(TFLOAT32, TFLOAT32): - a = AFMOVF; - break; - case CASE(TFLOAT64, TFLOAT64): - a = AFMOVD; + // The way the code generator uses floating-point + // registers, a move from F0 to F0 is intended as a no-op. + // On the x86, it's not: it pushes a second copy of F0 + // on the floating point stack. So toss it away here. + // Also, F0 is the *only* register we ever evaluate + // into, so we should only see register/register as F0/F0. + if(f->op == OREGISTER && t->op == OREGISTER) { + if(f->val.u.reg != D_F0 || t->val.u.reg != D_F0) + goto fatal; + return; + } + if(ismem(f) && ismem(t)) + goto hard; + a = AFMOVF; + if(ft == TFLOAT64) + a = AFMOVD; + if(ismem(t)) { + a = AFMOVFP; + if(ft == TFLOAT64) + a = AFMOVDP; + } break; - /* case CASE(TFLOAT32, TFLOAT64): - a = ACVTSS2SD; - goto rdst; + if(f->op == OREGISTER) + gins(AFMOVD, f, t); + else + gins(AFMOVF, f, t); + return; case CASE(TFLOAT64, TFLOAT32): - a = ACVTSD2SS; - goto rdst; - */ + if(f->op == OREGISTER) + gins(AFMOVF, f, t); + else + gins(AFMOVD, f, t); + return; } gins(a, f, t); @@ -1377,6 +1606,18 @@ hard: gmove(&r1, t); regfree(&r1); return; + +hardmem: + // requires memory intermediate + tempalloc(&r1, cvt); + gmove(f, &r1); + gmove(&r1, t); + tempfree(&r1); + return; + +fatal: + // should not happen + fatal("gmove %N -> %N", f, t); } int