void
sgen(Node *n, Node *res, int64 w)
{
- Node dst, src, tmp, nend;
+ Node dst, src, tmp, nend, r0, r1, r2, *f;
int32 c, odst, osrc;
int dir, align, op;
Prog *p, *ploop;
if(osrc < odst && odst < osrc+w)
dir = -dir;
+ if(op == AMOVW && dir > 0 && c >= 4 && c <= 128) {
+ r0.op = OREGISTER;
+ r0.val.u.reg = REGALLOC_R0;
+ r1.op = OREGISTER;
+ r1.val.u.reg = REGALLOC_R0 + 1;
+ r2.op = OREGISTER;
+ r2.val.u.reg = REGALLOC_R0 + 2;
+
+ regalloc(&src, types[tptr], &r1);
+ regalloc(&dst, types[tptr], &r2);
+ if(n->ullman >= res->ullman) {
+ // eval n first
+ agen(n, &src);
+ if(res->op == ONAME)
+ gvardef(res);
+ agen(res, &dst);
+ } else {
+ // eval res first
+ if(res->op == ONAME)
+ gvardef(res);
+ agen(res, &dst);
+ agen(n, &src);
+ }
+ regalloc(&tmp, types[tptr], &r0);
+ f = sysfunc("duffcopy");
+ p = gins(ADUFFCOPY, N, f);
+ afunclit(&p->to, f);
+ // 8 and 128 = magic constants: see ../../pkg/runtime/asm_arm.s
+ p->to.offset = 8*(128-c);
+
+ regfree(&tmp);
+ regfree(&src);
+ regfree(&dst);
+ return;
+ }
+
if(n->ullman >= res->ullman) {
agenr(n, &dst, res); // temporarily use dst
regalloc(&src, types[tptr], N);
#include "opt.h"
static Prog* appendpp(Prog*, int, int, int, int32, int, int, int32);
+static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *r0);
void
defframe(Prog *ptxt)
{
- uint32 frame;
- Prog *p, *p1;
+ uint32 frame, r0; // r0: flag set by zerorange once register 0 has been loaded with zero
+ Prog *p;
+ vlong hi, lo; // pending range [lo, hi) of variable offsets that still has to be zeroed
NodeList *l;
Node *n;
- vlong i;
// fill in argument size
ptxt->to.type = D_CONST2;
// insert code to contain ambiguously live variables
// so that garbage collector only sees initialized values
// when it looks for pointers.
- //
- // TODO: determine best way to zero the given values.
- // among other problems, R0 is initialized to 0 multiple times,
- // but that's really the tip of the iceberg.
p = ptxt;
+ lo = hi = 0; // start with an empty pending range (zerorange emits nothing when lo == hi)
+ r0 = 0; // zero has not been loaded into register 0 yet
for(l=curfn->dcl; l != nil; l = l->next) {
n = l->n;
if(!n->needzero)
fatal("needzero class %d", n->class);
if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0)
fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset);
- if(n->type->width <= 8*widthptr) {
- p = appendpp(p, AMOVW, D_CONST, NREG, 0, D_REG, 0, 0);
- for(i = 0; i < n->type->width; i += widthptr)
- p = appendpp(p, AMOVW, D_REG, 0, 0, D_OREG, REGSP, 4+frame+n->xoffset+i);
- } else {
- p = appendpp(p, AMOVW, D_CONST, NREG, 0, D_REG, 0, 0);
- p = appendpp(p, AADD, D_CONST, NREG, 4+frame+n->xoffset, D_REG, 1, 0);
- p->reg = REGSP;
- p = appendpp(p, AADD, D_CONST, NREG, n->type->width, D_REG, 2, 0);
- p->reg = 1;
- p1 = p = appendpp(p, AMOVW, D_REG, 0, 0, D_OREG, 1, 4);
- p->scond |= C_PBIT;
- p = appendpp(p, ACMP, D_REG, 1, 0, D_NONE, 0, 0);
- p->reg = 2;
- p = appendpp(p, ABNE, D_NONE, NREG, 0, D_BRANCH, NREG, 0);
- patch(p, p1);
+ if(lo != hi && n->xoffset + n->type->width >= lo - 2*widthptr) { // a range is pending and this variable ends no more than two pointer words below its lo
+ // merge with the range we already have; only lo moves (NOTE(review): presumably later dcl entries sit at lower offsets - confirm)
+ lo = rnd(n->xoffset, widthptr);
+ continue;
}
- }
+ // not mergeable: emit the zeroing code for the pending range first
+ p = zerorange(p, frame, lo, hi, &r0);
+
+ // start a new pending range covering just this variable
+ hi = n->xoffset + n->type->width;
+ lo = n->xoffset;
+ }
+ // emit the zeroing code for whatever range is still pending after the loop
+ zerorange(p, frame, lo, hi, &r0);
+}
+/*
+ * zerorange appends, after p, the instructions that store zero to the
+ * range [lo, hi) and returns the last Prog appended (p itself when the
+ * range is empty). Addresses are formed as 4+frame+lo relative to REGSP.
+ *
+ * *r0 records whether the "move constant 0 into register 0" instruction
+ * has already been emitted; it is emitted at most once across all calls
+ * that share the same flag.
+ *
+ * Strategy by size (cnt = hi - lo):
+ *   cnt < 4*widthptr       one AMOVW store per pointer-sized word
+ *   cnt <= 128*widthptr    ADUFFZERO targeting the "duffzero" routine
+ *   otherwise              an explicit store / compare / branch loop
+ */
+static Prog*
+zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *r0)
+{
+ vlong cnt, i;
+ Prog *p1;
+ Node *f;
+
+ cnt = hi - lo; // length of the range, in the same units as lo and hi
+ if(cnt == 0)
+ return p; // empty range: emit nothing
+ if(*r0 == 0) {
+ p = appendpp(p, AMOVW, D_CONST, NREG, 0, D_REG, 0, 0); // register 0 = constant 0, the value stored by every path below
+ *r0 = 1; // remember it so later calls skip this load
+ }
+ if(cnt < 4*widthptr) { // short range: unrolled, one store per word
+ for(i = 0; i < cnt; i += widthptr)
+ p = appendpp(p, AMOVW, D_REG, 0, 0, D_OREG, REGSP, 4+frame+lo+i);
+ } else if(cnt <= 128*widthptr) { // medium range: use duffzero
+ p = appendpp(p, AADD, D_CONST, NREG, 4+frame+lo, D_REG, 1, 0); // register 1 = REGSP + (4+frame+lo), the start address
+ p->reg = REGSP;
+ p = appendpp(p, ADUFFZERO, D_NONE, NREG, 0, D_OREG, NREG, 0); // destination operand is filled in by the lines below
+ f = sysfunc("duffzero");
+ naddr(f, &p->to, 1);
+ afunclit(&p->to, f);
+ p->to.offset = 4*(128-cnt/widthptr); // enter duffzero partway in; presumably it is 128 stores of 4 code bytes each, leaving cnt/widthptr stores to run - see pkg/runtime/asm_arm.s
+ } else { // large range: explicit loop
+ p = appendpp(p, AADD, D_CONST, NREG, 4+frame+lo, D_REG, 1, 0); // register 1 = REGSP + (4+frame+lo), the start address
+ p->reg = REGSP;
+ p = appendpp(p, AADD, D_CONST, NREG, cnt, D_REG, 2, 0); // register 2 = register 1 + cnt, the end address
+ p->reg = 1;
+ p1 = p = appendpp(p, AMOVW, D_REG, 0, 0, D_OREG, 1, 4); // loop top: store register 0 through register 1
+ p->scond |= C_PBIT; // NOTE(review): presumably post-increments register 1 by the offset 4 - confirm
+ p = appendpp(p, ACMP, D_REG, 1, 0, D_NONE, 0, 0); // compare register 1 ...
+ p->reg = 2; // ... with the end address in register 2
+ p = appendpp(p, ABNE, D_NONE, NREG, 0, D_BRANCH, NREG, 0); // not equal yet:
+ patch(p, p1); // branch back to the store at p1
+ }
+ return p;
}
static Prog*
clearfat(Node *nl)
{
uint32 w, c, q;
- Node dst, nc, nz, end;
+ Node dst, nc, nz, end, r0, r1, *f;
Prog *p, *pl;
/* clear a fat object */
c = w % 4; // bytes
q = w / 4; // quads
- regalloc(&dst, types[tptr], N);
+ r0.op = OREGISTER;
+ r0.val.u.reg = REGALLOC_R0;
+ r1.op = OREGISTER;
+ r1.val.u.reg = REGALLOC_R0 + 1;
+ regalloc(&dst, types[tptr], &r1);
agen(nl, &dst);
nodconst(&nc, types[TUINT32], 0);
- regalloc(&nz, types[TUINT32], 0);
+ regalloc(&nz, types[TUINT32], &r0);
cgen(&nc, &nz);
- if(q >= 4) {
+ if(q > 128) {
regalloc(&end, types[tptr], N);
p = gins(AMOVW, &dst, &end);
p->from.type = D_CONST;
patch(gbranch(ABNE, T, 0), pl);
regfree(&end);
+ } else if(q >= 4) {
+ f = sysfunc("duffzero");
+ p = gins(ADUFFZERO, N, f);
+ afunclit(&p->to, f);
+ // 4 and 128 = magic constants: see ../../pkg/runtime/asm_arm.s
+ p->to.offset = 4*(128-q);
} else
while(q > 0) {
p = gins(AMOVW, &nz, &dst);