#define NREGVAR 16 /* 8 integer + 8 floating */
#define REGBITS ((uint32)0xffff)
+static Reg* firstr;
static int first = 1;
-static void fixtemp(Prog*);
-
int
rcmp(const void *a1, const void *a2)
{
first = 0;
}
- fixtemp(firstp);
fixjmp(firstp);
+ mergetemp(firstp);
/*
* control flow is more complicated in generated go code
* pass 2
* find looping structure
*/
- for(r = firstr; r != R; r = (Reg*)r->f.link)
- r->f.active = 0;
- change = 0;
flowrpo(g);
if(debug['R'] && debug['v'])
// }
}
}
-
-static uint32
-fnv1(Sym *sym)
-{
- uint32 h;
- char *s;
-
- h = 2166136261U;
- for(s=sym->name;*s;s++) {
- h = (16777619 * h) ^ (uint32)(uint8)(*s);
- }
- return h;
-}
-
-static uint16
-hash32to16(uint32 h)
-{
- return (h & 0xffff) ^ (h >> 16);
-}
-
-/*
- * fixtemp eliminates sequences like:
- * MOV reg1, mem
- * OP mem, reg2
- * when mem is a stack variable which is not mentioned
- * anywhere else. The instructions are replaced by
- * OP reg1, reg2
- * this reduces the number of variables that the register optimizer
- * sees, which lets it do a better job and makes it less likely to turn
- * itself off.
- */
-static void
-fixtemp(Prog *firstp)
-{
- static uint8 counts[1<<16]; // A hash table to count variable occurrences.
- int i;
- Prog *p, *p2;
- uint32 h;
-
- if(debug['R'] && debug['v'])
- print("\nfixtemp\n");
-
- // Count variable references. We actually use a hashtable so this
- // is only approximate.
- for(i=0; i<nelem(counts); i++)
- counts[i] = 0;
- for(p=firstp; p!=P; p=p->link) {
- if(p->from.type == D_AUTO) {
- h = hash32to16(fnv1(p->from.sym));
- //print("seen %S hash %d\n", p->from.sym, hash32to16(h));
- if(counts[h] < 10)
- counts[h]++;
- }
- if(p->to.type == D_AUTO) {
- h = hash32to16(fnv1(p->to.sym));
- //print("seen %S hash %d\n", p->to.sym, hash32to16(h));
- if(counts[h] < 10)
- counts[h]++;
- }
- }
-
- // Eliminate single-write, single-read stack variables.
- for(p=firstp; p!=P; p=p->link) {
- if(debug['R'] && debug['v'])
- print("%P\n", p);
- if(p->link == P || p->to.type != D_AUTO)
- continue;
- if(isfloat[p->to.etype] && FtoB(p->from.type)) {
- switch(p->as) {
- case AMOVSS:
- case AMOVSD:
- break;
- default:
- continue;
- }
- } else if(!isfloat[p->to.etype] && RtoB(p->from.type)) {
- switch(p->as) {
- case AMOVB:
- if(p->to.width == 1)
- break;
- case AMOVW:
- if(p->to.width == 2)
- break;
- case AMOVL:
- if(p->to.width == 4)
- break;
- default:
- continue;
- }
- } else
- continue;
- // p is a MOV reg, mem.
- p2 = p->link;
- h = hash32to16(fnv1(p->to.sym));
- if(counts[h] != 2) {
- continue;
- }
- switch(p2->as) {
- case ALEAL:
- case AFMOVD:
- case AFMOVF:
- case AFMOVL:
- case AFMOVW:
- case AFMOVV:
- // funny
- continue;
- }
- // p2 is OP mem, reg2
- // and OP is not a funny instruction.
- if(p2->from.sym == p->to.sym
- && p2->from.offset == p->to.offset
- && p2->from.type == p->to.type) {
- if(debug['R'] && debug['v']) {
- print(" ===elide== %D\n", &p->to);
- print("%P", p2);
- }
- // p2 is OP mem, reg2.
- // change to OP reg, reg2 and
- // eliminate the mov.
- p2->from = p->from;
- *p = *p2;
- p->link = p2->link;
- if(debug['R'] && debug['v']) {
- print(" ===change== %P\n", p);
- }
- }
- }
-}
}
}
+#undef alive
+#undef dead
+
// Control flow analysis. The Flow structures hold predecessor and successor
// information as well as basic loop analysis.
//
if(g->rpo == nil || idom == nil)
fatal("out of memory");
+ for(r1 = g->start; r1 != nil; r1 = r1->link)
+ r1->active = 0;
+
rpo2r = g->rpo;
d = postorder(g->start, rpo2r, 0);
nr = g->num;
loopmark(rpo2r, i, r1);
}
free(idom);
+
+ for(r1 = g->start; r1 != nil; r1 = r1->link)
+ r1->active = 0;
}
Flow*
return r1;
}
+// The compilers assume they can generate temporary variables
+// as needed to preserve the right semantics or simplify code
+// generation and the back end will still generate good code.
+// This results in a large number of ephemeral temporary variables.
+// Merge temps with non-overlapping lifetimes and equal types using the
+// greedy algorithm in Poletto and Sarkar, "Linear Scan Register Allocation",
+// ACM TOPLAS 1999.
+
+typedef struct TempVar TempVar;
+typedef struct TempFlow TempFlow;
+
+struct TempVar
+{
+ Node *node;
+ TempFlow *def; // definition of temp var
+ TempFlow *use; // use list, chained through TempFlow.uselink
+ TempVar *freelink; // next free temp in Type.opt list
+ TempVar *merge; // merge var with this one
+ uint32 start; // smallest Prog.loc in live range
+ uint32 end; // largest Prog.loc in live range
+ uchar addr; // address taken - no accurate end
+ uchar removed; // removed from program
+};
+
+struct TempFlow
+{
+ Flow f;
+ TempFlow *uselink;
+};
+
+static int
+startcmp(const void *va, const void *vb)
+{
+ TempVar *a, *b;
+
+ a = *(TempVar**)va;
+ b = *(TempVar**)vb;
+
+ if(a->start < b->start)
+ return -1;
+ if(a->start > b->start)
+ return +1;
+ return 0;
+}
+
+// Is n available for merging?
+static int
+canmerge(Node *n)
+{
+ return n->class == PAUTO && !n->addrtaken && strncmp(n->sym->name, "autotmp", 7) == 0;
+}
+
+static void mergewalk(TempVar*, TempFlow*, uint32);
+
+void
+mergetemp(Prog *firstp)
+{
+ int i, j, nvar, ninuse, nfree, nkill;
+ TempVar *var, *v, *v1, **bystart, **inuse;
+ TempFlow *r;
+ NodeList *l, **lp;
+ Node *n;
+ Prog *p, *p1;
+ Type *t;
+ ProgInfo info, info1;
+ int32 gen;
+ Graph *g;
+
+ enum { Debug = 0 };
+
+ g = flowstart(firstp, sizeof(TempFlow));
+ if(g == nil)
+ return;
+
+ // Build list of all mergeable variables.
+ nvar = 0;
+ for(l = curfn->dcl; l != nil; l = l->next)
+ if(canmerge(l->n))
+ nvar++;
+
+ var = calloc(nvar*sizeof var[0], 1);
+ nvar = 0;
+ for(l = curfn->dcl; l != nil; l = l->next) {
+ n = l->n;
+ if(canmerge(n)) {
+ v = &var[nvar++];
+ n->opt = v;
+ v->node = n;
+ }
+ }
+
+ // Build list of uses.
+ // We assume that the earliest reference to a temporary is its definition.
+ // This is not true of variables in general but our temporaries are all
+ // single-use (that's why we have so many!).
+ for(r = (TempFlow*)g->start; r != nil; r = (TempFlow*)r->f.link) {
+ p = r->f.prog;
+ proginfo(&info, p);
+
+ if(p->from.node != N && p->from.node->opt && p->to.node != N && p->to.node->opt)
+ fatal("double node %P", p);
+ if((n = p->from.node) != N && (v = n->opt) != nil ||
+ (n = p->to.node) != N && (v = n->opt) != nil) {
+ if(v->def == nil)
+ v->def = r;
+ r->uselink = v->use;
+ v->use = r;
+ if(n == p->from.node && (info.flags & LeftAddr))
+ v->addr = 1;
+ }
+ }
+
+ if(Debug > 1)
+ dumpit("before", g->start, 0);
+
+ nkill = 0;
+
+ // Special case.
+ for(v = var; v < var+nvar; v++) {
+ if(v->addr)
+ continue;
+ // Used in only one instruction, which had better be a write.
+ if((r = v->use) != nil && r->uselink == nil) {
+ p = r->f.prog;
+ proginfo(&info, p);
+ if(p->to.node == v->node && (info.flags & RightWrite) && !(info.flags & RightRead)) {
+ p->as = ANOP;
+ p->to = zprog.to;
+ v->removed = 1;
+ if(Debug)
+ print("drop write-only %S\n", v->node->sym);
+ } else
+ fatal("temp used and not set: %P", p);
+ nkill++;
+ continue;
+ }
+
+ // Written in one instruction, read in the next, otherwise unused,
+ // no jumps to the next instruction. Happens mainly in 386 compiler.
+ if((r = v->use) != nil && r->f.link == &r->uselink->f && r->uselink->uselink == nil && uniqp(r->f.link) == &r->f) {
+ p = r->f.prog;
+ proginfo(&info, p);
+ p1 = r->f.link->prog;
+ proginfo(&info1, p1);
+ enum {
+ SizeAny = SizeB | SizeW | SizeL | SizeQ | SizeF | SizeD,
+ };
+ if(p->from.node == v->node && p1->to.node == v->node && (info.flags & Move) &&
+ !((info.flags|info1.flags) & (LeftAddr|RightAddr)) &&
+ (info.flags & SizeAny) == (info1.flags & SizeAny)) {
+ p1->from = p->from;
+ excise(&r->f);
+ v->removed = 1;
+ if(Debug)
+ print("drop immediate-use %S\n", v->node->sym);
+ }
+ nkill++;
+ continue;
+ }
+ }
+
+ // Traverse live range of each variable to set start, end.
+ // Each flood uses a new value of gen so that we don't have
+ // to clear all the r->f.active words after each variable.
+ gen = 0;
+ for(v = var; v < var+nvar; v++) {
+ gen++;
+ for(r = v->use; r != nil; r = r->uselink)
+ mergewalk(v, r, gen);
+ }
+
+ // Sort variables by start.
+ bystart = malloc(nvar*sizeof bystart[0]);
+ for(i=0; i<nvar; i++)
+ bystart[i] = &var[i];
+ qsort(bystart, nvar, sizeof bystart[0], startcmp);
+
+ // List of in-use variables, sorted by end, so that the ones that
+ // will last the longest are the earliest ones in the array.
+ // The tail inuse[nfree:] holds no-longer-used variables.
+ // In theory we should use a sorted tree so that insertions are
+ // guaranteed O(log n) and then the loop is guaranteed O(n log n).
+ // In practice, it doesn't really matter.
+ inuse = malloc(nvar*sizeof inuse[0]);
+ ninuse = 0;
+ nfree = nvar;
+ for(i=0; i<nvar; i++) {
+ v = bystart[i];
+ if(v->addr || v->removed)
+ continue;
+
+ // Expire no longer in use.
+ while(ninuse > 0 && inuse[ninuse-1]->end < v->start) {
+ v1 = inuse[--ninuse];
+ inuse[--nfree] = v1;
+ }
+
+ // Find old temp to reuse if possible.
+ t = v->node->type;
+ for(j=nfree; j<nvar; j++) {
+ v1 = inuse[j];
+ if(eqtype(t, v1->node->type)) {
+ inuse[j] = inuse[nfree++];
+ if(v1->merge)
+ v->merge = v1->merge;
+ else
+ v->merge = v1;
+ nkill++;
+ break;
+ }
+ }
+
+ // Sort v into inuse.
+ j = ninuse++;
+ while(j > 0 && inuse[j-1]->end < v->end) {
+ inuse[j] = inuse[j-1];
+ j--;
+ }
+ inuse[j] = v;
+ }
+
+ if(Debug) {
+ print("%S [%d - %d]\n", curfn->nname->sym, nvar, nkill);
+ for(v=var; v<var+nvar; v++) {
+ print("var %#N %T %d-%d", v->node, v->node->type, v->start, v->end);
+ if(v->addr)
+ print(" addr=1");
+ if(v->removed)
+ print(" dead=1");
+ if(v->merge)
+ print(" merge %#N", v->merge->node);
+ if(v->start == v->end)
+ print(" %P", v->def->f.prog);
+ print("\n");
+ }
+
+ if(Debug > 1)
+ dumpit("after", g->start, 0);
+ }
+
+ // Update node references to use merged temporaries.
+ for(r = (TempFlow*)g->start; r != nil; r = (TempFlow*)r->f.link) {
+ p = r->f.prog;
+ if((n = p->from.node) != N && (v = n->opt) != nil && v->merge != nil)
+ p->from.node = v->merge->node;
+ if((n = p->to.node) != N && (v = n->opt) != nil && v->merge != nil)
+ p->to.node = v->merge->node;
+ }
+
+ // Delete merged nodes from declaration list.
+ for(lp = &curfn->dcl; (l = *lp); ) {
+ curfn->dcl->end = l;
+ n = l->n;
+ v = n->opt;
+ if(v && (v->merge || v->removed)) {
+ *lp = l->next;
+ continue;
+ }
+ lp = &l->next;
+ }
+
+ // Clear aux structures.
+ for(v=var; v<var+nvar; v++)
+ v->node->opt = nil;
+ free(var);
+ free(bystart);
+ free(inuse);
+ flowend(g);
+}
+
+static void
+mergewalk(TempVar *v, TempFlow *r0, uint32 gen)
+{
+ Prog *p;
+ TempFlow *r1, *r, *r2;
+
+ for(r1 = r0; r1 != nil; r1 = (TempFlow*)r1->f.p1) {
+ if(r1->f.active == gen)
+ break;
+ r1->f.active = gen;
+ p = r1->f.prog;
+ if(v->end < p->loc)
+ v->end = p->loc;
+ if(r1 == v->def) {
+ v->start = p->loc;
+ break;
+ }
+ }
+
+ for(r = r0; r != r1; r = (TempFlow*)r->f.p1)
+ for(r2 = (TempFlow*)r->f.p2; r2 != nil; r2 = (TempFlow*)r2->f.p2link)
+ mergewalk(v, r2, gen);
+}