From: Russ Cox Date: Wed, 4 Feb 2015 00:23:18 +0000 (-0500) Subject: cmd/gc: move reg.c into portable code X-Git-Tag: go1.5beta1~2047 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=ad88fd1d4ac4b5d66c10456cdf37ee0f233d87ce;p=gostls13.git cmd/gc: move reg.c into portable code Now there is only one registerizer shared among all the systems. There are some unfortunate special cases based on arch.thechar in reg.c, to preserve bit-for-bit compatibility during the refactoring. Most are probably bugs one way or another and should be revisited. Change-Id: I153b435c0eaa05bbbeaf8876822eeb6dedaae3cf Reviewed-on: https://go-review.googlesource.com/3883 Reviewed-by: Austin Clements --- diff --git a/src/cmd/5g/galign.c b/src/cmd/5g/galign.c index c136406a79..394b36d84e 100644 --- a/src/cmd/5g/galign.c +++ b/src/cmd/5g/galign.c @@ -66,14 +66,22 @@ main(int argc, char **argv) arch.ginscall = ginscall; arch.igen = igen; arch.linkarchinit = linkarchinit; + arch.peep = peep; arch.proginfo = proginfo; arch.regalloc = regalloc; arch.regfree = regfree; - arch.regopt = regopt; arch.regtyp = regtyp; arch.sameaddr = sameaddr; arch.smallindir = smallindir; arch.stackaddr = stackaddr; + arch.excludedregs = excludedregs; + arch.RtoB = RtoB; + arch.FtoB = RtoB; + arch.BtoR = BtoR; + arch.BtoF = BtoF; + arch.optoas = optoas; + arch.doregbits = doregbits; + arch.regnames = regnames; gcmain(argc, argv); } diff --git a/src/cmd/5g/gg.h b/src/cmd/5g/gg.h index 210c9c2cc7..b12c7e2561 100644 --- a/src/cmd/5g/gg.h +++ b/src/cmd/5g/gg.h @@ -159,3 +159,19 @@ int sameaddr(Addr*, Addr*); int smallindir(Addr*, Addr*); int stackaddr(Addr*); Prog* unpatch(Prog*); + +/* + * reg.c + */ +uint64 excludedregs(void); +uint64 RtoB(int); +uint64 FtoB(int); +int BtoR(uint64); +int BtoF(uint64); +uint64 doregbits(int); +char** regnames(int*); + +/* + * peep.c + */ +void peep(Prog*); diff --git a/src/cmd/5g/ggen.c b/src/cmd/5g/ggen.c index f91cd71518..62b9beadb0 100644 --- a/src/cmd/5g/ggen.c +++ b/src/cmd/5g/ggen.c @@ -7,7 +7,7 @@ #include #include #include "gg.h" -#include "opt.h" +#include "../gc/popt.h" static Prog* appendpp(Prog*, int, int, int, int32, int, int, int32); static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *r0); diff --git a/src/cmd/5g/opt.h b/src/cmd/5g/opt.h deleted file mode 100644 index 524607419d..0000000000 --- a/src/cmd/5g/opt.h +++ /dev/null @@ -1,179 +0,0 @@ -// Inferno utils/5c/gc.h -// http://code.google.com/p/inferno-os/source/browse/utils/5c/gc.h -// -// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. -// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) -// Portions Copyright © 1997-1999 Vita Nuova Limited -// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) -// Portions Copyright © 2004,2006 Bruce Ellis -// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) -// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others -// Portions Copyright © 2009 The Go Authors. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#define Z N -#define Adr Addr - -#define D_HI TYPE_NONE -#define D_LO TYPE_NONE - -#define BLOAD(r) band(bnot(r->refbehind), r->refahead) -#define BSTORE(r) band(bnot(r->calbehind), r->calahead) -#define LOAD(r) (~r->refbehind.b[z] & r->refahead.b[z]) -#define STORE(r) (~r->calbehind.b[z] & r->calahead.b[z]) - -#define CLOAD 5 -#define CREF 5 -#define CINF 1000 -#define LOOP 3 - -typedef struct Reg Reg; -typedef struct Rgn Rgn; - -/*c2go -extern Node *Z; -enum -{ - D_HI = TYPE_NONE, - D_LO = TYPE_NONE, - CLOAD = 5, - CREF = 5, - CINF = 1000, - LOOP = 3, -}; - -uint32 BLOAD(Reg*); -uint32 BSTORE(Reg*); -uint64 LOAD(Reg*); -uint64 STORE(Reg*); -*/ - -// A Reg is a wrapper around a single Prog (one instruction) that holds -// register optimization information while the optimizer runs. -// r->prog is the instruction. -// r->prog->opt points back to r. -struct Reg -{ - Flow f; - - Bits set; // regopt variables written by this instruction. - Bits use1; // regopt variables read by prog->from. - Bits use2; // regopt variables read by prog->to. - - // refahead/refbehind are the regopt variables whose current - // value may be used in the following/preceding instructions - // up to a CALL (or the value is clobbered). - Bits refbehind; - Bits refahead; - // calahead/calbehind are similar, but for variables in - // instructions that are reachable after hitting at least one - // CALL. - Bits calbehind; - Bits calahead; - Bits regdiff; - Bits act; - - int32 regu; // register used bitmap -}; -#define R ((Reg*)0) -/*c2go extern Reg *R; */ - -#define NRGN 600 -/*c2go enum { NRGN = 600 }; */ - -// A Rgn represents a single regopt variable over a region of code -// where a register could potentially be dedicated to that variable. -// The code encompassed by a Rgn is defined by the flow graph, -// starting at enter, flood-filling forward while varno is refahead -// and backward while varno is refbehind, and following branches. A -// single variable may be represented by multiple disjoint Rgns and -// each Rgn may choose a different register for that variable. -// Registers are allocated to regions greedily in order of descending -// cost. -struct Rgn -{ - Reg* enter; - short cost; - short varno; - short regno; -}; - -EXTERN Reg zreg; -EXTERN Reg* freer; -EXTERN Reg** rpo2r; -EXTERN Rgn region[NRGN]; -EXTERN Rgn* rgp; -EXTERN int nregion; -EXTERN int nvar; -EXTERN int32 regbits; -EXTERN Bits externs; -EXTERN Bits params; -EXTERN Bits consts; -EXTERN Bits addrs; -EXTERN Bits ivar; -EXTERN Bits ovar; -EXTERN int change; -EXTERN int32 maxnr; -EXTERN int32* idom; - -EXTERN struct -{ - int32 ncvtreg; - int32 nspill; - int32 nreload; - int32 ndelmov; - int32 nvar; - int32 naddr; -} ostats; - -/* - * reg.c - */ -Reg* rega(void); -int rcmp(const void*, const void*); -void regopt(Prog*); -void addmove(Reg*, int, int, int); -Bits mkvar(Reg *r, Adr *a); -void prop(Reg*, Bits, Bits); -void synch(Reg*, Bits); -uint32 allreg(uint32, Rgn*); -void paint1(Reg*, int); -uint32 paint2(Reg*, int, int); -void paint3(Reg*, int, uint32, int); -void addreg(Adr*, int); -void dumpit(char *str, Flow *r0, int); - -/* - * peep.c - */ -void peep(Prog*); -void excise(Flow*); -int copyu(Prog*, Adr*, Adr*); - -uint32 RtoB(int); -uint32 FtoB(int); -int BtoR(uint32); -int BtoF(uint32); - -/* - * prog.c - */ -void proginfo(ProgInfo*, Prog*); diff --git a/src/cmd/5g/peep.c b/src/cmd/5g/peep.c index 1a4df8d622..c9910d1134 100644 --- a/src/cmd/5g/peep.c +++ b/src/cmd/5g/peep.c @@ -32,7 +32,7 @@ #include #include #include "gg.h" -#include "opt.h" +#include "../gc/popt.h" static int xtramodes(Graph*, Flow*, Adr*); static int shortprop(Flow *r); @@ -47,6 +47,7 @@ static Flow* findpre(Flow *r, Adr *v); static int copyau1(Prog *p, Adr *v); static int isdconst(Addr *a); static int isfloatreg(Addr*); +static int copyu(Prog *p, Adr *v, Adr *s); static uint32 gactive; @@ -941,7 +942,7 @@ xtramodes(Graph *g, Flow *r, Adr *a) * 4 if set and used * 0 otherwise (not touched) */ -int +static int copyu(Prog *p, Adr *v, Adr *s) { switch(p->as) { @@ -1572,3 +1573,12 @@ smallindir(Addr *a, Addr *reg) a->reg == reg->reg && 0 <= a->offset && a->offset < 4096; } + +void +excise(Flow *r) +{ + Prog *p; + + p = r->prog; + nopout(p); +} diff --git a/src/cmd/5g/prog.c b/src/cmd/5g/prog.c index a77f2336e9..9d5adefe69 100644 --- a/src/cmd/5g/prog.c +++ b/src/cmd/5g/prog.c @@ -5,7 +5,7 @@ #include #include #include "gg.h" -#include "opt.h" +#include "../gc/popt.h" enum { @@ -148,4 +148,13 @@ proginfo(ProgInfo *info, Prog *p) if(((p->scond & C_SCOND) != C_SCOND_NONE) && (info->flags & RightWrite)) info->flags |= RightRead; + + switch(p->as) { + case ADIV: + case ADIVU: + case AMOD: + case AMODU: + info->regset |= RtoB(REG_R12); + break; + } } diff --git a/src/cmd/5g/reg.c b/src/cmd/5g/reg.c index ec21c6abf7..1216e01bd5 100644 --- a/src/cmd/5g/reg.c +++ b/src/cmd/5g/reg.c @@ -32,66 +32,11 @@ #include #include #include "gg.h" -#include "opt.h" +#include "../gc/popt.h" -#define NREGVAR 32 -#define REGBITS ((uint64)0xffffffffull) -/*c2go enum { +enum { NREGVAR = 32, - REGBITS = 0xffffffff, }; -*/ - - void addsplits(void); -static Reg* firstr; -static int first = 1; - -int -rcmp(const void *a1, const void *a2) -{ - Rgn *p1, *p2; - int c1, c2; - - p1 = (Rgn*)a1; - p2 = (Rgn*)a2; - c1 = p2->cost; - c2 = p1->cost; - if(c1 -= c2) - return c1; - return p2->varno - p1->varno; -} - -void -excise(Flow *r) -{ - Prog *p; - - p = r->prog; - nopout(p); -} - -static void -setaddrs(Bits bit) -{ - int i, n; - Var *v; - Node *node; - - while(bany(&bit)) { - // convert each bit to a variable - i = bnum(bit); - node = var[i].node; - n = var[i].name; - biclr(&bit, i); - - // disable all pieces of that variable - for(i=0; inode == node && v->name == n) - v->addr = 2; - } - } -} static char* regname[] = { ".R0", @@ -128,1188 +73,26 @@ static char* regname[] = { ".F15", }; -static Node* regnodes[NREGVAR]; - -static void walkvardef(Node *n, Reg *r, int active); - -void -regopt(Prog *firstp) -{ - Reg *r, *r1; - Prog *p; - Graph *g; - int i, z, active; - uint32 vreg; - Bits bit; - ProgInfo info; - - if(first) { - fmtinstall('Q', Qconv); - first = 0; - } - - mergetemp(firstp); - - /* - * control flow is more complicated in generated go code - * than in generated c code. define pseudo-variables for - * registers, so we have complete register usage information. - */ - nvar = NREGVAR; - memset(var, 0, NREGVAR*sizeof var[0]); - for(i=0; iopt = nil; - return; - } - - firstr = (Reg*)g->start; - - for(r = firstr; r != R; r = (Reg*)r->f.link) { - p = r->f.prog; - if(p->as == AVARDEF || p->as == AVARKILL) - continue; - proginfo(&info, p); - - // Avoid making variables for direct-called functions. - if(p->as == ABL && p->to.name == NAME_EXTERN) - continue; - - bit = mkvar(r, &p->from); - if(info.flags & LeftRead) - for(z=0; zuse1.b[z] |= bit.b[z]; - if(info.flags & LeftAddr) - setaddrs(bit); - - if(info.flags & RegRead) - r->use1.b[0] |= RtoB(p->reg); - - if(info.flags & (RightAddr | RightRead | RightWrite)) { - bit = mkvar(r, &p->to); - if(info.flags & RightAddr) - setaddrs(bit); - if(info.flags & RightRead) - for(z=0; zuse2.b[z] |= bit.b[z]; - if(info.flags & RightWrite) - for(z=0; zset.b[z] |= bit.b[z]; - } - - /* the mod/div runtime routines smash R12 */ - if(p->as == ADIV || p->as == ADIVU || p->as == AMOD || p->as == AMODU) - r->set.b[0] |= RtoB(REG_R12); - } - if(firstr == R) - return; - - for(i=0; iaddr) { - bit = blsh(i); - for(z=0; zaddr, v->etype, v->width, v->node, v->offset); - } - - if(debug['R'] && debug['v']) - dumpit("pass1", &firstr->f, 1); - - /* - * pass 2 - * find looping structure - */ - flowrpo(g); - - if(debug['R'] && debug['v']) - dumpit("pass2", &firstr->f, 1); - - /* - * pass 2.5 - * iterate propagating fat vardef covering forward - * r->act records vars with a VARDEF since the last CALL. - * (r->act will be reused in pass 5 for something else, - * but we'll be done with it by then.) - */ - active = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) { - r->f.active = 0; - r->act = zbits; - } - for(r = firstr; r != R; r = (Reg*)r->f.link) { - p = r->f.prog; - if(p->as == AVARDEF && isfat(((Node*)(p->to.node))->type) && ((Node*)(p->to.node))->opt != nil) { - active++; - walkvardef(p->to.node, r, active); - } - } - - /* - * pass 3 - * iterate propagating usage - * back until flow graph is complete - */ -loop1: - change = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) - r->f.active = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) - if(r->f.prog->as == ARET) - prop(r, zbits, zbits); -loop11: - /* pick up unreachable code */ - i = 0; - for(r = firstr; r != R; r = r1) { - r1 = (Reg*)r->f.link; - if(r1 && r1->f.active && !r->f.active) { - prop(r, zbits, zbits); - i = 1; - } - } - if(i) - goto loop11; - if(change) - goto loop1; - - if(debug['R'] && debug['v']) - dumpit("pass3", &firstr->f, 1); - - - /* - * pass 4 - * iterate propagating register/variable synchrony - * forward until graph is complete - */ -loop2: - change = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) - r->f.active = 0; - synch(firstr, zbits); - if(change) - goto loop2; - - addsplits(); - - if(debug['R'] && debug['v']) - dumpit("pass4", &firstr->f, 1); - - if(debug['R'] > 1) { - print("\nprop structure:\n"); - for(r = firstr; r != R; r = (Reg*)r->f.link) { - print("%d:%P", r->f.loop, r->f.prog); - for(z=0; zset.b[z] | - r->refahead.b[z] | r->calahead.b[z] | - r->refbehind.b[z] | r->calbehind.b[z] | - r->use1.b[z] | r->use2.b[z]; - bit.b[z] &= ~addrs.b[z]; - } - - if(bany(&bit)) { - print("\t"); - if(bany(&r->use1)) - print(" u1=%Q", r->use1); - if(bany(&r->use2)) - print(" u2=%Q", r->use2); - if(bany(&r->set)) - print(" st=%Q", r->set); - if(bany(&r->refahead)) - print(" ra=%Q", r->refahead); - if(bany(&r->calahead)) - print(" ca=%Q", r->calahead); - if(bany(&r->refbehind)) - print(" rb=%Q", r->refbehind); - if(bany(&r->calbehind)) - print(" cb=%Q", r->calbehind); - } - print("\n"); - } - } - - /* - * pass 4.5 - * move register pseudo-variables into regu. - */ - for(r = firstr; r != R; r = (Reg*)r->f.link) { - r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; - - r->set.b[0] &= ~REGBITS; - r->use1.b[0] &= ~REGBITS; - r->use2.b[0] &= ~REGBITS; - r->refbehind.b[0] &= ~REGBITS; - r->refahead.b[0] &= ~REGBITS; - r->calbehind.b[0] &= ~REGBITS; - r->calahead.b[0] &= ~REGBITS; - r->regdiff.b[0] &= ~REGBITS; - r->act.b[0] &= ~REGBITS; - } - - if(debug['R'] && debug['v']) - dumpit("pass4.5", &firstr->f, 1); - - /* - * pass 5 - * isolate regions - * calculate costs (paint1) - */ - r = firstr; - if(r) { - for(z=0; zrefahead.b[z] | r->calahead.b[z]) & - ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); - if(bany(&bit) && !r->f.refset) { - // should never happen - all variables are preset - if(debug['w']) - print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); - r->f.refset = 1; - } - } - - for(r = firstr; r != R; r = (Reg*)r->f.link) - r->act = zbits; - rgp = region; - nregion = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) { - for(z=0; zset.b[z] & - ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); - if(bany(&bit) && !r->f.refset) { - if(debug['w']) - print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); - r->f.refset = 1; - excise(&r->f); - } - for(z=0; zact.b[z] | addrs.b[z]); - while(bany(&bit)) { - i = bnum(bit); - rgp->enter = r; - rgp->varno = i; - change = 0; - if(debug['R'] > 1) - print("\n"); - paint1(r, i); - biclr(&bit, i); - if(change <= 0) { - if(debug['R']) - print("%L $%d: %Q\n", - r->f.prog->lineno, change, blsh(i)); - continue; - } - rgp->cost = change; - nregion++; - if(nregion >= NRGN) { - if(debug['R'] > 1) - print("too many regions\n"); - goto brk; - } - rgp++; - } - } -brk: - qsort(region, nregion, sizeof(region[0]), rcmp); - - if(debug['R'] && debug['v']) - dumpit("pass5", &firstr->f, 1); - - /* - * pass 6 - * determine used registers (paint2) - * replace code (paint3) - */ - rgp = region; - if(debug['R'] && debug['v']) - print("\nregisterizing\n"); - for(i=0; icost, rgp->varno, rgp->enter->f.prog->pc); - bit = blsh(rgp->varno); - vreg = paint2(rgp->enter, rgp->varno, 0); - vreg = allreg(vreg, rgp); - if(debug['R']) { - print("%L $%d %R: %Q\n", - rgp->enter->f.prog->lineno, - rgp->cost, - rgp->regno, - bit); - } - if(rgp->regno != 0) - paint3(rgp->enter, rgp->varno, vreg, rgp->regno); - rgp++; - } - - /* - * free aux structures. peep allocates new ones. - */ - for(i=0; iopt = nil; - flowend(g); - firstr = R; - - if(debug['R'] && debug['v']) { - // Rebuild flow graph, since we inserted instructions - g = flowstart(firstp, sizeof(Reg)); - firstr = (Reg*)g->start; - dumpit("pass6", &firstr->f, 1); - flowend(g); - firstr = R; - } - - /* - * pass 7 - * peep-hole on basic block - */ - if(!debug['R'] || debug['P']) { - peep(firstp); - } - - if(debug['R'] && debug['v']) - dumpit("pass7", &firstr->f, 1); - - /* - * last pass - * eliminate nops - * free aux structures - * adjust the stack pointer - * MOVW.W R1,-12(R13) <<- start - * MOVW R0,R1 - * MOVW R1,8(R13) - * MOVW $0,R1 - * MOVW R1,4(R13) - * BL ,runtime.newproc+0(SB) - * MOVW &ft+-32(SP),R7 <<- adjust - * MOVW &j+-40(SP),R6 <<- adjust - * MOVW autotmp_0003+-24(SP),R5 <<- adjust - * MOVW $12(R13),R13 <<- finish - */ - vreg = 0; - for(p = firstp; p != P; p = p->link) { - while(p->link != P && p->link->as == ANOP) - p->link = p->link->link; - if(p->to.type == TYPE_BRANCH) - while(p->to.u.branch != P && p->to.u.branch->as == ANOP) - p->to.u.branch = p->to.u.branch->link; - if(p->as == AMOVW && p->to.reg == 13) { - if(p->scond & C_WBIT) { - vreg = -p->to.offset; // in adjust region -// print("%P adjusting %d\n", p, vreg); - continue; - } - if(p->from.type == TYPE_CONST && p->to.type == TYPE_REG) { - if(p->from.offset != vreg) - print("in and out different\n"); -// print("%P finish %d\n", p, vreg); - vreg = 0; // done adjust region - continue; - } - -// print("%P %d %d from type\n", p, p->from.type, TYPE_CONST); -// print("%P %d %d to type\n\n", p, p->to.type, TYPE_REG); - } - - if(p->as == AMOVW && vreg != 0) { - if(p->from.sym != nil) - if(p->from.name == NAME_AUTO || p->from.name == NAME_PARAM) { - p->from.offset += vreg; -// print("%P adjusting from %d %d\n", p, vreg, p->from.type); - } - if(p->to.sym != nil) - if(p->to.name == NAME_AUTO || p->to.name == NAME_PARAM) { - p->to.offset += vreg; -// print("%P adjusting to %d %d\n", p, vreg, p->from.type); - } - } - } -} - -static void -walkvardef(Node *n, Reg *r, int active) -{ - Reg *r1, *r2; - int bn; - Var *v; - - for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) { - if(r1->f.active == active) - break; - r1->f.active = active; - if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n) - break; - for(v=n->opt; v!=nil; v=v->nextinnode) { - bn = v - var; - biset(&r1->act, bn); - } - if(r1->f.prog->as == ABL) - break; - } - - for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1) - if(r2->f.s2 != nil) - walkvardef(n, (Reg*)r2->f.s2, active); -} - -void -addsplits(void) -{ - Reg *r, *r1; - int z, i; - Bits bit; - - for(r = firstr; r != R; r = (Reg*)r->f.link) { - if(r->f.loop > 1) - continue; - if(r->f.prog->as == ABL) - continue; - if(r->f.prog->as == ADUFFZERO) - continue; - if(r->f.prog->as == ADUFFCOPY) - continue; - for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) { - if(r1->f.loop <= 1) - continue; - for(z=0; zcalbehind.b[z] & - (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) & - ~(r->calahead.b[z] & addrs.b[z]); - while(bany(&bit)) { - i = bnum(bit); - biclr(&bit, i); - } - } - } -} - -/* - * add mov b,rn - * just after r - */ -void -addmove(Reg *r, int bn, int rn, int f) -{ - Prog *p, *p1, *p2; - Adr *a; - Var *v; - - p1 = mal(sizeof(*p1)); - *p1 = zprog; - p = r->f.prog; - - // If there's a stack fixup coming (after BL newproc or BL deferproc), - // delay the load until after the fixup. - p2 = p->link; - if(p2 && p2->as == AMOVW && p2->from.type == TYPE_ADDR && p2->from.reg == REGSP && p2->to.reg == REGSP && p2->to.type == TYPE_REG) - p = p2; - - p1->link = p->link; - p->link = p1; - p1->lineno = p->lineno; - - v = var + bn; - - a = &p1->to; - a->name = v->name; - a->node = v->node; - a->sym = linksym(v->node->sym); - a->offset = v->offset; - a->etype = v->etype; - a->type = TYPE_MEM; - if(a->etype == TARRAY) - a->type = TYPE_ADDR; - else if(a->sym == nil) - a->type = TYPE_CONST; - - if(v->addr) - fatal("addmove: shouldn't be doing this %A\n", a); - - switch(v->etype) { - default: - print("What is this %E\n", v->etype); - - case TINT8: - p1->as = AMOVBS; - break; - case TBOOL: - case TUINT8: -//print("movbu %E %d %S\n", v->etype, bn, v->sym); - p1->as = AMOVBU; - break; - case TINT16: - p1->as = AMOVHS; - break; - case TUINT16: - p1->as = AMOVHU; - break; - case TINT32: - case TUINT32: - case TPTR32: - p1->as = AMOVW; - break; - case TFLOAT32: - p1->as = AMOVF; - break; - case TFLOAT64: - p1->as = AMOVD; - break; - } - - p1->from.type = TYPE_REG; - p1->from.reg = rn; - if(!f) { - p1->from = *a; - *a = zprog.from; - a->type = TYPE_REG; - a->reg = rn; - if(v->etype == TUINT8 || v->etype == TBOOL) - p1->as = AMOVBU; - if(v->etype == TUINT16) - p1->as = AMOVHU; - } - if(debug['R']) - print("%P\t.a%P\n", p, p1); -} - -static int -overlap(int32 o1, int w1, int32 o2, int w2) -{ - int32 t1, t2; - - t1 = o1+w1; - t2 = o2+w2; - - if(!(t1 > o2 && t2 > o1)) - return 0; - - return 1; -} - -Bits -mkvar(Reg *r, Adr *a) -{ - Var *v; - int i, t, n, et, z, w, flag; - int32 o; - Bits bit; - Node *node; - - // mark registers used - t = a->type; - - flag = 0; - switch(t) { - default: - print("mkvar: type %d %d %D\n", t, a->name, a); - goto none; - - case TYPE_NONE: - case TYPE_FCONST: - case TYPE_BRANCH: - break; - - - case TYPE_REGREG: - case TYPE_REGREG2: - bit = zbits; - if(a->offset != 0) - bit.b[0] |= RtoB(a->offset); - if(a->reg != 0) - bit.b[0] |= RtoB(a->reg); - return bit; - - case TYPE_CONST: - if(a->reg != 0) - fatal("found CONST instead of ADDR: %D", a); - break; - - case TYPE_ADDR: - case TYPE_REG: - case TYPE_SHIFT: - if(a->reg != 0) { - bit = zbits; - bit.b[0] = RtoB(a->reg); - return bit; - } - break; - - case TYPE_MEM: - if(a->reg != 0) { - if(a == &r->f.prog->from) - r->use1.b[0] |= RtoB(a->reg); - else - r->use2.b[0] |= RtoB(a->reg); - if(r->f.prog->scond & (C_PBIT|C_WBIT)) - r->set.b[0] |= RtoB(a->reg); - } - break; - } - - switch(a->name) { - default: - goto none; - - case NAME_EXTERN: - case NAME_STATIC: - case NAME_AUTO: - case NAME_PARAM: - n = a->name; - break; - } - - node = a->node; - if(node == N || node->op != ONAME || node->orig == N) - goto none; - node = node->orig; - if(node->orig != node) - fatal("%D: bad node", a); - if(node->sym == S || node->sym->name[0] == '.') - goto none; - et = a->etype; - o = a->offset; - w = a->width; - if(w < 0) - fatal("bad width %d for %D", w, a); - - for(i=0; inode == node && v->name == n) { - if(v->offset == o) - if(v->etype == et) - if(v->width == w) - if(!flag) - return blsh(i); - - // if they overlap, disable both - if(overlap(v->offset, v->width, o, w)) { - v->addr = 1; - flag = 1; - } - } - } - - switch(et) { - case 0: - case TFUNC: - goto none; - } - - if(nvar >= NVAR) { - if(debug['w'] > 1 && node) - fatal("variable not optimized: %D", a); - - // If we're not tracking a word in a variable, mark the rest as - // having its address taken, so that we keep the whole thing - // live at all calls. otherwise we might optimize away part of - // a variable but not all of it. - for(i=0; inode == node) - v->addr = 1; - } - goto none; - } - - i = nvar; - nvar++; -//print("var %d %E %D %S\n", i, et, a, s); - v = var+i; - v->offset = o; - v->name = n; - v->etype = et; - v->width = w; - v->addr = flag; // funny punning - v->node = node; - - // node->opt is the head of a linked list - // of Vars within the given Node, so that - // we can start at a Var and find all the other - // Vars in the same Go variable. - v->nextinnode = node->opt; - node->opt = v; - - bit = blsh(i); - if(n == NAME_EXTERN || n == NAME_STATIC) - for(z=0; zclass == PPARAM) - for(z=0; zclass == PPARAMOUT) - for(z=0; zaddrtaken) - v->addr = 1; - - // Disable registerization for globals, because: - // (1) we might panic at any time and we want the recovery code - // to see the latest values (issue 1304). - // (2) we don't know what pointers might point at them and we want - // loads via those pointers to see updated values and vice versa (issue 7995). - // - // Disable registerization for results if using defer, because the deferred func - // might recover and return, causing the current values to be used. - if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT)) - v->addr = 1; - - if(debug['R']) - print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); - - return bit; - -none: - return zbits; -} - -void -prop(Reg *r, Bits ref, Bits cal) +char** +regnames(int *n) { - Reg *r1, *r2; - int z, i, j; - Var *v, *v1; - - for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { - for(z=0; zrefahead.b[z]; - if(ref.b[z] != r1->refahead.b[z]) { - r1->refahead.b[z] = ref.b[z]; - change++; - } - cal.b[z] |= r1->calahead.b[z]; - if(cal.b[z] != r1->calahead.b[z]) { - r1->calahead.b[z] = cal.b[z]; - change++; - } - } - switch(r1->f.prog->as) { - case ABL: - if(noreturn(r1->f.prog)) - break; - - // Mark all input variables (ivar) as used, because that's what the - // liveness bitmaps say. The liveness bitmaps say that so that a - // panic will not show stale values in the parameter dump. - // Mark variables with a recent VARDEF (r1->act) as used, - // so that the optimizer flushes initializations to memory, - // so that if a garbage collection happens during this CALL, - // the collector will see initialized memory. Again this is to - // match what the liveness bitmaps say. - for(z=0; zact.b[z]; - ref.b[z] = 0; - } - - // cal.b is the current approximation of what's live across the call. - // Every bit in cal.b is a single stack word. For each such word, - // find all the other tracked stack words in the same Go variable - // (struct/slice/string/interface) and mark them live too. - // This is necessary because the liveness analysis for the garbage - // collector works at variable granularity, not at word granularity. - // It is fundamental for slice/string/interface: the garbage collector - // needs the whole value, not just some of the words, in order to - // interpret the other bits correctly. Specifically, slice needs a consistent - // ptr and cap, string needs a consistent ptr and len, and interface - // needs a consistent type word and data word. - for(z=0; z= nvar || ((cal.b[z]>>i)&1) == 0) - continue; - v = var+z*64+i; - if(v->node->opt == nil) // v represents fixed register, not Go variable - continue; - - // v->node->opt is the head of a linked list of Vars - // corresponding to tracked words from the Go variable v->node. - // Walk the list and set all the bits. - // For a large struct this could end up being quadratic: - // after the first setting, the outer loop (for z, i) would see a 1 bit - // for all of the remaining words in the struct, and for each such - // word would go through and turn on all the bits again. - // To avoid the quadratic behavior, we only turn on the bits if - // v is the head of the list or if the head's bit is not yet turned on. - // This will set the bits at most twice, keeping the overall loop linear. - v1 = v->node->opt; - j = v1 - var; - if(v == v1 || !btest(&cal, j)) { - for(; v1 != nil; v1 = v1->nextinnode) { - j = v1 - var; - biset(&cal, j); - } - } - } - } - break; - - case ATEXT: - for(z=0; zset.b[z]) | - r1->use1.b[z] | r1->use2.b[z]; - cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); - r1->refbehind.b[z] = ref.b[z]; - r1->calbehind.b[z] = cal.b[z]; - } - if(r1->f.active) - break; - r1->f.active = 1; - } - for(; r != r1; r = (Reg*)r->f.p1) - for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) - prop(r2, r->refbehind, r->calbehind); + *n = NREGVAR; + return regname; } -void -synch(Reg *r, Bits dif) +uint64 +excludedregs(void) { - Reg *r1; - int z; - - for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { - for(z=0; zrefbehind.b[z] & r1->refahead.b[z])) | - r1->set.b[z] | r1->regdiff.b[z]; - if(dif.b[z] != r1->regdiff.b[z]) { - r1->regdiff.b[z] = dif.b[z]; - change++; - } - } - if(r1->f.active) - break; - r1->f.active = 1; - for(z=0; zcalbehind.b[z] & r1->calahead.b[z]); - if(r1->f.s2 != nil) - synch((Reg*)r1->f.s2, dif); - } + return RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC); } -uint32 -allreg(uint32 b, Rgn *r) +uint64 +doregbits(int r) { - Var *v; - int i; - - v = var + r->varno; - r->regno = 0; - switch(v->etype) { - - default: - fatal("unknown etype %d/%E", bitno(b), v->etype); - break; - - case TINT8: - case TUINT8: - case TINT16: - case TUINT16: - case TINT32: - case TUINT32: - case TINT: - case TUINT: - case TUINTPTR: - case TBOOL: - case TPTR32: - i = BtoR(~b); - if(i && r->cost >= 0) { - r->regno = i; - return RtoB(i); - } - break; - - case TFLOAT32: - case TFLOAT64: - i = BtoF(~b); - if(i && r->cost >= 0) { - r->regno = i; - return RtoB(i); - } - break; - - case TINT64: - case TUINT64: - case TPTR64: - case TINTER: - case TSTRUCT: - case TARRAY: - break; - } + USED(r); return 0; } -void -paint1(Reg *r, int bn) -{ - Reg *r1; - Prog *p; - int z; - uint64 bb; - - z = bn/64; - bb = 1LL<<(bn%64); - if(r->act.b[z] & bb) - return; - for(;;) { - if(!(r->refbehind.b[z] & bb)) - break; - r1 = (Reg*)r->f.p1; - if(r1 == R) - break; - if(!(r1->refahead.b[z] & bb)) - break; - if(r1->act.b[z] & bb) - break; - r = r1; - } - - if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) { - change -= CLOAD * r->f.loop; - if(debug['R'] > 1) - print("%d%P\td %Q $%d\n", r->f.loop, - r->f.prog, blsh(bn), change); - } - for(;;) { - r->act.b[z] |= bb; - p = r->f.prog; - - - if(r->f.prog->as != ANOP) { // don't give credit for NOPs - if(r->use1.b[z] & bb) { - change += CREF * r->f.loop; - if(debug['R'] > 1) - print("%d%P\tu1 %Q $%d\n", r->f.loop, - p, blsh(bn), change); - } - if((r->use2.b[z]|r->set.b[z]) & bb) { - change += CREF * r->f.loop; - if(debug['R'] > 1) - print("%d%P\tu2 %Q $%d\n", r->f.loop, - p, blsh(bn), change); - } - } - - if(STORE(r) & r->regdiff.b[z] & bb) { - change -= CLOAD * r->f.loop; - if(debug['R'] > 1) - print("%d%P\tst %Q $%d\n", r->f.loop, - p, blsh(bn), change); - } - - if(r->refbehind.b[z] & bb) - for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) - if(r1->refahead.b[z] & bb) - paint1(r1, bn); - - if(!(r->refahead.b[z] & bb)) - break; - r1 = (Reg*)r->f.s2; - if(r1 != R) - if(r1->refbehind.b[z] & bb) - paint1(r1, bn); - r = (Reg*)r->f.s1; - if(r == R) - break; - if(r->act.b[z] & bb) - break; - if(!(r->refbehind.b[z] & bb)) - break; - } -} - -uint32 -paint2(Reg *r, int bn, int depth) -{ - Reg *r1; - int z; - uint64 bb, vreg; - - z = bn/64; - bb = 1LL << (bn%64); - vreg = regbits; - if(!(r->act.b[z] & bb)) - return vreg; - for(;;) { - if(!(r->refbehind.b[z] & bb)) - break; - r1 = (Reg*)r->f.p1; - if(r1 == R) - break; - if(!(r1->refahead.b[z] & bb)) - break; - if(!(r1->act.b[z] & bb)) - break; - r = r1; - } - for(;;) { - if(debug['R'] && debug['v']) - print(" paint2 %d %P\n", depth, r->f.prog); - - r->act.b[z] &= ~bb; - - vreg |= r->regu; - - if(r->refbehind.b[z] & bb) - for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) - if(r1->refahead.b[z] & bb) - vreg |= paint2(r1, bn, depth+1); - - if(!(r->refahead.b[z] & bb)) - break; - r1 = (Reg*)r->f.s2; - if(r1 != R) - if(r1->refbehind.b[z] & bb) - vreg |= paint2(r1, bn, depth+1); - r = (Reg*)r->f.s1; - if(r == R) - break; - if(!(r->act.b[z] & bb)) - break; - if(!(r->refbehind.b[z] & bb)) - break; - } - return vreg; -} - -void -paint3(Reg *r, int bn, uint32 rb, int rn) -{ - Reg *r1; - Prog *p; - int z; - uint64 bb; - - z = bn/64; - bb = 1LL << (bn%64); - if(r->act.b[z] & bb) - return; - for(;;) { - if(!(r->refbehind.b[z] & bb)) - break; - r1 = (Reg*)r->f.p1; - if(r1 == R) - break; - if(!(r1->refahead.b[z] & bb)) - break; - if(r1->act.b[z] & bb) - break; - r = r1; - } - - if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) - addmove(r, bn, rn, 0); - - for(;;) { - r->act.b[z] |= bb; - p = r->f.prog; - - if(r->use1.b[z] & bb) { - if(debug['R']) - print("%P", p); - addreg(&p->from, rn); - if(debug['R']) - print("\t.c%P\n", p); - } - if((r->use2.b[z]|r->set.b[z]) & bb) { - if(debug['R']) - print("%P", p); - addreg(&p->to, rn); - if(debug['R']) - print("\t.c%P\n", p); - } - - if(STORE(r) & r->regdiff.b[z] & bb) - addmove(r, bn, rn, 1); - r->regu |= rb; - - if(r->refbehind.b[z] & bb) - for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) - if(r1->refahead.b[z] & bb) - paint3(r1, bn, rb, rn); - - if(!(r->refahead.b[z] & bb)) - break; - r1 = (Reg*)r->f.s2; - if(r1 != R) - if(r1->refbehind.b[z] & bb) - paint3(r1, bn, rb, rn); - r = (Reg*)r->f.s1; - if(r == R) - break; - if(r->act.b[z] & bb) - break; - if(!(r->refbehind.b[z] & bb)) - break; - } -} - -void -addreg(Adr *a, int rn) -{ - a->sym = nil; - a->node = nil; - a->name = NAME_NONE; - a->type = TYPE_REG; - a->reg = rn; -} - /* * bit reg * 0 R0 @@ -1324,26 +107,26 @@ addreg(Adr *a, int rn) * ... ... * 31 F15 */ -uint32 +uint64 RtoB(int r) { if(REG_R0 <= r && r <= REG_R15) { if(r >= REGTMP-2 && r != REG_R12) // excluded R9 and R10 for m and g, but not R12 return 0; - return 1L << (r - REG_R0); + return 1ULL << (r - REG_R0); } if(REG_F0 <= r && r <= REG_F15) { if(r < REG_F2 || r > REG_F0+NFREG-1) return 0; - return 1L << ((r - REG_F0) + 16); + return 1ULL << ((r - REG_F0) + 16); } return 0; } int -BtoR(uint32 b) +BtoR(uint64 b) { // TODO Allow R0 and R1, but be careful with a 0 return // TODO Allow R9. Only R10 is reserved now (just g, not m). @@ -1354,88 +137,10 @@ BtoR(uint32 b) } int -BtoF(uint32 b) +BtoF(uint64 b) { b &= 0xfffc0000L; if(b == 0) return 0; return bitno(b) - 16 + REG_F0; } - -void -dumpone(Flow *f, int isreg) -{ - int z; - Bits bit; - Reg *r; - - print("%d:%P", f->loop, f->prog); - if(isreg) { - r = (Reg*)f; - for(z=0; zset.b[z] | - r->use1.b[z] | - r->use2.b[z] | - r->refbehind.b[z] | - r->refahead.b[z] | - r->calbehind.b[z] | - r->calahead.b[z] | - r->regdiff.b[z] | - r->act.b[z] | - 0; - if(bany(&bit)) { - print("\t"); - if(bany(&r->set)) - print(" s:%Q", r->set); - if(bany(&r->use1)) - print(" u1:%Q", r->use1); - if(bany(&r->use2)) - print(" u2:%Q", r->use2); - if(bany(&r->refbehind)) - print(" rb:%Q ", r->refbehind); - if(bany(&r->refahead)) - print(" ra:%Q ", r->refahead); - if(bany(&r->calbehind)) - print(" cb:%Q ", r->calbehind); - if(bany(&r->calahead)) - print(" ca:%Q ", r->calahead); - if(bany(&r->regdiff)) - print(" d:%Q ", r->regdiff); - if(bany(&r->act)) - print(" a:%Q ", r->act); - } - } - print("\n"); -} - -void -dumpit(char *str, Flow *r0, int isreg) -{ - Flow *r, *r1; - - print("\n%s\n", str); - for(r = r0; r != nil; r = r->link) { - dumpone(r, isreg); - r1 = r->p2; - if(r1 != nil) { - print(" pred:"); - for(; r1 != nil; r1 = r1->p2link) - print(" %.4ud", (int)r1->prog->pc); - if(r->p1 != nil) - print(" (and %.4ud)", (int)r->p1->prog->pc); - else - print(" (only)"); - print("\n"); - } - // Print successors if it's not just the next one - if(r->s1 != r->link || r->s2 != nil) { - print(" succ:"); - if(r->s1 != nil) - print(" %.4ud", (int)r->s1->prog->pc); - if(r->s2 != nil) - print(" %.4ud", (int)r->s2->prog->pc); - print("\n"); - } - } -} diff --git a/src/cmd/6g/galign.c b/src/cmd/6g/galign.c index 71ad402cd5..ad66366c78 100644 --- a/src/cmd/6g/galign.c +++ b/src/cmd/6g/galign.c @@ -89,14 +89,22 @@ main(int argc, char **argv) arch.ginscall = ginscall; arch.igen = igen; arch.linkarchinit = linkarchinit; + arch.peep = peep; arch.proginfo = proginfo; arch.regalloc = regalloc; arch.regfree = regfree; - arch.regopt = regopt; arch.regtyp = regtyp; arch.sameaddr = sameaddr; arch.smallindir = smallindir; arch.stackaddr = stackaddr; + arch.excludedregs = excludedregs; + arch.RtoB = RtoB; + arch.FtoB = FtoB; + arch.BtoR = BtoR; + arch.BtoF = BtoF; + arch.optoas = optoas; + arch.doregbits = doregbits; + arch.regnames = regnames; gcmain(argc, argv); } diff --git a/src/cmd/6g/gg.h b/src/cmd/6g/gg.h index 789a9870ea..a6dfad9c8e 100644 --- a/src/cmd/6g/gg.h +++ b/src/cmd/6g/gg.h @@ -159,3 +159,18 @@ int smallindir(Addr*, Addr*); int stackaddr(Addr*); Prog* unpatch(Prog*); +/* + * reg.c + */ +uint64 excludedregs(void); +uint64 RtoB(int); +uint64 FtoB(int); +int BtoR(uint64); +int BtoF(uint64); +uint64 doregbits(int); +char** regnames(int*); + +/* + * peep.c + */ +void peep(Prog*); diff --git a/src/cmd/6g/ggen.c b/src/cmd/6g/ggen.c index dd61812bcd..72104589a3 100644 --- a/src/cmd/6g/ggen.c +++ b/src/cmd/6g/ggen.c @@ -7,7 +7,7 @@ #include #include #include "gg.h" -#include "opt.h" +#include "../gc/popt.h" static Prog *appendpp(Prog*, int, int, int, vlong, int, int, vlong); static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax); diff --git a/src/cmd/6g/peep.c b/src/cmd/6g/peep.c index e05a06087f..279b60d4e4 100644 --- a/src/cmd/6g/peep.c +++ b/src/cmd/6g/peep.c @@ -31,7 +31,7 @@ #include #include #include "gg.h" -#include "opt.h" +#include "../gc/popt.h" static void conprop(Flow *r); static void elimshortmov(Graph *g); @@ -44,9 +44,15 @@ static int copy1(Adr*, Adr*, Flow*, int); static int copyas(Adr*, Adr*); static int copyau(Adr*, Adr*); static int copysub(Adr*, Adr*, Adr*, int); +static int copyu(Prog*, Adr*, Adr*); static uint32 gactive; +enum +{ + exregoffset = REG_R15, +}; + // do we need the carry bit static int needc(Prog *p) @@ -737,7 +743,7 @@ copy1(Adr *v1, Adr *v2, Flow *r, int f) * 4 if set and used * 0 otherwise (not touched) */ -int +static int copyu(Prog *p, Adr *v, Adr *s) { ProgInfo info; diff --git a/src/cmd/6g/prog.c b/src/cmd/6g/prog.c index 32d5256f8c..79b7911e5b 100644 --- a/src/cmd/6g/prog.c +++ b/src/cmd/6g/prog.c @@ -5,7 +5,7 @@ #include #include #include "gg.h" -#include "opt.h" +#include "../gc/popt.h" // Matches real RtoB but can be used in global initializer. #define RtoB(r) (1<<((r)-REG_AX)) diff --git a/src/cmd/6g/reg.c b/src/cmd/6g/reg.c index 7db44245f1..e01f265a13 100644 --- a/src/cmd/6g/reg.c +++ b/src/cmd/6g/reg.c @@ -31,56 +31,11 @@ #include #include #include "gg.h" -#include "opt.h" +#include "../gc/popt.h" -#define NREGVAR 32 /* 16 general + 16 floating */ -#define REGBITS ((uint64)0xffffffffull) -/*c2go enum { +enum { NREGVAR = 32, - REGBITS = 0xffffffff, }; -*/ - -static Reg* firstr; -static int first = 1; - -int -rcmp(const void *a1, const void *a2) -{ - Rgn *p1, *p2; - int c1, c2; - - p1 = (Rgn*)a1; - p2 = (Rgn*)a2; - c1 = p2->cost; - c2 = p1->cost; - if(c1 -= c2) - return c1; - return p2->varno - p1->varno; -} - -static void -setaddrs(Bits bit) -{ - int i, n; - Var *v; - Node *node; - - while(bany(&bit)) { - // convert each bit to a variable - i = bnum(bit); - node = var[i].node; - n = var[i].name; - biclr(&bit, i); - - // disable all pieces of that variable - for(i=0; inode == node && v->name == n) - v->addr = 2; - } - } -} static char* regname[] = { ".AX", @@ -117,471 +72,23 @@ static char* regname[] = { ".X15", }; -static Node* regnodes[NREGVAR]; - -static void walkvardef(Node *n, Reg *r, int active); - -void -regopt(Prog *firstp) -{ - Reg *r, *r1; - Prog *p; - Graph *g; - ProgInfo info; - int i, z, active; - uint32 vreg; - Bits bit; - - if(first) { - fmtinstall('Q', Qconv); - exregoffset = REG_R15; - first = 0; - } - - mergetemp(firstp); - - /* - * control flow is more complicated in generated go code - * than in generated c code. define pseudo-variables for - * registers, so we have complete register usage information. - */ - nvar = NREGVAR; - memset(var, 0, NREGVAR*sizeof var[0]); - for(i=0; iopt = nil; - return; - } - - firstr = (Reg*)g->start; - - for(r = firstr; r != R; r = (Reg*)r->f.link) { - p = r->f.prog; - if(p->as == AVARDEF || p->as == AVARKILL) - continue; - proginfo(&info, p); - - // Avoid making variables for direct-called functions. - if(p->as == ACALL && p->to.type == TYPE_MEM && p->to.name == NAME_EXTERN) - continue; - - r->use1.b[0] |= info.reguse | info.regindex; - r->set.b[0] |= info.regset; - - bit = mkvar(r, &p->from); - if(bany(&bit)) { - if(info.flags & LeftAddr) - setaddrs(bit); - if(info.flags & LeftRead) - for(z=0; zuse1.b[z] |= bit.b[z]; - if(info.flags & LeftWrite) - for(z=0; zset.b[z] |= bit.b[z]; - } - - bit = mkvar(r, &p->to); - if(bany(&bit)) { - if(info.flags & RightAddr) - setaddrs(bit); - if(info.flags & RightRead) - for(z=0; zuse2.b[z] |= bit.b[z]; - if(info.flags & RightWrite) - for(z=0; zset.b[z] |= bit.b[z]; - } - } - - for(i=0; iaddr) { - bit = blsh(i); - for(z=0; zaddr, v->etype, v->width, v->node, v->offset); - } - - if(debug['R'] && debug['v']) - dumpit("pass1", &firstr->f, 1); - - /* - * pass 2 - * find looping structure - */ - flowrpo(g); - - if(debug['R'] && debug['v']) - dumpit("pass2", &firstr->f, 1); - - /* - * pass 2.5 - * iterate propagating fat vardef covering forward - * r->act records vars with a VARDEF since the last CALL. - * (r->act will be reused in pass 5 for something else, - * but we'll be done with it by then.) - */ - active = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) { - r->f.active = 0; - r->act = zbits; - } - for(r = firstr; r != R; r = (Reg*)r->f.link) { - p = r->f.prog; - if(p->as == AVARDEF && isfat(((Node*)(p->to.node))->type) && ((Node*)(p->to.node))->opt != nil) { - active++; - walkvardef(p->to.node, r, active); - } - } - - /* - * pass 3 - * iterate propagating usage - * back until flow graph is complete - */ -loop1: - change = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) - r->f.active = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) - if(r->f.prog->as == ARET) - prop(r, zbits, zbits); -loop11: - /* pick up unreachable code */ - i = 0; - for(r = firstr; r != R; r = r1) { - r1 = (Reg*)r->f.link; - if(r1 && r1->f.active && !r->f.active) { - prop(r, zbits, zbits); - i = 1; - } - } - if(i) - goto loop11; - if(change) - goto loop1; - - if(debug['R'] && debug['v']) - dumpit("pass3", &firstr->f, 1); - - /* - * pass 4 - * iterate propagating register/variable synchrony - * forward until graph is complete - */ -loop2: - change = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) - r->f.active = 0; - synch(firstr, zbits); - if(change) - goto loop2; - - if(debug['R'] && debug['v']) - dumpit("pass4", &firstr->f, 1); - - /* - * pass 4.5 - * move register pseudo-variables into regu. - */ - for(r = firstr; r != R; r = (Reg*)r->f.link) { - r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; - - r->set.b[0] &= ~REGBITS; - r->use1.b[0] &= ~REGBITS; - r->use2.b[0] &= ~REGBITS; - r->refbehind.b[0] &= ~REGBITS; - r->refahead.b[0] &= ~REGBITS; - r->calbehind.b[0] &= ~REGBITS; - r->calahead.b[0] &= ~REGBITS; - r->regdiff.b[0] &= ~REGBITS; - r->act.b[0] &= ~REGBITS; - } - - /* - * pass 5 - * isolate regions - * calculate costs (paint1) - */ - r = firstr; - if(r) { - for(z=0; zrefahead.b[z] | r->calahead.b[z]) & - ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); - if(bany(&bit) && !r->f.refset) { - // should never happen - all variables are preset - if(debug['w']) - print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); - r->f.refset = 1; - } - } - for(r = firstr; r != R; r = (Reg*)r->f.link) - r->act = zbits; - rgp = region; - nregion = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) { - for(z=0; zset.b[z] & - ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); - if(bany(&bit) && !r->f.refset) { - if(debug['w']) - print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); - r->f.refset = 1; - excise(&r->f); - } - for(z=0; zact.b[z] | addrs.b[z]); - while(bany(&bit)) { - i = bnum(bit); - rgp->enter = r; - rgp->varno = i; - change = 0; - paint1(r, i); - biclr(&bit, i); - if(change <= 0) - continue; - rgp->cost = change; - nregion++; - if(nregion >= NRGN) { - if(debug['R'] && debug['v']) - print("too many regions\n"); - goto brk; - } - rgp++; - } - } -brk: - qsort(region, nregion, sizeof(region[0]), rcmp); - - if(debug['R'] && debug['v']) - dumpit("pass5", &firstr->f, 1); - - /* - * pass 6 - * determine used registers (paint2) - * replace code (paint3) - */ - rgp = region; - if(debug['R'] && debug['v']) - print("\nregisterizing\n"); - for(i=0; icost, rgp->varno, rgp->enter->f.prog->pc); - bit = blsh(rgp->varno); - vreg = paint2(rgp->enter, rgp->varno, 0); - vreg = allreg(vreg, rgp); - if(rgp->regno != 0) { - if(debug['R'] && debug['v']) { - Var *v; - - v = var + rgp->varno; - print("registerize %N+%lld (bit=%2d et=%2E) in %R\n", - v->node, v->offset, rgp->varno, v->etype, rgp->regno); - } - paint3(rgp->enter, rgp->varno, vreg, rgp->regno); - } - rgp++; - } - - /* - * free aux structures. peep allocates new ones. - */ - for(i=0; iopt = nil; - flowend(g); - firstr = R; - - if(debug['R'] && debug['v']) { - // Rebuild flow graph, since we inserted instructions - g = flowstart(firstp, sizeof(Reg)); - firstr = (Reg*)g->start; - dumpit("pass6", &firstr->f, 1); - flowend(g); - firstr = R; - } - - /* - * pass 7 - * peep-hole on basic block - */ - if(!debug['R'] || debug['P']) - peep(firstp); - - /* - * eliminate nops - */ - for(p=firstp; p!=P; p=p->link) { - while(p->link != P && p->link->as == ANOP) - p->link = p->link->link; - if(p->to.type == TYPE_BRANCH) - while(p->to.u.branch != P && p->to.u.branch->as == ANOP) - p->to.u.branch = p->to.u.branch->link; - } - - if(debug['R']) { - if(ostats.ncvtreg || - ostats.nspill || - ostats.nreload || - ostats.ndelmov || - ostats.nvar || - ostats.naddr || - 0) - print("\nstats\n"); - - if(ostats.ncvtreg) - print(" %4d cvtreg\n", ostats.ncvtreg); - if(ostats.nspill) - print(" %4d spill\n", ostats.nspill); - if(ostats.nreload) - print(" %4d reload\n", ostats.nreload); - if(ostats.ndelmov) - print(" %4d delmov\n", ostats.ndelmov); - if(ostats.nvar) - print(" %4d var\n", ostats.nvar); - if(ostats.naddr) - print(" %4d addr\n", ostats.naddr); - - memset(&ostats, 0, sizeof(ostats)); - } -} - -static void -walkvardef(Node *n, Reg *r, int active) +char** +regnames(int *n) { - Reg *r1, *r2; - int bn; - Var *v; - - for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) { - if(r1->f.active == active) - break; - r1->f.active = active; - if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n) - break; - for(v=n->opt; v!=nil; v=v->nextinnode) { - bn = v - var; - biset(&r1->act, bn); - } - if(r1->f.prog->as == ACALL) - break; - } - - for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1) - if(r2->f.s2 != nil) - walkvardef(n, (Reg*)r2->f.s2, active); + *n = NREGVAR; + return regname; } -/* - * add mov b,rn - * just after r - */ -void -addmove(Reg *r, int bn, int rn, int f) +uint64 +excludedregs(void) { - Prog *p, *p1; - Adr *a; - Var *v; - - p1 = mal(sizeof(*p1)); - clearp(p1); - p1->pc = 9999; - - p = r->f.prog; - p1->link = p->link; - p->link = p1; - p1->lineno = p->lineno; - - v = var + bn; - - a = &p1->to; - a->offset = v->offset; - a->etype = v->etype; - a->type = TYPE_MEM; - a->name = v->name; - a->node = v->node; - a->sym = linksym(v->node->sym); - - // need to clean this up with wptr and - // some of the defaults - p1->as = AMOVL; - switch(simtype[(uchar)v->etype]) { - default: - fatal("unknown type %E", v->etype); - case TINT8: - case TUINT8: - case TBOOL: - p1->as = AMOVB; - break; - case TINT16: - case TUINT16: - p1->as = AMOVW; - break; - case TINT64: - case TUINT64: - case TPTR64: - p1->as = AMOVQ; - break; - case TFLOAT32: - p1->as = AMOVSS; - break; - case TFLOAT64: - p1->as = AMOVSD; - break; - case TINT32: - case TUINT32: - case TPTR32: - break; - } - - p1->from.type = TYPE_REG; - p1->from.reg = rn; - p1->from.name = NAME_NONE; - if(!f) { - p1->from = *a; - *a = zprog.from; - a->type = TYPE_REG; - a->reg = rn; - if(v->etype == TUINT8) - p1->as = AMOVB; - if(v->etype == TUINT16) - p1->as = AMOVW; - } - if(debug['R'] && debug['v']) - print("%P ===add=== %P\n", p, p1); - ostats.nspill++; + return RtoB(REG_SP); } -uint32 +uint64 doregbits(int r) { - uint32 b; + uint64 b; b = 0; if(r >= REG_AX && r <= REG_R15) @@ -598,592 +105,19 @@ doregbits(int r) return b; } -static int -overlap(int64 o1, int w1, int64 o2, int w2) -{ - int64 t1, t2; - - t1 = o1+w1; - t2 = o2+w2; - - if(!(t1 > o2 && t2 > o1)) - return 0; - - return 1; -} - -Bits -mkvar(Reg *r, Adr *a) -{ - Var *v; - int i, n, et, z, flag; - int64 w; - uint32 regu; - int64 o; - Bits bit; - Node *node; - - /* - * mark registers used - */ - if(a->type == TYPE_NONE) - goto none; - - if(r != R) - r->use1.b[0] |= doregbits(a->index); - - switch(a->type) { - default: - regu = doregbits(a->reg); - if(regu == 0) - goto none; - bit = zbits; - bit.b[0] = regu; - return bit; - - case TYPE_ADDR: - a->type = TYPE_MEM; - bit = mkvar(r, a); - setaddrs(bit); - a->type = TYPE_ADDR; - ostats.naddr++; - goto none; - - case TYPE_MEM: - switch(a->name) { - default: - goto none; - case NAME_EXTERN: - case NAME_STATIC: - case NAME_PARAM: - case NAME_AUTO: - n = a->name; - break; - } - } - - node = a->node; - if(node == N || node->op != ONAME || node->orig == N) - goto none; - node = node->orig; - if(node->orig != node) - fatal("%D: bad node", a); - if(node->sym == S || node->sym->name[0] == '.') - goto none; - et = a->etype; - o = a->offset; - w = a->width; - if(w < 0) - fatal("bad width %lld for %D", w, a); - - flag = 0; - for(i=0; inode == node && v->name == n) { - if(v->offset == o) - if(v->etype == et) - if(v->width == w) - return blsh(i); - - // if they overlaps, disable both - if(overlap(v->offset, v->width, o, w)) { -// print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et); - v->addr = 1; - flag = 1; - } - } - } - switch(et) { - case 0: - case TFUNC: - goto none; - } - - if(nvar >= NVAR) { - if(debug['w'] > 1 && node != N) - fatal("variable not optimized: %#N", node); - - // If we're not tracking a word in a variable, mark the rest as - // having its address taken, so that we keep the whole thing - // live at all calls. otherwise we might optimize away part of - // a variable but not all of it. - for(i=0; inode == node) - v->addr = 1; - } - goto none; - } - - i = nvar; - nvar++; - v = var+i; - v->offset = o; - v->name = n; - v->etype = et; - v->width = w; - v->addr = flag; // funny punning - v->node = node; - - // node->opt is the head of a linked list - // of Vars within the given Node, so that - // we can start at a Var and find all the other - // Vars in the same Go variable. - v->nextinnode = node->opt; - node->opt = v; - - bit = blsh(i); - if(n == NAME_EXTERN || n == NAME_STATIC) - for(z=0; zclass == PPARAM) - for(z=0; zclass == PPARAMOUT) - for(z=0; zaddrtaken) - v->addr = 1; - - // Disable registerization for globals, because: - // (1) we might panic at any time and we want the recovery code - // to see the latest values (issue 1304). - // (2) we don't know what pointers might point at them and we want - // loads via those pointers to see updated values and vice versa (issue 7995). - // - // Disable registerization for results if using defer, because the deferred func - // might recover and return, causing the current values to be used. - if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT)) - v->addr = 1; - - if(debug['R']) - print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); - ostats.nvar++; - - return bit; - -none: - return zbits; -} - -void -prop(Reg *r, Bits ref, Bits cal) -{ - Reg *r1, *r2; - int z, i, j; - Var *v, *v1; - - for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { - for(z=0; zrefahead.b[z]; - if(ref.b[z] != r1->refahead.b[z]) { - r1->refahead.b[z] = ref.b[z]; - change++; - } - cal.b[z] |= r1->calahead.b[z]; - if(cal.b[z] != r1->calahead.b[z]) { - r1->calahead.b[z] = cal.b[z]; - change++; - } - } - switch(r1->f.prog->as) { - case ACALL: - if(noreturn(r1->f.prog)) - break; - - // Mark all input variables (ivar) as used, because that's what the - // liveness bitmaps say. The liveness bitmaps say that so that a - // panic will not show stale values in the parameter dump. - // Mark variables with a recent VARDEF (r1->act) as used, - // so that the optimizer flushes initializations to memory, - // so that if a garbage collection happens during this CALL, - // the collector will see initialized memory. Again this is to - // match what the liveness bitmaps say. - for(z=0; zact.b[z]; - ref.b[z] = 0; - } - - // cal.b is the current approximation of what's live across the call. - // Every bit in cal.b is a single stack word. For each such word, - // find all the other tracked stack words in the same Go variable - // (struct/slice/string/interface) and mark them live too. - // This is necessary because the liveness analysis for the garbage - // collector works at variable granularity, not at word granularity. - // It is fundamental for slice/string/interface: the garbage collector - // needs the whole value, not just some of the words, in order to - // interpret the other bits correctly. Specifically, slice needs a consistent - // ptr and cap, string needs a consistent ptr and len, and interface - // needs a consistent type word and data word. - for(z=0; z= nvar || ((cal.b[z]>>i)&1) == 0) - continue; - v = var+z*64+i; - if(v->node->opt == nil) // v represents fixed register, not Go variable - continue; - - // v->node->opt is the head of a linked list of Vars - // corresponding to tracked words from the Go variable v->node. - // Walk the list and set all the bits. - // For a large struct this could end up being quadratic: - // after the first setting, the outer loop (for z, i) would see a 1 bit - // for all of the remaining words in the struct, and for each such - // word would go through and turn on all the bits again. - // To avoid the quadratic behavior, we only turn on the bits if - // v is the head of the list or if the head's bit is not yet turned on. - // This will set the bits at most twice, keeping the overall loop linear. - v1 = v->node->opt; - j = v1 - var; - if(v == v1 || !btest(&cal, j)) { - for(; v1 != nil; v1 = v1->nextinnode) { - j = v1 - var; - biset(&cal, j); - } - } - } - } - break; - - case ATEXT: - for(z=0; zset.b[z]) | - r1->use1.b[z] | r1->use2.b[z]; - cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); - r1->refbehind.b[z] = ref.b[z]; - r1->calbehind.b[z] = cal.b[z]; - } - if(r1->f.active) - break; - r1->f.active = 1; - } - for(; r != r1; r = (Reg*)r->f.p1) - for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) - prop(r2, r->refbehind, r->calbehind); -} - -void -synch(Reg *r, Bits dif) -{ - Reg *r1; - int z; - - for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { - for(z=0; zrefbehind.b[z] & r1->refahead.b[z])) | - r1->set.b[z] | r1->regdiff.b[z]; - if(dif.b[z] != r1->regdiff.b[z]) { - r1->regdiff.b[z] = dif.b[z]; - change++; - } - } - if(r1->f.active) - break; - r1->f.active = 1; - for(z=0; zcalbehind.b[z] & r1->calahead.b[z]); - if(r1->f.s2 != nil) - synch((Reg*)r1->f.s2, dif); - } -} - -uint32 -allreg(uint32 b, Rgn *r) -{ - Var *v; - int i; - - v = var + r->varno; - r->regno = 0; - switch(v->etype) { - - default: - fatal("unknown etype %d/%E", bitno(b), v->etype); - break; - - case TINT8: - case TUINT8: - case TINT16: - case TUINT16: - case TINT32: - case TUINT32: - case TINT64: - case TUINT64: - case TINT: - case TUINT: - case TUINTPTR: - case TBOOL: - case TPTR32: - case TPTR64: - i = BtoR(~b); - if(i && r->cost > 0) { - r->regno = i; - return RtoB(i); - } - break; - - case TFLOAT32: - case TFLOAT64: - i = BtoF(~b); - if(i && r->cost > 0) { - r->regno = i; - return FtoB(i); - } - break; - } - return 0; -} - -void -paint1(Reg *r, int bn) -{ - Reg *r1; - int z; - uint64 bb; - - z = bn/64; - bb = 1LL<<(bn%64); - if(r->act.b[z] & bb) - return; - for(;;) { - if(!(r->refbehind.b[z] & bb)) - break; - r1 = (Reg*)r->f.p1; - if(r1 == R) - break; - if(!(r1->refahead.b[z] & bb)) - break; - if(r1->act.b[z] & bb) - break; - r = r1; - } - - if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { - change -= CLOAD * r->f.loop; - } - for(;;) { - r->act.b[z] |= bb; - - if(r->f.prog->as != ANOP) { // don't give credit for NOPs - if(r->use1.b[z] & bb) - change += CREF * r->f.loop; - if((r->use2.b[z]|r->set.b[z]) & bb) - change += CREF * r->f.loop; - } - - if(STORE(r) & r->regdiff.b[z] & bb) { - change -= CLOAD * r->f.loop; - } - - if(r->refbehind.b[z] & bb) - for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) - if(r1->refahead.b[z] & bb) - paint1(r1, bn); - - if(!(r->refahead.b[z] & bb)) - break; - r1 = (Reg*)r->f.s2; - if(r1 != R) - if(r1->refbehind.b[z] & bb) - paint1(r1, bn); - r = (Reg*)r->f.s1; - if(r == R) - break; - if(r->act.b[z] & bb) - break; - if(!(r->refbehind.b[z] & bb)) - break; - } -} - -uint32 -paint2(Reg *r, int bn, int depth) -{ - Reg *r1; - int z; - uint64 bb, vreg; - - z = bn/64; - bb = 1LL << (bn%64); - vreg = regbits; - if(!(r->act.b[z] & bb)) - return vreg; - for(;;) { - if(!(r->refbehind.b[z] & bb)) - break; - r1 = (Reg*)r->f.p1; - if(r1 == R) - break; - if(!(r1->refahead.b[z] & bb)) - break; - if(!(r1->act.b[z] & bb)) - break; - r = r1; - } - for(;;) { - if(debug['R'] && debug['v']) - print(" paint2 %d %P\n", depth, r->f.prog); - - r->act.b[z] &= ~bb; - - vreg |= r->regu; - - if(r->refbehind.b[z] & bb) - for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) - if(r1->refahead.b[z] & bb) - vreg |= paint2(r1, bn, depth+1); - - if(!(r->refahead.b[z] & bb)) - break; - r1 = (Reg*)r->f.s2; - if(r1 != R) - if(r1->refbehind.b[z] & bb) - vreg |= paint2(r1, bn, depth+1); - r = (Reg*)r->f.s1; - if(r == R) - break; - if(!(r->act.b[z] & bb)) - break; - if(!(r->refbehind.b[z] & bb)) - break; - } - - return vreg; -} - -void -paint3(Reg *r, int bn, uint32 rb, int rn) -{ - Reg *r1; - Prog *p; - int z; - uint64 bb; - - z = bn/64; - bb = 1LL << (bn%64); - if(r->act.b[z] & bb) - return; - for(;;) { - if(!(r->refbehind.b[z] & bb)) - break; - r1 = (Reg*)r->f.p1; - if(r1 == R) - break; - if(!(r1->refahead.b[z] & bb)) - break; - if(r1->act.b[z] & bb) - break; - r = r1; - } - - if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) - addmove(r, bn, rn, 0); - for(;;) { - r->act.b[z] |= bb; - p = r->f.prog; - - if(r->use1.b[z] & bb) { - if(debug['R'] && debug['v']) - print("%P", p); - addreg(&p->from, rn); - if(debug['R'] && debug['v']) - print(" ===change== %P\n", p); - } - if((r->use2.b[z]|r->set.b[z]) & bb) { - if(debug['R'] && debug['v']) - print("%P", p); - addreg(&p->to, rn); - if(debug['R'] && debug['v']) - print(" ===change== %P\n", p); - } - - if(STORE(r) & r->regdiff.b[z] & bb) - addmove(r, bn, rn, 1); - r->regu |= rb; - - if(r->refbehind.b[z] & bb) - for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) - if(r1->refahead.b[z] & bb) - paint3(r1, bn, rb, rn); - - if(!(r->refahead.b[z] & bb)) - break; - r1 = (Reg*)r->f.s2; - if(r1 != R) - if(r1->refbehind.b[z] & bb) - paint3(r1, bn, rb, rn); - r = (Reg*)r->f.s1; - if(r == R) - break; - if(r->act.b[z] & bb) - break; - if(!(r->refbehind.b[z] & bb)) - break; - } -} - -void -addreg(Adr *a, int rn) -{ - a->sym = nil; - a->node = nil; - a->offset = 0; - a->type = TYPE_REG; - a->reg = rn; - a->name = 0; - - ostats.ncvtreg++; -} - -uint32 +uint64 RtoB(int r) { if(r < REG_AX || r > REG_R15) return 0; - return 1L << (r-REG_AX); + return 1ULL << (r-REG_AX); } int -BtoR(uint32 b) +BtoR(uint64 b) { - b &= 0xffffL; + b &= 0xffffULL; if(nacl) b &= ~((1<<(REG_BP-REG_AX)) | (1<<(REG_R15-REG_AX))); else if(framepointer_enabled) @@ -1200,16 +134,16 @@ BtoR(uint32 b) * ... * 31 X15 */ -uint32 +uint64 FtoB(int f) { if(f < REG_X0 || f > REG_X15) return 0; - return 1L << (f - REG_X0 + 16); + return 1ULL << (f - REG_X0 + 16); } int -BtoF(uint32 b) +BtoF(uint64 b) { b &= 0xFFFF0000L; @@ -1217,77 +151,3 @@ BtoF(uint32 b) return 0; return bitno(b) - 16 + REG_X0; } - -void -dumpone(Flow *f, int isreg) -{ - int z; - Bits bit; - Reg *r; - - print("%d:%P", f->loop, f->prog); - if(isreg) { - r = (Reg*)f; - for(z=0; zset.b[z] | - r->use1.b[z] | - r->use2.b[z] | - r->refbehind.b[z] | - r->refahead.b[z] | - r->calbehind.b[z] | - r->calahead.b[z] | - r->regdiff.b[z] | - r->act.b[z] | - 0; - if(bany(&bit)) { - print("\t"); - if(bany(&r->set)) - print(" s:%Q", r->set); - if(bany(&r->use1)) - print(" u1:%Q", r->use1); - if(bany(&r->use2)) - print(" u2:%Q", r->use2); - if(bany(&r->refbehind)) - print(" rb:%Q ", r->refbehind); - if(bany(&r->refahead)) - print(" ra:%Q ", r->refahead); - if(bany(&r->calbehind)) - print(" cb:%Q ", r->calbehind); - if(bany(&r->calahead)) - print(" ca:%Q ", r->calahead); - if(bany(&r->regdiff)) - print(" d:%Q ", r->regdiff); - if(bany(&r->act)) - print(" a:%Q ", r->act); - } - } - print("\n"); -} - -void -dumpit(char *str, Flow *r0, int isreg) -{ - Flow *r, *r1; - - print("\n%s\n", str); - for(r = r0; r != nil; r = r->link) { - dumpone(r, isreg); - r1 = r->p2; - if(r1 != nil) { - print(" pred:"); - for(; r1 != nil; r1 = r1->p2link) - print(" %.4ud", (int)r1->prog->pc); - print("\n"); - } - // Print successors if it's not just the next one - if(r->s1 != r->link || r->s2 != nil) { - print(" succ:"); - if(r->s1 != nil) - print(" %.4ud", (int)r->s1->prog->pc); - if(r->s2 != nil) - print(" %.4ud", (int)r->s2->prog->pc); - print("\n"); - } - } -} diff --git a/src/cmd/8g/galign.c b/src/cmd/8g/galign.c index 3fb48ad5e4..3ee3dc2f3a 100644 --- a/src/cmd/8g/galign.c +++ b/src/cmd/8g/galign.c @@ -66,14 +66,22 @@ main(int argc, char **argv) arch.ginscall = ginscall; arch.igen = igen; arch.linkarchinit = linkarchinit; + arch.peep = peep; arch.proginfo = proginfo; arch.regalloc = regalloc; arch.regfree = regfree; - arch.regopt = regopt; arch.regtyp = regtyp; arch.sameaddr = sameaddr; arch.smallindir = smallindir; arch.stackaddr = stackaddr; + arch.excludedregs = excludedregs; + arch.RtoB = RtoB; + arch.FtoB = FtoB; + arch.BtoR = BtoR; + arch.BtoF = BtoF; + arch.optoas = optoas; + arch.doregbits = doregbits; + arch.regnames = regnames; gcmain(argc, argv); } diff --git a/src/cmd/8g/gg.h b/src/cmd/8g/gg.h index 46a61b4d1c..872d946592 100644 --- a/src/cmd/8g/gg.h +++ b/src/cmd/8g/gg.h @@ -171,3 +171,19 @@ int sameaddr(Addr*, Addr*); int smallindir(Addr*, Addr*); int stackaddr(Addr*); Prog* unpatch(Prog*); + +/* + * reg.c + */ +uint64 excludedregs(void); +uint64 RtoB(int); +uint64 FtoB(int); +int BtoR(uint64); +int BtoF(uint64); +uint64 doregbits(int); +char** regnames(int*); + +/* + * peep.c + */ +void peep(Prog*); diff --git a/src/cmd/8g/ggen.c b/src/cmd/8g/ggen.c index 4cd159edcd..8188348282 100644 --- a/src/cmd/8g/ggen.c +++ b/src/cmd/8g/ggen.c @@ -7,7 +7,7 @@ #include #include #include "gg.h" -#include "opt.h" +#include "../gc/popt.h" static Prog *appendpp(Prog*, int, int, int, vlong, int, int, vlong); static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax); diff --git a/src/cmd/8g/gsubr.c b/src/cmd/8g/gsubr.c index 76c8e2d14f..959ef20592 100644 --- a/src/cmd/8g/gsubr.c +++ b/src/cmd/8g/gsubr.c @@ -187,6 +187,14 @@ optoas(int op, Type *t) case CASE(OAS, TPTR32): a = AMOVL; break; + + case CASE(OAS, TFLOAT32): + a = AMOVSS; + break; + + case CASE(OAS, TFLOAT64): + a = AMOVSD; + break; case CASE(OADD, TINT8): case CASE(OADD, TUINT8): diff --git a/src/cmd/8g/opt.h b/src/cmd/8g/opt.h deleted file mode 100644 index 8378d5d456..0000000000 --- a/src/cmd/8g/opt.h +++ /dev/null @@ -1,192 +0,0 @@ -// Derived from Inferno utils/6c/gc.h -// http://code.google.com/p/inferno-os/source/browse/utils/6c/gc.h -// -// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. -// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) -// Portions Copyright © 1997-1999 Vita Nuova Limited -// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) -// Portions Copyright © 2004,2006 Bruce Ellis -// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) -// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others -// Portions Copyright © 2009 The Go Authors. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#define Z N -#define Adr Addr - -#define BLOAD(r) band(bnot(r->refbehind), r->refahead) -#define BSTORE(r) band(bnot(r->calbehind), r->calahead) -#define LOAD(r) (~r->refbehind.b[z] & r->refahead.b[z]) -#define STORE(r) (~r->calbehind.b[z] & r->calahead.b[z]) - -#define CLOAD 5 -#define CREF 5 -#define CINF 1000 -#define LOOP 3 - -typedef struct Reg Reg; -typedef struct Rgn Rgn; - -/*c2go -extern Node *Z; -enum -{ - CLOAD = 5, - CREF = 5, - CINF = 1000, - LOOP = 3, -}; - -uint32 BLOAD(Reg*); -uint32 BSTORE(Reg*); -uint64 LOAD(Reg*); -uint64 STORE(Reg*); -*/ - -// A Reg is a wrapper around a single Prog (one instruction) that holds -// register optimization information while the optimizer runs. -// r->prog is the instruction. -// r->prog->opt points back to r. -struct Reg -{ - Flow f; - - Bits set; // regopt variables written by this instruction. - Bits use1; // regopt variables read by prog->from. - Bits use2; // regopt variables read by prog->to. - - // refahead/refbehind are the regopt variables whose current - // value may be used in the following/preceding instructions - // up to a CALL (or the value is clobbered). - Bits refbehind; - Bits refahead; - // calahead/calbehind are similar, but for variables in - // instructions that are reachable after hitting at least one - // CALL. - Bits calbehind; - Bits calahead; - Bits regdiff; - Bits act; - - int32 regu; // register used bitmap - int32 rpo; // reverse post ordering - int32 active; - - uint16 loop; // x5 for every loop - uchar refset; // diagnostic generated - - Reg* p1; // predecessors of this instruction: p1, - Reg* p2; // and then p2 linked though p2link. - Reg* p2link; - Reg* s1; // successors of this instruction (at most two: s1 and s2). - Reg* s2; - Reg* link; // next instruction in function code - Prog* prog; // actual instruction -}; -#define R ((Reg*)0) -/*c2go extern Reg *R; */ - -#define NRGN 600 -/*c2go enum { NRGN = 600 }; */ - -// A Rgn represents a single regopt variable over a region of code -// where a register could potentially be dedicated to that variable. -// The code encompassed by a Rgn is defined by the flow graph, -// starting at enter, flood-filling forward while varno is refahead -// and backward while varno is refbehind, and following branches. A -// single variable may be represented by multiple disjoint Rgns and -// each Rgn may choose a different register for that variable. -// Registers are allocated to regions greedily in order of descending -// cost. -struct Rgn -{ - Reg* enter; - short cost; - short varno; - short regno; -}; - -EXTERN int32 exregoffset; // not set -EXTERN int32 exfregoffset; // not set -EXTERN Reg zreg; -EXTERN Reg* freer; -EXTERN Reg** rpo2r; -EXTERN Rgn region[NRGN]; -EXTERN Rgn* rgp; -EXTERN int nregion; -EXTERN int nvar; -EXTERN int32 regbits; -EXTERN int32 exregbits; -EXTERN Bits externs; -EXTERN Bits params; -EXTERN Bits consts; -EXTERN Bits addrs; -EXTERN Bits ivar; -EXTERN Bits ovar; -EXTERN int change; -EXTERN int32 maxnr; -EXTERN int32* idom; - -EXTERN struct -{ - int32 ncvtreg; - int32 nspill; - int32 nreload; - int32 ndelmov; - int32 nvar; - int32 naddr; -} ostats; - -/* - * reg.c - */ -Reg* rega(void); -int rcmp(const void*, const void*); -void regopt(Prog*); -void addmove(Reg*, int, int, int); -Bits mkvar(Reg*, Adr*); -void prop(Reg*, Bits, Bits); -void loopit(Reg*, int32); -void synch(Reg*, Bits); -uint32 allreg(uint32, Rgn*); -void paint1(Reg*, int); -uint32 paint2(Reg*, int, int); -void paint3(Reg*, int, uint32, int); -void addreg(Adr*, int); -void dumpone(Flow*, int); -void dumpit(char*, Flow*, int); - -/* - * peep.c - */ -void peep(Prog*); -void excise(Flow*); -int copyu(Prog*, Adr*, Adr*); - -uint32 RtoB(int); -uint32 FtoB(int); -int BtoR(uint32); -int BtoF(uint32); - -/* - * prog.c - */ -void proginfo(ProgInfo*, Prog*); diff --git a/src/cmd/8g/peep.c b/src/cmd/8g/peep.c index 6c0865a7e8..9b514a8964 100644 --- a/src/cmd/8g/peep.c +++ b/src/cmd/8g/peep.c @@ -31,10 +31,11 @@ #include #include #include "gg.h" -#include "opt.h" +#include "../gc/popt.h" enum { REGEXT = 0, + exregoffset = REG_DI, }; static void conprop(Flow *r); @@ -45,6 +46,7 @@ static int copy1(Adr*, Adr*, Flow*, int); static int copyas(Adr*, Adr*); static int copyau(Adr*, Adr*); static int copysub(Adr*, Adr*, Adr*, int); +static int copyu(Prog*, Adr*, Adr*); static uint32 gactive; @@ -535,7 +537,7 @@ copy1(Adr *v1, Adr *v2, Flow *r, int f) * 4 if set and used * 0 otherwise (not touched) */ -int +static int copyu(Prog *p, Adr *v, Adr *s) { ProgInfo info; diff --git a/src/cmd/8g/prog.c b/src/cmd/8g/prog.c index 8a7371b5c4..e77a026a93 100644 --- a/src/cmd/8g/prog.c +++ b/src/cmd/8g/prog.c @@ -5,7 +5,7 @@ #include #include #include "gg.h" -#include "opt.h" +#include "../gc/popt.h" // Matches real RtoB but can be used in global initializer. #define RtoB(r) (1<<((r)-REG_AX)) diff --git a/src/cmd/8g/reg.c b/src/cmd/8g/reg.c index 7d2de53549..0470bdf7b5 100644 --- a/src/cmd/8g/reg.c +++ b/src/cmd/8g/reg.c @@ -31,523 +31,34 @@ #include #include #include "gg.h" -#include "opt.h" +#include "../gc/popt.h" -#define NREGVAR 16 /* 8 integer + 8 floating */ -#define REGBITS ((uint64)0xffffull) -/*c2go enum { - NREGVAR = 16, - REGBITS = (1<cost; - c2 = p1->cost; - if(c1 -= c2) - return c1; - return p2->varno - p1->varno; -} - -static void -setaddrs(Bits bit) -{ - int i, n; - Var *v; - Node *node; - - while(bany(&bit)) { - // convert each bit to a variable - i = bnum(bit); - node = var[i].node; - n = var[i].name; - biclr(&bit, i); - - // disable all pieces of that variable - for(i=0; inode == node && v->name == n) - v->addr = 2; - } - } -} static char* regname[] = { ".ax", ".cx", ".dx", ".bx", ".sp", ".bp", ".si", ".di", ".x0", ".x1", ".x2", ".x3", ".x4", ".x5", ".x6", ".x7", }; -static Node* regnodes[NREGVAR]; - -static void walkvardef(Node *n, Reg *r, int active); - -void -regopt(Prog *firstp) +char** +regnames(int *n) { - Reg *r, *r1; - Prog *p; - Graph *g; - ProgInfo info; - int i, z, active; - uint32 vreg; - Bits bit; - - if(first) { - fmtinstall('Q', Qconv); - exregoffset = REG_DI; // no externals - first = 0; - } - - mergetemp(firstp); - - /* - * control flow is more complicated in generated go code - * than in generated c code. define pseudo-variables for - * registers, so we have complete register usage information. - */ - nvar = NREGVAR; - memset(var, 0, NREGVAR*sizeof var[0]); - for(i=0; iopt = nil; - return; - } - - firstr = (Reg*)g->start; - - for(r = firstr; r != R; r = (Reg*)r->f.link) { - p = r->f.prog; - if(p->as == AVARDEF || p->as == AVARKILL) - continue; - proginfo(&info, p); - - // Avoid making variables for direct-called functions. - if(p->as == ACALL && p->to.type == TYPE_MEM && p->to.name == NAME_EXTERN) - continue; - - r->use1.b[0] |= info.reguse | info.regindex; - r->set.b[0] |= info.regset; - - bit = mkvar(r, &p->from); - if(bany(&bit)) { - if(info.flags & LeftAddr) - setaddrs(bit); - if(info.flags & LeftRead) - for(z=0; zuse1.b[z] |= bit.b[z]; - if(info.flags & LeftWrite) - for(z=0; zset.b[z] |= bit.b[z]; - } - - bit = mkvar(r, &p->to); - if(bany(&bit)) { - if(info.flags & RightAddr) - setaddrs(bit); - if(info.flags & RightRead) - for(z=0; zuse2.b[z] |= bit.b[z]; - if(info.flags & RightWrite) - for(z=0; zset.b[z] |= bit.b[z]; - } - } - if(firstr == R) - return; - - for(i=0; iaddr) { - bit = blsh(i); - for(z=0; zaddr, v->etype, v->width, v->node, v->offset); - } - - if(debug['R'] && debug['v']) - dumpit("pass1", &firstr->f, 1); - - /* - * pass 2 - * find looping structure - */ - flowrpo(g); - - if(debug['R'] && debug['v']) - dumpit("pass2", &firstr->f, 1); - - /* - * pass 2.5 - * iterate propagating fat vardef covering forward - * r->act records vars with a VARDEF since the last CALL. - * (r->act will be reused in pass 5 for something else, - * but we'll be done with it by then.) - */ - active = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) { - r->f.active = 0; - r->act = zbits; - } - for(r = firstr; r != R; r = (Reg*)r->f.link) { - p = r->f.prog; - if(p->as == AVARDEF && isfat(((Node*)(p->to.node))->type) && ((Node*)(p->to.node))->opt != nil) { - active++; - walkvardef(p->to.node, r, active); - } - } - - /* - * pass 3 - * iterate propagating usage - * back until flow graph is complete - */ -loop1: - change = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) - r->f.active = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) - if(r->f.prog->as == ARET) - prop(r, zbits, zbits); -loop11: - /* pick up unreachable code */ - i = 0; - for(r = firstr; r != R; r = r1) { - r1 = (Reg*)r->f.link; - if(r1 && r1->f.active && !r->f.active) { - prop(r, zbits, zbits); - i = 1; - } - } - if(i) - goto loop11; - if(change) - goto loop1; - - if(debug['R'] && debug['v']) - dumpit("pass3", &firstr->f, 1); - - /* - * pass 4 - * iterate propagating register/variable synchrony - * forward until graph is complete - */ -loop2: - change = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) - r->f.active = 0; - synch(firstr, zbits); - if(change) - goto loop2; - - if(debug['R'] && debug['v']) - dumpit("pass4", &firstr->f, 1); - - /* - * pass 4.5 - * move register pseudo-variables into regu. - */ - for(r = firstr; r != R; r = (Reg*)r->f.link) { - r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; - - r->set.b[0] &= ~REGBITS; - r->use1.b[0] &= ~REGBITS; - r->use2.b[0] &= ~REGBITS; - r->refbehind.b[0] &= ~REGBITS; - r->refahead.b[0] &= ~REGBITS; - r->calbehind.b[0] &= ~REGBITS; - r->calahead.b[0] &= ~REGBITS; - r->regdiff.b[0] &= ~REGBITS; - r->act.b[0] &= ~REGBITS; - } - - /* - * pass 5 - * isolate regions - * calculate costs (paint1) - */ - r = firstr; - if(r) { - for(z=0; zrefahead.b[z] | r->calahead.b[z]) & - ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); - if(bany(&bit) && !r->f.refset) { - // should never happen - all variables are preset - if(debug['w']) - print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); - r->f.refset = 1; - } - } - for(r = firstr; r != R; r = (Reg*)r->f.link) - r->act = zbits; - rgp = region; - nregion = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) { - for(z=0; zset.b[z] & - ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); - if(bany(&bit) && !r->f.refset) { - if(debug['w']) - print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); - r->f.refset = 1; - excise(&r->f); - } - for(z=0; zact.b[z] | addrs.b[z]); - while(bany(&bit)) { - i = bnum(bit); - rgp->enter = r; - rgp->varno = i; - change = 0; - paint1(r, i); - biclr(&bit, i); - if(change <= 0) - continue; - rgp->cost = change; - nregion++; - if(nregion >= NRGN) { - if(debug['R'] && debug['v']) - print("too many regions\n"); - goto brk; - } - rgp++; - } - } -brk: - qsort(region, nregion, sizeof(region[0]), rcmp); - - /* - * pass 6 - * determine used registers (paint2) - * replace code (paint3) - */ - rgp = region; - if(debug['R'] && debug['v']) - print("\nregisterizing\n"); - for(i=0; icost, rgp->varno, rgp->enter->f.prog->pc); - bit = blsh(rgp->varno); - vreg = paint2(rgp->enter, rgp->varno, 0); - vreg = allreg(vreg, rgp); - if(rgp->regno != 0) - paint3(rgp->enter, rgp->varno, vreg, rgp->regno); - rgp++; - } - - /* - * free aux structures. peep allocates new ones. - */ - for(i=0; iopt = nil; - flowend(g); - firstr = R; - - if(debug['R'] && debug['v']) { - // Rebuild flow graph, since we inserted instructions - g = flowstart(firstp, sizeof(Reg)); - firstr = (Reg*)g->start; - dumpit("pass6", &firstr->f, 1); - flowend(g); - firstr = R; - } - - /* - * pass 7 - * peep-hole on basic block - */ - if(!debug['R'] || debug['P']) - peep(firstp); - - /* - * eliminate nops - */ - for(p=firstp; p!=P; p=p->link) { - while(p->link != P && p->link->as == ANOP) - p->link = p->link->link; - if(p->to.type == TYPE_BRANCH) - while(p->to.u.branch != P && p->to.u.branch->as == ANOP) - p->to.u.branch = p->to.u.branch->link; - } - - if(!use_sse) - for(p=firstp; p!=P; p=p->link) { - if(p->from.reg >= REG_X0 && p->from.reg <= REG_X7) - fatal("invalid use of %R with GO386=387: %P", p->from.reg, p); - if(p->to.reg >= REG_X0 && p->to.reg <= REG_X7) - fatal("invalid use of %R with GO386=387: %P", p->to.reg, p); - } - - if(debug['R']) { - if(ostats.ncvtreg || - ostats.nspill || - ostats.nreload || - ostats.ndelmov || - ostats.nvar || - ostats.naddr || - 0) - print("\nstats\n"); - - if(ostats.ncvtreg) - print(" %4d cvtreg\n", ostats.ncvtreg); - if(ostats.nspill) - print(" %4d spill\n", ostats.nspill); - if(ostats.nreload) - print(" %4d reload\n", ostats.nreload); - if(ostats.ndelmov) - print(" %4d delmov\n", ostats.ndelmov); - if(ostats.nvar) - print(" %4d var\n", ostats.nvar); - if(ostats.naddr) - print(" %4d addr\n", ostats.naddr); - - memset(&ostats, 0, sizeof(ostats)); - } + *n = NREGVAR; + return regname; } -static void -walkvardef(Node *n, Reg *r, int active) +uint64 +excludedregs(void) { - Reg *r1, *r2; - int bn; - Var *v; - - for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) { - if(r1->f.active == active) - break; - r1->f.active = active; - if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n) - break; - for(v=n->opt; v!=nil; v=v->nextinnode) { - bn = v - var; - biset(&r1->act, bn); - } - if(r1->f.prog->as == ACALL) - break; - } - - for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1) - if(r2->f.s2 != nil) - walkvardef(n, (Reg*)r2->f.s2, active); + return RtoB(REG_SP); } -/* - * add mov b,rn - * just after r - */ -void -addmove(Reg *r, int bn, int rn, int f) -{ - Prog *p, *p1; - Adr *a; - Var *v; - - p1 = mal(sizeof(*p1)); - clearp(p1); - p1->pc = 9999; - - p = r->f.prog; - p1->link = p->link; - p->link = p1; - p1->lineno = p->lineno; - - v = var + bn; - - a = &p1->to; - a->offset = v->offset; - a->etype = v->etype; - a->type = TYPE_MEM; - a->name = v->name; - a->node = v->node; - a->sym = linksym(v->node->sym); - - // need to clean this up with wptr and - // some of the defaults - p1->as = AMOVL; - switch(v->etype) { - default: - fatal("unknown type %E", v->etype); - case TINT8: - case TUINT8: - case TBOOL: - p1->as = AMOVB; - break; - case TINT16: - case TUINT16: - p1->as = AMOVW; - break; - case TFLOAT32: - p1->as = AMOVSS; - break; - case TFLOAT64: - p1->as = AMOVSD; - break; - case TINT: - case TUINT: - case TINT32: - case TUINT32: - case TPTR32: - break; - } - - p1->from.type = TYPE_REG; - p1->from.reg = rn; - p1->from.name = 0; - if(!f) { - p1->from = *a; - *a = zprog.from; - a->type = TYPE_REG; - a->reg = rn; - if(v->etype == TUINT8) - p1->as = AMOVB; - if(v->etype == TUINT16) - p1->as = AMOVW; - } - if(debug['R'] && debug['v']) - print("%P ===add=== %P\n", p, p1); - ostats.nspill++; -} - -uint32 +uint64 doregbits(int r) { - uint32 b; + uint64 b; b = 0; if(r >= REG_AX && r <= REG_DI) @@ -564,605 +75,17 @@ doregbits(int r) return b; } -static int -overlap(int32 o1, int w1, int32 o2, int w2) -{ - int32 t1, t2; - - t1 = o1+w1; - t2 = o2+w2; - - if(!(t1 > o2 && t2 > o1)) - return 0; - - return 1; -} - -Bits -mkvar(Reg *r, Adr *a) -{ - Var *v; - int i, n, et, z, w, flag, regu; - int32 o; - Bits bit; - Node *node; - - /* - * mark registers used - */ - if(a->type == TYPE_NONE) - goto none; - - if(r != R) - r->use1.b[0] |= doregbits(a->index); - - switch(a->type) { - default: - regu = doregbits(a->reg); - if(regu == 0) - goto none; - bit = zbits; - bit.b[0] = regu; - return bit; - - case TYPE_ADDR: - a->type = TYPE_MEM; - bit = mkvar(r, a); - setaddrs(bit); - a->type = TYPE_ADDR; - ostats.naddr++; - goto none; - - case TYPE_MEM: - switch(a->name) { - default: - goto none; - case NAME_EXTERN: - case NAME_STATIC: - case NAME_PARAM: - case NAME_AUTO: - n = a->name; - break; - } - } - - node = a->node; - if(node == N || node->op != ONAME || node->orig == N) - goto none; - node = node->orig; - if(node->orig != node) - fatal("%D: bad node", a); - if(node->sym == S || node->sym->name[0] == '.') - goto none; - et = a->etype; - o = a->offset; - w = a->width; - if(w < 0) - fatal("bad width %d for %D", w, a); - - flag = 0; - for(i=0; inode == node && v->name == n) { - if(v->offset == o) - if(v->etype == et) - if(v->width == w) - return blsh(i); - - // if they overlap, disable both - if(overlap(v->offset, v->width, o, w)) { - if(debug['R']) - print("disable %s\n", node->sym->name); - v->addr = 1; - flag = 1; - } - } - } - - switch(et) { - case 0: - case TFUNC: - goto none; - } - - if(nvar >= NVAR) { - if(debug['w'] > 1 && node != N) - fatal("variable not optimized: %D", a); - - // If we're not tracking a word in a variable, mark the rest as - // having its address taken, so that we keep the whole thing - // live at all calls. otherwise we might optimize away part of - // a variable but not all of it. - for(i=0; inode == node) - v->addr = 1; - } - goto none; - } - - i = nvar; - nvar++; - v = var+i; - v->offset = o; - v->name = n; - v->etype = et; - v->width = w; - v->addr = flag; // funny punning - v->node = node; - - // node->opt is the head of a linked list - // of Vars within the given Node, so that - // we can start at a Var and find all the other - // Vars in the same Go variable. - v->nextinnode = node->opt; - node->opt = v; - - bit = blsh(i); - if(n == NAME_EXTERN || n == NAME_STATIC) - for(z=0; zclass == PPARAM) - for(z=0; zclass == PPARAMOUT) - for(z=0; zaddrtaken) - v->addr = 1; - - // Disable registerization for globals, because: - // (1) we might panic at any time and we want the recovery code - // to see the latest values (issue 1304). - // (2) we don't know what pointers might point at them and we want - // loads via those pointers to see updated values and vice versa (issue 7995). - // - // Disable registerization for results if using defer, because the deferred func - // might recover and return, causing the current values to be used. - if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT)) - v->addr = 1; - - if(debug['R']) - print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); - ostats.nvar++; - - return bit; - -none: - return zbits; -} - -void -prop(Reg *r, Bits ref, Bits cal) -{ - Reg *r1, *r2; - int z, i, j; - Var *v, *v1; - - for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { - for(z=0; zrefahead.b[z]; - if(ref.b[z] != r1->refahead.b[z]) { - r1->refahead.b[z] = ref.b[z]; - change++; - } - cal.b[z] |= r1->calahead.b[z]; - if(cal.b[z] != r1->calahead.b[z]) { - r1->calahead.b[z] = cal.b[z]; - change++; - } - } - switch(r1->f.prog->as) { - case ACALL: - if(noreturn(r1->f.prog)) - break; - - // Mark all input variables (ivar) as used, because that's what the - // liveness bitmaps say. The liveness bitmaps say that so that a - // panic will not show stale values in the parameter dump. - // Mark variables with a recent VARDEF (r1->act) as used, - // so that the optimizer flushes initializations to memory, - // so that if a garbage collection happens during this CALL, - // the collector will see initialized memory. Again this is to - // match what the liveness bitmaps say. - for(z=0; zact.b[z]; - ref.b[z] = 0; - } - - // cal.b is the current approximation of what's live across the call. - // Every bit in cal.b is a single stack word. For each such word, - // find all the other tracked stack words in the same Go variable - // (struct/slice/string/interface) and mark them live too. - // This is necessary because the liveness analysis for the garbage - // collector works at variable granularity, not at word granularity. - // It is fundamental for slice/string/interface: the garbage collector - // needs the whole value, not just some of the words, in order to - // interpret the other bits correctly. Specifically, slice needs a consistent - // ptr and cap, string needs a consistent ptr and len, and interface - // needs a consistent type word and data word. - for(z=0; z= nvar || ((cal.b[z]>>i)&1) == 0) - continue; - v = var+z*64+i; - if(v->node->opt == nil) // v represents fixed register, not Go variable - continue; - - // v->node->opt is the head of a linked list of Vars - // corresponding to tracked words from the Go variable v->node. - // Walk the list and set all the bits. - // For a large struct this could end up being quadratic: - // after the first setting, the outer loop (for z, i) would see a 1 bit - // for all of the remaining words in the struct, and for each such - // word would go through and turn on all the bits again. - // To avoid the quadratic behavior, we only turn on the bits if - // v is the head of the list or if the head's bit is not yet turned on. - // This will set the bits at most twice, keeping the overall loop linear. - v1 = v->node->opt; - j = v1 - var; - if(v == v1 || !btest(&cal, j)) { - for(; v1 != nil; v1 = v1->nextinnode) { - j = v1 - var; - biset(&cal, j); - } - } - } - } - break; - - case ATEXT: - for(z=0; zset.b[z]) | - r1->use1.b[z] | r1->use2.b[z]; - cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); - r1->refbehind.b[z] = ref.b[z]; - r1->calbehind.b[z] = cal.b[z]; - } - if(r1->f.active) - break; - r1->f.active = 1; - } - for(; r != r1; r = (Reg*)r->f.p1) - for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) - prop(r2, r->refbehind, r->calbehind); -} - -void -synch(Reg *r, Bits dif) -{ - Reg *r1; - int z; - - for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { - for(z=0; zrefbehind.b[z] & r1->refahead.b[z])) | - r1->set.b[z] | r1->regdiff.b[z]; - if(dif.b[z] != r1->regdiff.b[z]) { - r1->regdiff.b[z] = dif.b[z]; - change++; - } - } - if(r1->f.active) - break; - r1->f.active = 1; - for(z=0; zcalbehind.b[z] & r1->calahead.b[z]); - if((Reg*)r1->f.s2 != R) - synch((Reg*)r1->f.s2, dif); - } -} - -uint32 -allreg(uint32 b, Rgn *r) -{ - Var *v; - int i; - - v = var + r->varno; - r->regno = 0; - switch(v->etype) { - - default: - fatal("unknown etype %d/%E", bitno(b), v->etype); - break; - - case TINT8: - case TUINT8: - case TINT16: - case TUINT16: - case TINT32: - case TUINT32: - case TINT64: - case TINT: - case TUINT: - case TUINTPTR: - case TBOOL: - case TPTR32: - i = BtoR(~b); - if(i && r->cost > 0) { - r->regno = i; - return RtoB(i); - } - break; - - case TFLOAT32: - case TFLOAT64: - if(!use_sse) - break; - i = BtoF(~b); - if(i && r->cost > 0) { - r->regno = i; - return FtoB(i); - } - break; - } - return 0; -} - -void -paint1(Reg *r, int bn) -{ - Reg *r1; - Prog *p; - int z; - uint64 bb, rbz; - - z = bn/64; - bb = 1LL<<(bn%64); - if(r->act.b[z] & bb) - return; - for(;;) { - if(!(r->refbehind.b[z] & bb)) - break; - r1 = (Reg*)r->f.p1; - if(r1 == R) - break; - if(!(r1->refahead.b[z] & bb)) - break; - if(r1->act.b[z] & bb) - break; - r = r1; - } - - rbz = ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])); - if(LOAD(r) & rbz & bb) { - change -= CLOAD * r->f.loop; - } - for(;;) { - r->act.b[z] |= bb; - p = r->f.prog; - - if(r->f.prog->as != ANOP) { // don't give credit for NOPs - if(r->use1.b[z] & bb) { - change += CREF * r->f.loop; - if(p->as == AFMOVL || p->as == AFMOVW) - if(BtoR(bb) != REG_F0) - change = -CINF; - } - if((r->use2.b[z]|r->set.b[z]) & bb) { - change += CREF * r->f.loop; - if(p->as == AFMOVL || p->as == AFMOVW) - if(BtoR(bb) != REG_F0) - change = -CINF; - } - } - - if(STORE(r) & r->regdiff.b[z] & bb) { - change -= CLOAD * r->f.loop; - if(p->as == AFMOVL || p->as == AFMOVW) - if(BtoR(bb) != REG_F0) - change = -CINF; - } - - if(r->refbehind.b[z] & bb) - for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) - if(r1->refahead.b[z] & bb) - paint1(r1, bn); - - if(!(r->refahead.b[z] & bb)) - break; - r1 = (Reg*)r->f.s2; - if(r1 != R) - if(r1->refbehind.b[z] & bb) - paint1(r1, bn); - r = (Reg*)r->f.s1; - if(r == R) - break; - if(r->act.b[z] & bb) - break; - if(!(r->refbehind.b[z] & bb)) - break; - } -} - -uint32 -paint2(Reg *r, int bn, int depth) -{ - Reg *r1; - int z; - uint64 bb, vreg; - - z = bn/64; - bb = 1LL << (bn%64); - vreg = regbits; - if(!(r->act.b[z] & bb)) - return vreg; - for(;;) { - if(!(r->refbehind.b[z] & bb)) - break; - r1 = (Reg*)r->f.p1; - if(r1 == R) - break; - if(!(r1->refahead.b[z] & bb)) - break; - if(!(r1->act.b[z] & bb)) - break; - r = r1; - } - for(;;) { - if(debug['R'] && debug['v']) - print(" paint2 %d %P\n", depth, r->f.prog); - - r->act.b[z] &= ~bb; - - vreg |= r->regu; - - if(r->refbehind.b[z] & bb) - for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) - if(r1->refahead.b[z] & bb) - vreg |= paint2(r1, bn, depth+1); - - if(!(r->refahead.b[z] & bb)) - break; - r1 = (Reg*)r->f.s2; - if(r1 != R) - if(r1->refbehind.b[z] & bb) - vreg |= paint2(r1, bn, depth+1); - r = (Reg*)r->f.s1; - if(r == R) - break; - if(!(r->act.b[z] & bb)) - break; - if(!(r->refbehind.b[z] & bb)) - break; - } - - return vreg; -} - -void -paint3(Reg *r, int bn, uint32 rb, int rn) -{ - Reg *r1; - Prog *p; - int z; - uint64 bb, rbz; - - z = bn/64; - bb = 1LL << (bn%64); - if(r->act.b[z] & bb) - return; - for(;;) { - if(!(r->refbehind.b[z] & bb)) - break; - r1 = (Reg*)r->f.p1; - if(r1 == R) - break; - if(!(r1->refahead.b[z] & bb)) - break; - if(r1->act.b[z] & bb) - break; - r = r1; - } - - rbz = ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])); - if(LOAD(r) & rbz & bb) - addmove(r, bn, rn, 0); - for(;;) { - r->act.b[z] |= bb; - p = r->f.prog; - - if(r->use1.b[z] & bb) { - if(debug['R'] && debug['v']) - print("%P", p); - addreg(&p->from, rn); - if(debug['R'] && debug['v']) - print(" ===change== %P\n", p); - } - if((r->use2.b[z]|r->set.b[z]) & bb) { - if(debug['R'] && debug['v']) - print("%P", p); - addreg(&p->to, rn); - if(debug['R'] && debug['v']) - print(" ===change== %P\n", p); - } - - if(STORE(r) & r->regdiff.b[z] & bb) - addmove(r, bn, rn, 1); - r->regu |= rb; - - if(r->refbehind.b[z] & bb) - for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) - if(r1->refahead.b[z] & bb) - paint3(r1, bn, rb, rn); - - if(!(r->refahead.b[z] & bb)) - break; - r1 = (Reg*)r->f.s2; - if(r1 != R) - if(r1->refbehind.b[z] & bb) - paint3(r1, bn, rb, rn); - r = (Reg*)r->f.s1; - if(r == R) - break; - if(r->act.b[z] & bb) - break; - if(!(r->refbehind.b[z] & bb)) - break; - } -} - -void -addreg(Adr *a, int rn) -{ - a->sym = nil; - a->node = nil; - a->offset = 0; - a->type = TYPE_REG; - a->reg = rn; - a->name = 0; - - ostats.ncvtreg++; -} - -uint32 +uint64 RtoB(int r) { if(r < REG_AX || r > REG_DI) return 0; - return 1L << (r-REG_AX); + return 1ULL << (r-REG_AX); } int -BtoR(uint32 b) +BtoR(uint64 b) { b &= 0xffL; @@ -1171,93 +94,19 @@ BtoR(uint32 b) return bitno(b) + REG_AX; } -uint32 +uint64 FtoB(int f) { if(f < REG_X0 || f > REG_X7) return 0; - return 1L << (f - REG_X0 + 8); + return 1ULL << (f - REG_X0 + 8); } int -BtoF(uint32 b) +BtoF(uint64 b) { b &= 0xFF00L; if(b == 0) return 0; return bitno(b) - 8 + REG_X0; } - -void -dumpone(Flow *f, int isreg) -{ - int z; - Bits bit; - Reg *r; - - print("%d:%P", f->loop, f->prog); - if(isreg) { - r = (Reg*)f; - for(z=0; zset.b[z] | - r->use1.b[z] | - r->use2.b[z] | - r->refbehind.b[z] | - r->refahead.b[z] | - r->calbehind.b[z] | - r->calahead.b[z] | - r->regdiff.b[z] | - r->act.b[z] | - 0; - if(bany(&bit)) { - print("\t"); - if(bany(&r->set)) - print(" s:%Q", r->set); - if(bany(&r->use1)) - print(" u1:%Q", r->use1); - if(bany(&r->use2)) - print(" u2:%Q", r->use2); - if(bany(&r->refbehind)) - print(" rb:%Q ", r->refbehind); - if(bany(&r->refahead)) - print(" ra:%Q ", r->refahead); - if(bany(&r->calbehind)) - print(" cb:%Q ", r->calbehind); - if(bany(&r->calahead)) - print(" ca:%Q ", r->calahead); - if(bany(&r->regdiff)) - print(" d:%Q ", r->regdiff); - if(bany(&r->act)) - print(" a:%Q ", r->act); - } - } - print("\n"); -} - -void -dumpit(char *str, Flow *r0, int isreg) -{ - Flow *r, *r1; - - print("\n%s\n", str); - for(r = r0; r != nil; r = r->link) { - dumpone(r, isreg); - r1 = r->p2; - if(r1 != nil) { - print(" pred:"); - for(; r1 != nil; r1 = r1->p2link) - print(" %.4ud", (int)r1->prog->pc); - print("\n"); - } - // Print successors if it's not just the next one - if(r->s1 != r->link || r->s2 != nil) { - print(" succ:"); - if(r->s1 != nil) - print(" %.4ud", (int)r->s1->prog->pc); - if(r->s2 != nil) - print(" %.4ud", (int)r->s2->prog->pc); - print("\n"); - } - } -} diff --git a/src/cmd/9g/galign.c b/src/cmd/9g/galign.c index 39db87d48a..5ee535de05 100644 --- a/src/cmd/9g/galign.c +++ b/src/cmd/9g/galign.c @@ -73,14 +73,22 @@ main(int argc, char **argv) arch.ginscall = ginscall; arch.igen = igen; arch.linkarchinit = linkarchinit; + arch.peep = peep; arch.proginfo = proginfo; arch.regalloc = regalloc; arch.regfree = regfree; - arch.regopt = regopt; arch.regtyp = regtyp; arch.sameaddr = sameaddr; arch.smallindir = smallindir; arch.stackaddr = stackaddr; + arch.excludedregs = excludedregs; + arch.RtoB = RtoB; + arch.FtoB = RtoB; + arch.BtoR = BtoR; + arch.BtoF = BtoF; + arch.optoas = optoas; + arch.doregbits = doregbits; + arch.regnames = regnames; gcmain(argc, argv); } diff --git a/src/cmd/9g/gg.h b/src/cmd/9g/gg.h index 235b8b9731..cc44f3586c 100644 --- a/src/cmd/9g/gg.h +++ b/src/cmd/9g/gg.h @@ -154,3 +154,19 @@ int smallindir(Addr*, Addr*); int stackaddr(Addr*); Prog* unpatch(Prog*); + +/* + * reg.c + */ +uint64 excludedregs(void); +uint64 RtoB(int); +uint64 FtoB(int); +int BtoR(uint64); +int BtoF(uint64); +uint64 doregbits(int); +char** regnames(int*); + +/* + * peep.c + */ +void peep(Prog*); diff --git a/src/cmd/9g/ggen.c b/src/cmd/9g/ggen.c index 7e8efb5048..7b34282685 100644 --- a/src/cmd/9g/ggen.c +++ b/src/cmd/9g/ggen.c @@ -7,7 +7,7 @@ #include #include #include "gg.h" -#include "opt.h" +#include "../gc/popt.h" static Prog *appendpp(Prog *p, int as, int ftype, int freg, vlong foffset, int ttype, int treg, vlong toffset); static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi); diff --git a/src/cmd/9g/opt.h b/src/cmd/9g/opt.h index 25b6703279..79a34fb1f0 100644 --- a/src/cmd/9g/opt.h +++ b/src/cmd/9g/opt.h @@ -1,175 +1,6 @@ -// Derived from Inferno utils/6c/gc.h -// http://code.google.com/p/inferno-os/source/browse/utils/6c/gc.h -// -// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. -// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) -// Portions Copyright © 1997-1999 Vita Nuova Limited -// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) -// Portions Copyright © 2004,2006 Bruce Ellis -// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) -// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others -// Portions Copyright © 2009 The Go Authors. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#define Z N -#define Adr Addr - -#define BLOAD(r) band(bnot(r->refbehind), r->refahead) -#define BSTORE(r) band(bnot(r->calbehind), r->calahead) -#define LOAD(r) (~r->refbehind.b[z] & r->refahead.b[z]) -#define STORE(r) (~r->calbehind.b[z] & r->calahead.b[z]) - -#define CLOAD 5 -#define CREF 5 -#define CINF 1000 -#define LOOP 3 - -typedef struct Reg Reg; -typedef struct Rgn Rgn; - -/*c2go -extern Node *Z; -enum -{ - CLOAD = 5, - CREF = 5, - CINF = 1000, - LOOP = 3, -}; - -uint32 BLOAD(Reg*); -uint32 BSTORE(Reg*); -uint32 LOAD(Reg*); -uint32 STORE(Reg*); -*/ - -// A Reg is a wrapper around a single Prog (one instruction) that holds -// register optimization information while the optimizer runs. -// r->prog is the instruction. -// r->prog->opt points back to r. -struct Reg -{ - Flow f; - - Bits set; // regopt variables written by this instruction. - Bits use1; // regopt variables read by prog->from. - Bits use2; // regopt variables read by prog->to. - - // refahead/refbehind are the regopt variables whose current - // value may be used in the following/preceding instructions - // up to a CALL (or the value is clobbered). - Bits refbehind; - Bits refahead; - // calahead/calbehind are similar, but for variables in - // instructions that are reachable after hitting at least one - // CALL. - Bits calbehind; - Bits calahead; - Bits regdiff; - Bits act; - - uint64 regu; // register used bitmap -}; -#define R ((Reg*)0) -/*c2go extern Reg *R; */ - -#define NRGN 600 -/*c2go enum { NRGN = 600 }; */ - -// A Rgn represents a single regopt variable over a region of code -// where a register could potentially be dedicated to that variable. -// The code encompassed by a Rgn is defined by the flow graph, -// starting at enter, flood-filling forward while varno is refahead -// and backward while varno is refbehind, and following branches. A -// single variable may be represented by multiple disjoint Rgns and -// each Rgn may choose a different register for that variable. -// Registers are allocated to regions greedily in order of descending -// cost. -struct Rgn -{ - Reg* enter; - short cost; - short varno; - short regno; -}; - -EXTERN int32 exregoffset; // not set -EXTERN int32 exfregoffset; // not set -EXTERN Reg zreg; -EXTERN Rgn region[NRGN]; -EXTERN Rgn* rgp; -EXTERN int nregion; -EXTERN int nvar; -EXTERN int32 regbits; -EXTERN int32 exregbits; // TODO(austin) not used; remove -EXTERN Bits externs; -EXTERN Bits params; -EXTERN Bits consts; -EXTERN Bits addrs; -EXTERN Bits ivar; -EXTERN Bits ovar; -EXTERN int change; -EXTERN int32 maxnr; - -EXTERN struct -{ - int32 ncvtreg; - int32 nspill; - int32 ndelmov; - int32 nvar; -} ostats; - -/* - * reg.c - */ -int rcmp(const void*, const void*); -void regopt(Prog*); -void addmove(Reg*, int, int, int); -Bits mkvar(Reg*, Adr*); -void prop(Reg*, Bits, Bits); -void synch(Reg*, Bits); -uint64 allreg(uint64, Rgn*); -void paint1(Reg*, int); -uint64 paint2(Reg*, int, int); -void paint3(Reg*, int, uint64, int); -void addreg(Adr*, int); -void dumpone(Flow*, int); -void dumpit(char*, Flow*, int); - -/* - * peep.c - */ -void peep(Prog*); -void excise(Flow*); -int copyu(Prog*, Adr*, Adr*); - -uint64 RtoB(int); -uint64 FtoB(int); -int BtoR(uint64); -int BtoF(uint64); - -/* - * prog.c - */ -void proginfo(ProgInfo*, Prog*); +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. // Many Power ISA arithmetic and logical instructions come in four // standard variants. These bits let us map between variants. diff --git a/src/cmd/9g/peep.c b/src/cmd/9g/peep.c index 95ff0b4d58..1ca28dde05 100644 --- a/src/cmd/9g/peep.c +++ b/src/cmd/9g/peep.c @@ -31,6 +31,7 @@ #include #include #include "gg.h" +#include "../gc/popt.h" #include "opt.h" static int regzer(Addr *a); @@ -42,6 +43,7 @@ static int copyau(Addr*, Addr*); static int copysub(Addr*, Addr*, Addr*, int); static int copysub1(Prog*, Addr*, Addr*, int); static int copyau1(Prog *p, Addr *v); +static int copyu(Prog *p, Addr *v, Addr *s); static uint32 gactive; @@ -568,7 +570,7 @@ copy1(Addr *v1, Addr *v2, Flow *r, int f) // 4 if v is set in one address and used in another (so addresses // can be rewritten independently) // 0 otherwise (not touched) -int +static int copyu(Prog *p, Addr *v, Addr *s) { if(p->from3.type != TYPE_NONE) diff --git a/src/cmd/9g/prog.c b/src/cmd/9g/prog.c index 1775993a97..561249c358 100644 --- a/src/cmd/9g/prog.c +++ b/src/cmd/9g/prog.c @@ -5,6 +5,7 @@ #include #include #include "gg.h" +#include "../gc/popt.h" #include "opt.h" enum { diff --git a/src/cmd/9g/reg.c b/src/cmd/9g/reg.c index a7ee07e547..84e1747e8d 100644 --- a/src/cmd/9g/reg.c +++ b/src/cmd/9g/reg.c @@ -31,56 +31,12 @@ #include #include #include "gg.h" -#include "opt.h" +#include "../gc/popt.h" -#define NREGVAR 64 /* 32 general + 32 floating */ -#define REGBITS ((uint64)0xffffffffffffffffull) -/*c2go enum { - NREGVAR = 64, - REGBITS = 0xffffffffffffffff, +enum { + NREGVAR = 64, /* 32 general + 32 floating */ }; -*/ -static Reg* firstr; -static int first = 1; - -int -rcmp(const void *a1, const void *a2) -{ - Rgn *p1, *p2; - int c1, c2; - - p1 = (Rgn*)a1; - p2 = (Rgn*)a2; - c1 = p2->cost; - c2 = p1->cost; - if(c1 -= c2) - return c1; - return p2->varno - p1->varno; -} - -static void -setaddrs(Bits bit) -{ - int i, n; - Var *v; - Node *node; - - while(bany(&bit)) { - // convert each bit to a variable - i = bnum(bit); - node = var[i].node; - n = var[i].name; - biclr(&bit, i); - - // disable all pieces of that variable - for(i=0; inode == node && v->name == n) - v->addr = 2; - } - } -} static char* regname[] = { ".R0", @@ -149,1059 +105,32 @@ static char* regname[] = { ".F31", }; -static Node* regnodes[NREGVAR]; - -static void walkvardef(Node *n, Reg *r, int active); - -void -regopt(Prog *firstp) +char** +regnames(int *n) { - Reg *r, *r1; - Prog *p; - Graph *g; - ProgInfo info; - int i, z, active; - uint64 vreg, usedreg; - Bits bit; - - if(first) { - fmtinstall('Q', Qconv); - first = 0; - } - - mergetemp(firstp); + *n = NREGVAR; + return regname; +} - /* - * control flow is more complicated in generated go code - * than in generated c code. define pseudo-variables for - * registers, so we have complete register usage information. - */ - nvar = NREGVAR; - memset(var, 0, NREGVAR*sizeof var[0]); - for(i=0; iopt = nil; - return; - } - - firstr = (Reg*)g->start; - - for(r = firstr; r != R; r = (Reg*)r->f.link) { - p = r->f.prog; - if(p->as == AVARDEF || p->as == AVARKILL) - continue; - proginfo(&info, p); - - // Avoid making variables for direct-called functions. - if(p->as == ABL && p->to.name == NAME_EXTERN) - continue; - - // from vs to doesn't matter for registers - r->use1.b[0] |= info.reguse | info.regindex; - r->set.b[0] |= info.regset; - - // Compute used register for from - bit = mkvar(r, &p->from); - if(info.flags & LeftAddr) - setaddrs(bit); - if(info.flags & LeftRead) - for(z=0; zuse1.b[z] |= bit.b[z]; - - // Compute used register for reg - if(info.flags & RegRead) - r->use1.b[0] |= RtoB(p->reg); - - // Currently we never generate three register forms. - // If we do, this will need to change. - if(p->from3.type != TYPE_NONE) - fatal("regopt not implemented for from3"); - - // Compute used register for to - bit = mkvar(r, &p->to); - if(info.flags & RightAddr) - setaddrs(bit); - if(info.flags & RightRead) - for(z=0; zuse2.b[z] |= bit.b[z]; - if(info.flags & RightWrite) - for(z=0; zset.b[z] |= bit.b[z]; - } - - for(i=0; iaddr) { - bit = blsh(i); - for(z=0; zaddr, v->etype, v->width, v->node, v->offset); - } - - if(debug['R'] && debug['v']) - dumpit("pass1", &firstr->f, 1); - - /* - * pass 2 - * find looping structure - */ - flowrpo(g); - - if(debug['R'] && debug['v']) - dumpit("pass2", &firstr->f, 1); - - /* - * pass 2.5 - * iterate propagating fat vardef covering forward - * r->act records vars with a VARDEF since the last CALL. - * (r->act will be reused in pass 5 for something else, - * but we'll be done with it by then.) - */ - active = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) { - r->f.active = 0; - r->act = zbits; - } - for(r = firstr; r != R; r = (Reg*)r->f.link) { - p = r->f.prog; - if(p->as == AVARDEF && isfat(((Node*)(p->to.node))->type) && ((Node*)(p->to.node))->opt != nil) { - active++; - walkvardef(p->to.node, r, active); - } - } - - /* - * pass 3 - * iterate propagating usage - * back until flow graph is complete - */ -loop1: - change = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) - r->f.active = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) - if(r->f.prog->as == ARET) - prop(r, zbits, zbits); -loop11: - /* pick up unreachable code */ - i = 0; - for(r = firstr; r != R; r = r1) { - r1 = (Reg*)r->f.link; - if(r1 && r1->f.active && !r->f.active) { - prop(r, zbits, zbits); - i = 1; - } - } - if(i) - goto loop11; - if(change) - goto loop1; - - if(debug['R'] && debug['v']) - dumpit("pass3", &firstr->f, 1); - - /* - * pass 4 - * iterate propagating register/variable synchrony - * forward until graph is complete - */ -loop2: - change = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) - r->f.active = 0; - synch(firstr, zbits); - if(change) - goto loop2; - - if(debug['R'] && debug['v']) - dumpit("pass4", &firstr->f, 1); - - /* - * pass 4.5 - * move register pseudo-variables into regu. - */ - for(r = firstr; r != R; r = (Reg*)r->f.link) { - r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; - - r->set.b[0] &= ~REGBITS; - r->use1.b[0] &= ~REGBITS; - r->use2.b[0] &= ~REGBITS; - r->refbehind.b[0] &= ~REGBITS; - r->refahead.b[0] &= ~REGBITS; - r->calbehind.b[0] &= ~REGBITS; - r->calahead.b[0] &= ~REGBITS; - r->regdiff.b[0] &= ~REGBITS; - r->act.b[0] &= ~REGBITS; - } - - if(debug['R'] && debug['v']) - dumpit("pass4.5", &firstr->f, 1); - - /* - * pass 5 - * isolate regions - * calculate costs (paint1) - */ - r = firstr; - if(r) { - for(z=0; zrefahead.b[z] | r->calahead.b[z]) & - ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); - if(bany(&bit) && !r->f.refset) { - // should never happen - all variables are preset - if(debug['w']) - print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); - r->f.refset = 1; - } - } - for(r = firstr; r != R; r = (Reg*)r->f.link) - r->act = zbits; - rgp = region; - nregion = 0; - for(r = firstr; r != R; r = (Reg*)r->f.link) { - for(z=0; zset.b[z] & - ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); - if(bany(&bit) && !r->f.refset) { - if(debug['w']) - print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); - r->f.refset = 1; - excise(&r->f); - } - for(z=0; zact.b[z] | addrs.b[z]); - while(bany(&bit)) { - i = bnum(bit); - rgp->enter = r; - rgp->varno = i; - change = 0; - paint1(r, i); - biclr(&bit, i); - if(change <= 0) - continue; - rgp->cost = change; - nregion++; - if(nregion >= NRGN) { - if(debug['R'] && debug['v']) - print("too many regions\n"); - goto brk; - } - rgp++; - } - } -brk: - qsort(region, nregion, sizeof(region[0]), rcmp); - - if(debug['R'] && debug['v']) - dumpit("pass5", &firstr->f, 1); - - /* - * pass 6 - * determine used registers (paint2) - * replace code (paint3) - */ - rgp = region; - if(debug['R'] && debug['v']) - print("\nregisterizing\n"); - for(i=0; icost, rgp->varno, rgp->enter->f.prog->pc); - bit = blsh(rgp->varno); - usedreg = paint2(rgp->enter, rgp->varno, 0); - vreg = allreg(usedreg, rgp); - if(rgp->regno != 0) { - if(debug['R'] && debug['v']) { - Var *v; - - v = var + rgp->varno; - print("registerize %N+%lld (bit=%2d et=%2E) in %R usedreg=%llx vreg=%llx\n", - v->node, v->offset, rgp->varno, v->etype, rgp->regno, usedreg, vreg); - } - paint3(rgp->enter, rgp->varno, vreg, rgp->regno); - } - rgp++; - } - - /* - * free aux structures. peep allocates new ones. - */ - for(i=0; iopt = nil; - flowend(g); - firstr = R; - - if(debug['R'] && debug['v']) { - // Rebuild flow graph, since we inserted instructions - g = flowstart(firstp, sizeof(Reg)); - firstr = (Reg*)g->start; - dumpit("pass6", &firstr->f, 1); - flowend(g); - firstr = R; - } - - /* - * pass 7 - * peep-hole on basic block - */ - if(!debug['R'] || debug['P']) - peep(firstp); - - /* - * eliminate nops - */ - for(p=firstp; p!=P; p=p->link) { - while(p->link != P && p->link->as == ANOP) - p->link = p->link->link; - if(p->to.type == TYPE_BRANCH) - while(p->to.u.branch != P && p->to.u.branch->as == ANOP) - p->to.u.branch = p->to.u.branch->link; - } - - if(debug['R']) { - if(ostats.ncvtreg || - ostats.nspill || - ostats.ndelmov || - ostats.nvar || - 0) - print("\nstats\n"); - - if(ostats.ncvtreg) - print(" %4d cvtreg\n", ostats.ncvtreg); - if(ostats.nspill) - print(" %4d spill\n", ostats.nspill); - if(ostats.ndelmov) - print(" %4d delmov\n", ostats.ndelmov); - if(ostats.nvar) - print(" %4d var\n", ostats.nvar); - - memset(&ostats, 0, sizeof(ostats)); - } - - return; -} - -static void -walkvardef(Node *n, Reg *r, int active) -{ - Reg *r1, *r2; - int bn; - Var *v; - - for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) { - if(r1->f.active == active) - break; - r1->f.active = active; - if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n) - break; - for(v=n->opt; v!=nil; v=v->nextinnode) { - bn = v - var; - biset(&r1->act, bn); - } - if(r1->f.prog->as == ABL) - break; - } - - for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1) - if(r2->f.s2 != nil) - walkvardef(n, (Reg*)r2->f.s2, active); -} - -/* - * add mov b,rn - * just after r - */ -void -addmove(Reg *r, int bn, int rn, int f) -{ - Prog *p, *p1, *p2; - Adr *a; - Var *v; - - p1 = mal(sizeof(*p1)); - *p1 = zprog; - p = r->f.prog; - - // If there's a stack fixup coming (ADD $n,R1 after BL newproc or BL deferproc), - // delay the load until after the fixup. - p2 = p->link; - if(p2 && p2->as == AADD && p2->to.reg == REGSP && p2->to.type == TYPE_REG) - p = p2; - - p1->link = p->link; - p->link = p1; - p1->lineno = p->lineno; - - v = var + bn; - - a = &p1->to; - a->name = v->name; - a->node = v->node; - a->sym = linksym(v->node->sym); - a->offset = v->offset; - a->etype = v->etype; - a->type = TYPE_MEM; - if(a->etype == TARRAY) - a->type = TYPE_ADDR; - else if(a->sym == nil) - a->type = TYPE_CONST; - - if(v->addr) - fatal("addmove: shouldn't be doing this %A\n", a); - - switch(v->etype) { - default: - print("What is this %E\n", v->etype); - - case TINT8: - p1->as = AMOVB; - break; - case TBOOL: - case TUINT8: -//print("movbu %E %d %S\n", v->etype, bn, v->sym); - p1->as = AMOVBZ; - break; - case TINT16: - p1->as = AMOVH; - break; - case TUINT16: - p1->as = AMOVHZ; - break; - case TINT32: - p1->as = AMOVW; - break; - case TUINT32: - case TPTR32: - p1->as = AMOVWZ; - break; - case TINT64: - case TUINT64: - case TPTR64: - p1->as = AMOVD; - break; - case TFLOAT32: - p1->as = AFMOVS; - break; - case TFLOAT64: - p1->as = AFMOVD; - break; - } - - p1->from.type = TYPE_REG; - p1->from.reg = rn; - if(!f) { - p1->from = *a; - *a = zprog.from; - a->type = TYPE_REG; - a->reg = rn; - if(v->etype == TUINT8 || v->etype == TBOOL) - p1->as = AMOVBZ; - if(v->etype == TUINT16) - p1->as = AMOVHZ; - } - if(debug['R']) - print("%P\t.a%P\n", p, p1); - ostats.nspill++; -} - -static int -overlap(int64 o1, int w1, int64 o2, int w2) -{ - int64 t1, t2; - - t1 = o1+w1; - t2 = o2+w2; - - if(!(t1 > o2 && t2 > o1)) - return 0; - - return 1; -} - -Bits -mkvar(Reg *r, Adr *a) -{ - USED(r); - Var *v; - int i, t, n, et, z, flag; - int64 w; - int64 o; - Bits bit; - Node *node; - - // mark registers used - t = a->type; - switch(t) { - default: - print("type %d %d %D\n", t, a->name, a); - goto none; - - case TYPE_NONE: - goto none; - - case TYPE_BRANCH: - case TYPE_CONST: - case TYPE_FCONST: - case TYPE_SCONST: - case TYPE_MEM: - case TYPE_ADDR: - break; - - case TYPE_REG: - if(a->reg != 0) { - bit = zbits; - bit.b[0] = RtoB(a->reg); - return bit; - } - break; - } - - switch(a->name) { - default: - goto none; - - case NAME_EXTERN: - case NAME_STATIC: - case NAME_AUTO: - case NAME_PARAM: - n = a->name; - break; - } - - node = a->node; - if(node == N || node->op != ONAME || node->orig == N) - goto none; - node = node->orig; - if(node->orig != node) - fatal("%D: bad node", a); - if(node->sym == S || node->sym->name[0] == '.') - goto none; - et = a->etype; - o = a->offset; - w = a->width; - if(w < 0) - fatal("bad width %lld for %D", w, a); - - flag = 0; - for(i=0; inode == node && v->name == n) { - if(v->offset == o) - if(v->etype == et) - if(v->width == w) - return blsh(i); - - // if they overlap, disable both - if(overlap(v->offset, v->width, o, w)) { - v->addr = 1; - flag = 1; - } - } - } - - switch(et) { - case 0: - case TFUNC: - goto none; - } - - if(nvar >= NVAR) { - if(debug['w'] > 1 && node != N) - fatal("variable not optimized: %#N", node); - - // If we're not tracking a word in a variable, mark the rest as - // having its address taken, so that we keep the whole thing - // live at all calls. otherwise we might optimize away part of - // a variable but not all of it. - for(i=0; inode == node) - v->addr = 1; - } - goto none; - } - - i = nvar; - nvar++; - v = var+i; - v->offset = o; - v->name = n; - v->etype = et; - v->width = w; - v->addr = flag; // funny punning - v->node = node; - - // node->opt is the head of a linked list - // of Vars within the given Node, so that - // we can start at a Var and find all the other - // Vars in the same Go variable. - v->nextinnode = node->opt; - node->opt = v; - - bit = blsh(i); - if(n == NAME_EXTERN || n == NAME_STATIC) - for(z=0; zclass == PPARAM) - for(z=0; zclass == PPARAMOUT) - for(z=0; zaddrtaken) - v->addr = 1; - - // Disable registerization for globals, because: - // (1) we might panic at any time and we want the recovery code - // to see the latest values (issue 1304). - // (2) we don't know what pointers might point at them and we want - // loads via those pointers to see updated values and vice versa (issue 7995). - // - // Disable registerization for results if using defer, because the deferred func - // might recover and return, causing the current values to be used. - if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT)) - v->addr = 1; - - if(debug['R']) - print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); - ostats.nvar++; - - return bit; - -none: - return zbits; -} - -void -prop(Reg *r, Bits ref, Bits cal) -{ - Reg *r1, *r2; - int z, i, j; - Var *v, *v1; - - for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { - for(z=0; zrefahead.b[z]; - if(ref.b[z] != r1->refahead.b[z]) { - r1->refahead.b[z] = ref.b[z]; - change++; - } - cal.b[z] |= r1->calahead.b[z]; - if(cal.b[z] != r1->calahead.b[z]) { - r1->calahead.b[z] = cal.b[z]; - change++; - } - } - switch(r1->f.prog->as) { - case ABL: - if(noreturn(r1->f.prog)) - break; - - // Mark all input variables (ivar) as used, because that's what the - // liveness bitmaps say. The liveness bitmaps say that so that a - // panic will not show stale values in the parameter dump. - // Mark variables with a recent VARDEF (r1->act) as used, - // so that the optimizer flushes initializations to memory, - // so that if a garbage collection happens during this CALL, - // the collector will see initialized memory. Again this is to - // match what the liveness bitmaps say. - for(z=0; zact.b[z]; - ref.b[z] = 0; - } - - // cal.b is the current approximation of what's live across the call. - // Every bit in cal.b is a single stack word. For each such word, - // find all the other tracked stack words in the same Go variable - // (struct/slice/string/interface) and mark them live too. - // This is necessary because the liveness analysis for the garbage - // collector works at variable granularity, not at word granularity. - // It is fundamental for slice/string/interface: the garbage collector - // needs the whole value, not just some of the words, in order to - // interpret the other bits correctly. Specifically, slice needs a consistent - // ptr and cap, string needs a consistent ptr and len, and interface - // needs a consistent type word and data word. - for(z=0; z= nvar || ((cal.b[z]>>i)&1) == 0) - continue; - v = var+z*64+i; - if(v->node->opt == nil) // v represents fixed register, not Go variable - continue; - - // v->node->opt is the head of a linked list of Vars - // corresponding to tracked words from the Go variable v->node. - // Walk the list and set all the bits. - // For a large struct this could end up being quadratic: - // after the first setting, the outer loop (for z, i) would see a 1 bit - // for all of the remaining words in the struct, and for each such - // word would go through and turn on all the bits again. - // To avoid the quadratic behavior, we only turn on the bits if - // v is the head of the list or if the head's bit is not yet turned on. - // This will set the bits at most twice, keeping the overall loop linear. - v1 = v->node->opt; - j = v1 - var; - if(v == v1 || !btest(&cal, j)) { - for(; v1 != nil; v1 = v1->nextinnode) { - j = v1 - var; - biset(&cal, j); - } - } - } - } - break; - - case ATEXT: - for(z=0; zset.b[z]) | - r1->use1.b[z] | r1->use2.b[z]; - cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); - r1->refbehind.b[z] = ref.b[z]; - r1->calbehind.b[z] = cal.b[z]; - } - if(r1->f.active) - break; - r1->f.active = 1; - } - for(; r != r1; r = (Reg*)r->f.p1) - for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) - prop(r2, r->refbehind, r->calbehind); -} - -void -synch(Reg *r, Bits dif) -{ - Reg *r1; - int z; - - for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { - for(z=0; zrefbehind.b[z] & r1->refahead.b[z])) | - r1->set.b[z] | r1->regdiff.b[z]; - if(dif.b[z] != r1->regdiff.b[z]) { - r1->regdiff.b[z] = dif.b[z]; - change++; - } - } - if(r1->f.active) - break; - r1->f.active = 1; - for(z=0; zcalbehind.b[z] & r1->calahead.b[z]); - if(r1->f.s2 != nil) - synch((Reg*)r1->f.s2, dif); - } + return regbits; } uint64 -allreg(uint64 b, Rgn *r) +doregbits(int r) { - Var *v; - int i; - - v = var + r->varno; - r->regno = 0; - switch(v->etype) { - - default: - fatal("unknown etype %d/%E", bitno(b), v->etype); - break; - - case TINT8: - case TUINT8: - case TINT16: - case TUINT16: - case TINT32: - case TUINT32: - case TINT64: - case TUINT64: - case TINT: - case TUINT: - case TUINTPTR: - case TBOOL: - case TPTR32: - case TPTR64: - i = BtoR(~b); - if(i && r->cost > 0) { - r->regno = i; - return RtoB(i); - } - break; - - case TFLOAT32: - case TFLOAT64: - i = BtoF(~b); - if(i && r->cost > 0) { - r->regno = i; - return RtoB(i); - } - break; - } + USED(r); return 0; } -void -paint1(Reg *r, int bn) -{ - Reg *r1; - int z; - uint64 bb; - - z = bn/64; - bb = 1LL<<(bn%64); - if(r->act.b[z] & bb) - return; - for(;;) { - if(!(r->refbehind.b[z] & bb)) - break; - r1 = (Reg*)r->f.p1; - if(r1 == R) - break; - if(!(r1->refahead.b[z] & bb)) - break; - if(r1->act.b[z] & bb) - break; - r = r1; - } - - if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { - change -= CLOAD * r->f.loop; - } - for(;;) { - r->act.b[z] |= bb; - - if(r->f.prog->as != ANOP) { // don't give credit for NOPs - if(r->use1.b[z] & bb) - change += CREF * r->f.loop; - if((r->use2.b[z]|r->set.b[z]) & bb) - change += CREF * r->f.loop; - } - - if(STORE(r) & r->regdiff.b[z] & bb) { - change -= CLOAD * r->f.loop; - } - - if(r->refbehind.b[z] & bb) - for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) - if(r1->refahead.b[z] & bb) - paint1(r1, bn); - - if(!(r->refahead.b[z] & bb)) - break; - r1 = (Reg*)r->f.s2; - if(r1 != R) - if(r1->refbehind.b[z] & bb) - paint1(r1, bn); - r = (Reg*)r->f.s1; - if(r == R) - break; - if(r->act.b[z] & bb) - break; - if(!(r->refbehind.b[z] & bb)) - break; - } -} - -uint64 -paint2(Reg *r, int bn, int depth) -{ - Reg *r1; - int z; - uint64 bb, vreg; - - z = bn/64; - bb = 1LL << (bn%64); - vreg = regbits; - if(!(r->act.b[z] & bb)) - return vreg; - for(;;) { - if(!(r->refbehind.b[z] & bb)) - break; - r1 = (Reg*)r->f.p1; - if(r1 == R) - break; - if(!(r1->refahead.b[z] & bb)) - break; - if(!(r1->act.b[z] & bb)) - break; - r = r1; - } - for(;;) { - if(debug['R'] && debug['v']) - print(" paint2 %d %P\n", depth, r->f.prog); - - r->act.b[z] &= ~bb; - - vreg |= r->regu; - - if(r->refbehind.b[z] & bb) - for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) - if(r1->refahead.b[z] & bb) - vreg |= paint2(r1, bn, depth+1); - - if(!(r->refahead.b[z] & bb)) - break; - r1 = (Reg*)r->f.s2; - if(r1 != R) - if(r1->refbehind.b[z] & bb) - vreg |= paint2(r1, bn, depth+1); - r = (Reg*)r->f.s1; - if(r == R) - break; - if(!(r->act.b[z] & bb)) - break; - if(!(r->refbehind.b[z] & bb)) - break; - } - return vreg; -} - -void -paint3(Reg *r, int bn, uint64 rb, int rn) -{ - Reg *r1; - Prog *p; - int z; - uint64 bb; - - z = bn/64; - bb = 1LL << (bn%64); - if(r->act.b[z] & bb) - return; - for(;;) { - if(!(r->refbehind.b[z] & bb)) - break; - r1 = (Reg*)r->f.p1; - if(r1 == R) - break; - if(!(r1->refahead.b[z] & bb)) - break; - if(r1->act.b[z] & bb) - break; - r = r1; - } - - if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) - addmove(r, bn, rn, 0); - for(;;) { - r->act.b[z] |= bb; - p = r->f.prog; - - if(r->use1.b[z] & bb) { - if(debug['R'] && debug['v']) - print("%P", p); - addreg(&p->from, rn); - if(debug['R'] && debug['v']) - print(" ===change== %P\n", p); - } - if((r->use2.b[z]|r->set.b[z]) & bb) { - if(debug['R'] && debug['v']) - print("%P", p); - addreg(&p->to, rn); - if(debug['R'] && debug['v']) - print(" ===change== %P\n", p); - } - - if(STORE(r) & r->regdiff.b[z] & bb) - addmove(r, bn, rn, 1); - r->regu |= rb; - - if(r->refbehind.b[z] & bb) - for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) - if(r1->refahead.b[z] & bb) - paint3(r1, bn, rb, rn); - - if(!(r->refahead.b[z] & bb)) - break; - r1 = (Reg*)r->f.s2; - if(r1 != R) - if(r1->refbehind.b[z] & bb) - paint3(r1, bn, rb, rn); - r = (Reg*)r->f.s1; - if(r == R) - break; - if(r->act.b[z] & bb) - break; - if(!(r->refbehind.b[z] & bb)) - break; - } -} - -void -addreg(Adr *a, int rn) -{ - a->sym = nil; - a->node = nil; - a->name = NAME_NONE; - a->type = TYPE_REG; - a->reg = rn; - - ostats.ncvtreg++; -} - /* * track register variables including external registers: * bit reg @@ -1241,78 +170,3 @@ BtoF(uint64 b) return 0; return bitno(b) + REG_F0; } - -void -dumpone(Flow *f, int isreg) -{ - int z; - Bits bit; - Reg *r; - - print("%d:%P", f->loop, f->prog); - if(isreg) { - r = (Reg*)f; - for(z=0; zset.b[z] | - r->use1.b[z] | - r->use2.b[z] | - r->refbehind.b[z] | - r->refahead.b[z] | - r->calbehind.b[z] | - r->calahead.b[z] | - r->regdiff.b[z] | - r->act.b[z] | - 0; - if(bany(&bit)) { - print("\t"); - if(bany(&r->set)) - print(" s:%Q", r->set); - if(bany(&r->use1)) - print(" u1:%Q", r->use1); - if(bany(&r->use2)) - print(" u2:%Q", r->use2); - if(bany(&r->refbehind)) - print(" rb:%Q ", r->refbehind); - if(bany(&r->refahead)) - print(" ra:%Q ", r->refahead); - if(bany(&r->calbehind)) - print(" cb:%Q ", r->calbehind); - if(bany(&r->calahead)) - print(" ca:%Q ", r->calahead); - if(bany(&r->regdiff)) - print(" d:%Q ", r->regdiff); - if(bany(&r->act)) - print(" a:%Q ", r->act); - } - } - print("\n"); -} - - -void -dumpit(char *str, Flow *r0, int isreg) -{ - Flow *r, *r1; - - print("\n%s\n", str); - for(r = r0; r != nil; r = r->link) { - dumpone(r, isreg); - r1 = r->p2; - if(r1 != nil) { - print(" pred:"); - for(; r1 != nil; r1 = r1->p2link) - print(" %.4ud", (int)r1->prog->pc); - print("\n"); - } - // Print successors if it's not just the next one - if(r->s1 != r->link || r->s2 != nil) { - print(" succ:"); - if(r->s1 != nil) - print(" %.4ud", (int)r->s1->prog->pc); - if(r->s2 != nil) - print(" %.4ud", (int)r->s2->prog->pc); - print("\n"); - } - } -} diff --git a/src/cmd/gc/go.h b/src/cmd/gc/go.h index 0674b2ce6b..b53655b412 100644 --- a/src/cmd/gc/go.h +++ b/src/cmd/gc/go.h @@ -1679,14 +1679,22 @@ struct Arch void (*ginscall)(Node*, int); void (*igen)(Node*, Node*, Node*); void (*linkarchinit)(void); + void (*peep)(Prog*); void (*proginfo)(ProgInfo*, Prog*); void (*regalloc)(Node*, Type*, Node*); void (*regfree)(Node*); - void (*regopt)(Prog*); int (*regtyp)(Addr*); int (*sameaddr)(Addr*, Addr*); int (*smallindir)(Addr*, Addr*); int (*stackaddr)(Addr*); + uint64 (*excludedregs)(void); + uint64 (*RtoB)(int); + uint64 (*FtoB)(int); + int (*BtoR)(uint64); + int (*BtoF)(uint64); + int (*optoas)(int, Type*); + uint64 (*doregbits)(int); + char **(*regnames)(int*); }; void afunclit(Addr*, Node*); @@ -1716,6 +1724,7 @@ Prog* unpatch(Prog*); void datagostring(Strlit *sval, Addr *a); int ismem(Node*); int samereg(Node*, Node*); +void regopt(Prog*); EXTERN int32 pcloc; diff --git a/src/cmd/gc/pgen.c b/src/cmd/gc/pgen.c index 0b37bd0856..0774e061e5 100644 --- a/src/cmd/gc/pgen.c +++ b/src/cmd/gc/pgen.c @@ -302,7 +302,7 @@ compile(Node *fn) fixjmp(ptxt); if(!debug['N'] || debug['R'] || debug['P']) { - arch.regopt(ptxt); + regopt(ptxt); nilopt(ptxt); } arch.expandchecks(ptxt); diff --git a/src/cmd/6g/opt.h b/src/cmd/gc/popt.h similarity index 93% rename from src/cmd/6g/opt.h rename to src/cmd/gc/popt.h index 11befb6ad1..833f69a212 100644 --- a/src/cmd/6g/opt.h +++ b/src/cmd/gc/popt.h @@ -28,7 +28,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. - #define Z N #define Adr Addr @@ -91,7 +90,7 @@ struct Reg Bits regdiff; Bits act; - int32 regu; // register used bitmap + uint64 regu; // register used bitmap }; #define R ((Reg*)0) /*c2go extern Reg *R; */ @@ -116,15 +115,12 @@ struct Rgn short regno; }; -EXTERN int32 exregoffset; // not set -EXTERN int32 exfregoffset; // not set EXTERN Reg zreg; EXTERN Rgn region[NRGN]; EXTERN Rgn* rgp; EXTERN int nregion; EXTERN int nvar; -EXTERN int32 regbits; -EXTERN int32 exregbits; +EXTERN uint64 regbits; EXTERN Bits externs; EXTERN Bits params; EXTERN Bits consts; @@ -153,28 +149,23 @@ void addmove(Reg*, int, int, int); Bits mkvar(Reg*, Adr*); void prop(Reg*, Bits, Bits); void synch(Reg*, Bits); -uint32 allreg(uint32, Rgn*); +uint64 allreg(uint64, Rgn*); void paint1(Reg*, int); -uint32 paint2(Reg*, int, int); -void paint3(Reg*, int, uint32, int); +uint64 paint2(Reg*, int, int); +void paint3(Reg*, int, uint64, int); void addreg(Adr*, int); void dumpone(Flow*, int); void dumpit(char*, Flow*, int); /* * peep.c - */ void peep(Prog*); void excise(Flow*); int copyu(Prog*, Adr*, Adr*); - -uint32 RtoB(int); -uint32 FtoB(int); -int BtoR(uint32); -int BtoF(uint32); + */ /* * prog.c - */ void proginfo(ProgInfo*, Prog*); + */ diff --git a/src/cmd/gc/reg.c b/src/cmd/gc/reg.c new file mode 100644 index 0000000000..d7ffa1799f --- /dev/null +++ b/src/cmd/gc/reg.c @@ -0,0 +1,1193 @@ +// Derived from Inferno utils/6c/reg.c +// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include +#include +#include "go.h" +#include "popt.h" + +static Reg* firstr; +static int first = 1; + +int +rcmp(const void *a1, const void *a2) +{ + Rgn *p1, *p2; + int c1, c2; + + p1 = (Rgn*)a1; + p2 = (Rgn*)a2; + c1 = p2->cost; + c2 = p1->cost; + if(c1 -= c2) + return c1; + return p2->varno - p1->varno; +} + +static void +setaddrs(Bits bit) +{ + int i, n; + Var *v; + Node *node; + + while(bany(&bit)) { + // convert each bit to a variable + i = bnum(bit); + node = var[i].node; + n = var[i].name; + biclr(&bit, i); + + // disable all pieces of that variable + for(i=0; inode == node && v->name == n) + v->addr = 2; + } + } +} + +static Node* regnodes[64]; + +static void walkvardef(Node *n, Reg *r, int active); + +void +regopt(Prog *firstp) +{ + Reg *r, *r1; + Prog *p; + Graph *g; + ProgInfo info; + int i, z, active; + uint64 vreg, usedreg; + uint64 mask; + int nreg; + char **regnames; + Bits bit; + + if(first) { + fmtinstall('Q', Qconv); + first = 0; + } + + mergetemp(firstp); + + /* + * control flow is more complicated in generated go code + * than in generated c code. define pseudo-variables for + * registers, so we have complete register usage information. + */ + regnames = arch.regnames(&nreg); + nvar = nreg; + memset(var, 0, nreg*sizeof var[0]); + for(i=0; iopt = nil; + return; + } + + firstr = (Reg*)g->start; + + for(r = firstr; r != R; r = (Reg*)r->f.link) { + p = r->f.prog; + if(p->as == AVARDEF || p->as == AVARKILL) + continue; + arch.proginfo(&info, p); + + // Avoid making variables for direct-called functions. + if(p->as == ACALL && p->to.type == TYPE_MEM && p->to.name == NAME_EXTERN) + continue; + + // from vs to doesn't matter for registers. + r->use1.b[0] |= info.reguse | info.regindex; + r->set.b[0] |= info.regset; + + bit = mkvar(r, &p->from); + if(bany(&bit)) { + if(info.flags & LeftAddr) + setaddrs(bit); + if(info.flags & LeftRead) + for(z=0; zuse1.b[z] |= bit.b[z]; + if(info.flags & LeftWrite) + for(z=0; zset.b[z] |= bit.b[z]; + } + + // Compute used register for reg + if(info.flags & RegRead) + r->use1.b[0] |= arch.RtoB(p->reg); + + // Currently we never generate three register forms. + // If we do, this will need to change. + if(p->from3.type != TYPE_NONE) + fatal("regopt not implemented for from3"); + + bit = mkvar(r, &p->to); + if(bany(&bit)) { + if(info.flags & RightAddr) + setaddrs(bit); + if(info.flags & RightRead) + for(z=0; zuse2.b[z] |= bit.b[z]; + if(info.flags & RightWrite) + for(z=0; zset.b[z] |= bit.b[z]; + } + } + + for(i=0; iaddr) { + bit = blsh(i); + for(z=0; zaddr, v->etype, v->width, v->node, v->offset); + } + + if(debug['R'] && debug['v']) + dumpit("pass1", &firstr->f, 1); + + /* + * pass 2 + * find looping structure + */ + flowrpo(g); + + if(debug['R'] && debug['v']) + dumpit("pass2", &firstr->f, 1); + + /* + * pass 2.5 + * iterate propagating fat vardef covering forward + * r->act records vars with a VARDEF since the last CALL. + * (r->act will be reused in pass 5 for something else, + * but we'll be done with it by then.) + */ + active = 0; + for(r = firstr; r != R; r = (Reg*)r->f.link) { + r->f.active = 0; + r->act = zbits; + } + for(r = firstr; r != R; r = (Reg*)r->f.link) { + p = r->f.prog; + if(p->as == AVARDEF && isfat(((Node*)(p->to.node))->type) && ((Node*)(p->to.node))->opt != nil) { + active++; + walkvardef(p->to.node, r, active); + } + } + + /* + * pass 3 + * iterate propagating usage + * back until flow graph is complete + */ +loop1: + change = 0; + for(r = firstr; r != R; r = (Reg*)r->f.link) + r->f.active = 0; + for(r = firstr; r != R; r = (Reg*)r->f.link) + if(r->f.prog->as == ARET) + prop(r, zbits, zbits); +loop11: + /* pick up unreachable code */ + i = 0; + for(r = firstr; r != R; r = r1) { + r1 = (Reg*)r->f.link; + if(r1 && r1->f.active && !r->f.active) { + prop(r, zbits, zbits); + i = 1; + } + } + if(i) + goto loop11; + if(change) + goto loop1; + + if(debug['R'] && debug['v']) + dumpit("pass3", &firstr->f, 1); + + /* + * pass 4 + * iterate propagating register/variable synchrony + * forward until graph is complete + */ +loop2: + change = 0; + for(r = firstr; r != R; r = (Reg*)r->f.link) + r->f.active = 0; + synch(firstr, zbits); + if(change) + goto loop2; + + if(debug['R'] && debug['v']) + dumpit("pass4", &firstr->f, 1); + + /* + * pass 4.5 + * move register pseudo-variables into regu. + */ + if(nreg == 64) + mask = ~0ULL; // can't rely on C to shift by 64 + else + mask = (1ULL<f.link) { + r->regu = (r->refbehind.b[0] | r->set.b[0]) & mask; + r->set.b[0] &= ~mask; + r->use1.b[0] &= ~mask; + r->use2.b[0] &= ~mask; + r->refbehind.b[0] &= ~mask; + r->refahead.b[0] &= ~mask; + r->calbehind.b[0] &= ~mask; + r->calahead.b[0] &= ~mask; + r->regdiff.b[0] &= ~mask; + r->act.b[0] &= ~mask; + } + + if(debug['R'] && debug['v']) + dumpit("pass4.5", &firstr->f, 1); + + /* + * pass 5 + * isolate regions + * calculate costs (paint1) + */ + r = firstr; + if(r) { + for(z=0; zrefahead.b[z] | r->calahead.b[z]) & + ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); + if(bany(&bit) && !r->f.refset) { + // should never happen - all variables are preset + if(debug['w']) + print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); + r->f.refset = 1; + } + } + for(r = firstr; r != R; r = (Reg*)r->f.link) + r->act = zbits; + rgp = region; + nregion = 0; + for(r = firstr; r != R; r = (Reg*)r->f.link) { + for(z=0; zset.b[z] & + ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); + if(bany(&bit) && !r->f.refset) { + if(debug['w']) + print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); + r->f.refset = 1; + arch.excise(&r->f); + } + for(z=0; zact.b[z] | addrs.b[z]); + while(bany(&bit)) { + i = bnum(bit); + rgp->enter = r; + rgp->varno = i; + change = 0; + paint1(r, i); + biclr(&bit, i); + if(change <= 0) + continue; + rgp->cost = change; + nregion++; + if(nregion >= NRGN) { + if(debug['R'] && debug['v']) + print("too many regions\n"); + goto brk; + } + rgp++; + } + } +brk: + qsort(region, nregion, sizeof(region[0]), rcmp); + + if(debug['R'] && debug['v']) + dumpit("pass5", &firstr->f, 1); + + /* + * pass 6 + * determine used registers (paint2) + * replace code (paint3) + */ + rgp = region; + if(debug['R'] && debug['v']) + print("\nregisterizing\n"); + for(i=0; icost, rgp->varno, rgp->enter->f.prog->pc); + bit = blsh(rgp->varno); + usedreg = paint2(rgp->enter, rgp->varno, 0); + vreg = allreg(usedreg, rgp); + if(rgp->regno != 0) { + if(debug['R'] && debug['v']) { + Var *v; + + v = var + rgp->varno; + print("registerize %N+%lld (bit=%2d et=%2E) in %R usedreg=%#llx vreg=%#llx\n", + v->node, v->offset, rgp->varno, v->etype, rgp->regno, usedreg, vreg); + } + paint3(rgp->enter, rgp->varno, vreg, rgp->regno); + } + rgp++; + } + + /* + * free aux structures. peep allocates new ones. + */ + for(i=0; iopt = nil; + flowend(g); + firstr = R; + + if(debug['R'] && debug['v']) { + // Rebuild flow graph, since we inserted instructions + g = flowstart(firstp, sizeof(Reg)); + firstr = (Reg*)g->start; + dumpit("pass6", &firstr->f, 1); + flowend(g); + firstr = R; + } + + /* + * pass 7 + * peep-hole on basic block + */ + if(!debug['R'] || debug['P']) + arch.peep(firstp); + + /* + * eliminate nops + */ + for(p=firstp; p!=P; p=p->link) { + while(p->link != P && p->link->as == ANOP) + p->link = p->link->link; + if(p->to.type == TYPE_BRANCH) + while(p->to.u.branch != P && p->to.u.branch->as == ANOP) + p->to.u.branch = p->to.u.branch->link; + } + + if(debug['R']) { + if(ostats.ncvtreg || + ostats.nspill || + ostats.nreload || + ostats.ndelmov || + ostats.nvar || + ostats.naddr || + 0) + print("\nstats\n"); + + if(ostats.ncvtreg) + print(" %4d cvtreg\n", ostats.ncvtreg); + if(ostats.nspill) + print(" %4d spill\n", ostats.nspill); + if(ostats.nreload) + print(" %4d reload\n", ostats.nreload); + if(ostats.ndelmov) + print(" %4d delmov\n", ostats.ndelmov); + if(ostats.nvar) + print(" %4d var\n", ostats.nvar); + if(ostats.naddr) + print(" %4d addr\n", ostats.naddr); + + memset(&ostats, 0, sizeof(ostats)); + } +} + +static void +walkvardef(Node *n, Reg *r, int active) +{ + Reg *r1, *r2; + int bn; + Var *v; + + for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) { + if(r1->f.active == active) + break; + r1->f.active = active; + if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n) + break; + for(v=n->opt; v!=nil; v=v->nextinnode) { + bn = v - var; + biset(&r1->act, bn); + } + if(r1->f.prog->as == ACALL) + break; + } + + for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1) + if(r2->f.s2 != nil) + walkvardef(n, (Reg*)r2->f.s2, active); +} + +/* + * add mov b,rn + * just after r + */ +void +addmove(Reg *r, int bn, int rn, int f) +{ + Prog *p, *p1; + Adr *a; + Var *v; + + p1 = mal(sizeof(*p1)); + clearp(p1); + p1->pc = 9999; + + p = r->f.prog; + p1->link = p->link; + p->link = p1; + p1->lineno = p->lineno; + + v = var + bn; + + a = &p1->to; + a->offset = v->offset; + a->etype = v->etype; + a->type = TYPE_MEM; + a->name = v->name; + a->node = v->node; + a->sym = linksym(v->node->sym); + /* NOTE(rsc): 9g did + if(a->etype == TARRAY) + a->type = TYPE_ADDR; + else if(a->sym == nil) + a->type = TYPE_CONST; + */ + + p1->as = arch.optoas(OAS, types[(uchar)v->etype]); + // TODO(rsc): Remove special case here. + if((arch.thechar == '9' || arch.thechar == '5') && v->etype == TBOOL) + p1->as = arch.optoas(OAS, types[TUINT8]); + p1->from.type = TYPE_REG; + p1->from.reg = rn; + p1->from.name = NAME_NONE; + if(!f) { + p1->from = *a; + *a = zprog.from; + a->type = TYPE_REG; + a->reg = rn; + } + if(debug['R'] && debug['v']) + print("%P ===add=== %P\n", p, p1); + ostats.nspill++; +} + +static int +overlap(int64 o1, int w1, int64 o2, int w2) +{ + int64 t1, t2; + + t1 = o1+w1; + t2 = o2+w2; + + if(!(t1 > o2 && t2 > o1)) + return 0; + + return 1; +} + +Bits +mkvar(Reg *r, Adr *a) +{ + Var *v; + int i, n, et, z, flag; + int64 w; + uint64 regu; + int64 o; + Bits bit; + Node *node; + + /* + * mark registers used + */ + if(a->type == TYPE_NONE) + goto none; + + if(r != R) + r->use1.b[0] |= arch.doregbits(a->index); // TODO: Use RtoB + + switch(a->type) { + default: + regu = arch.doregbits(a->reg) | arch.RtoB(a->reg); // TODO: Use RtoB + if(regu == 0) + goto none; + bit = zbits; + bit.b[0] = regu; + return bit; + + case TYPE_ADDR: + // TODO(rsc): Remove special case here. + if(arch.thechar == '9' || arch.thechar == '5') + goto memcase; + a->type = TYPE_MEM; + bit = mkvar(r, a); + setaddrs(bit); + a->type = TYPE_ADDR; + ostats.naddr++; + goto none; + + case TYPE_MEM: + memcase: + if(r != R) { + r->use1.b[0] |= arch.RtoB(a->reg); + /* NOTE: 5g did + if(r->f.prog->scond & (C_PBIT|C_WBIT)) + r->set.b[0] |= RtoB(a->reg); + */ + } + switch(a->name) { + default: + goto none; + case NAME_EXTERN: + case NAME_STATIC: + case NAME_PARAM: + case NAME_AUTO: + n = a->name; + break; + } + } + + node = a->node; + if(node == N || node->op != ONAME || node->orig == N) + goto none; + node = node->orig; + if(node->orig != node) + fatal("%D: bad node", a); + if(node->sym == S || node->sym->name[0] == '.') + goto none; + et = a->etype; + o = a->offset; + w = a->width; + if(w < 0) + fatal("bad width %lld for %D", w, a); + + flag = 0; + for(i=0; inode == node && v->name == n) { + if(v->offset == o) + if(v->etype == et) + if(v->width == w) { + // TODO(rsc): Remove special case for arm here. + if(!flag || arch.thechar != '5') + return blsh(i); + } + + // if they overlap, disable both + if(overlap(v->offset, v->width, o, w)) { +// print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et); + v->addr = 1; + flag = 1; + } + } + } + + switch(et) { + case 0: + case TFUNC: + goto none; + } + + if(nvar >= NVAR) { + if(debug['w'] > 1 && node != N) + fatal("variable not optimized: %#N", node); + + // If we're not tracking a word in a variable, mark the rest as + // having its address taken, so that we keep the whole thing + // live at all calls. otherwise we might optimize away part of + // a variable but not all of it. + for(i=0; inode == node) + v->addr = 1; + } + goto none; + } + + i = nvar; + nvar++; + v = var+i; + v->offset = o; + v->name = n; + v->etype = et; + v->width = w; + v->addr = flag; // funny punning + v->node = node; + + // node->opt is the head of a linked list + // of Vars within the given Node, so that + // we can start at a Var and find all the other + // Vars in the same Go variable. + v->nextinnode = node->opt; + node->opt = v; + + bit = blsh(i); + if(n == NAME_EXTERN || n == NAME_STATIC) + for(z=0; zclass == PPARAM) + for(z=0; zclass == PPARAMOUT) + for(z=0; zaddrtaken) + v->addr = 1; + + // Disable registerization for globals, because: + // (1) we might panic at any time and we want the recovery code + // to see the latest values (issue 1304). + // (2) we don't know what pointers might point at them and we want + // loads via those pointers to see updated values and vice versa (issue 7995). + // + // Disable registerization for results if using defer, because the deferred func + // might recover and return, causing the current values to be used. + if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT)) + v->addr = 1; + + if(debug['R']) + print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); + ostats.nvar++; + + return bit; + +none: + return zbits; +} + +void +prop(Reg *r, Bits ref, Bits cal) +{ + Reg *r1, *r2; + int z, i, j; + Var *v, *v1; + + for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { + for(z=0; zrefahead.b[z]; + if(ref.b[z] != r1->refahead.b[z]) { + r1->refahead.b[z] = ref.b[z]; + change++; + } + cal.b[z] |= r1->calahead.b[z]; + if(cal.b[z] != r1->calahead.b[z]) { + r1->calahead.b[z] = cal.b[z]; + change++; + } + } + switch(r1->f.prog->as) { + case ACALL: + if(noreturn(r1->f.prog)) + break; + + // Mark all input variables (ivar) as used, because that's what the + // liveness bitmaps say. The liveness bitmaps say that so that a + // panic will not show stale values in the parameter dump. + // Mark variables with a recent VARDEF (r1->act) as used, + // so that the optimizer flushes initializations to memory, + // so that if a garbage collection happens during this CALL, + // the collector will see initialized memory. Again this is to + // match what the liveness bitmaps say. + for(z=0; zact.b[z]; + ref.b[z] = 0; + } + + // cal.b is the current approximation of what's live across the call. + // Every bit in cal.b is a single stack word. For each such word, + // find all the other tracked stack words in the same Go variable + // (struct/slice/string/interface) and mark them live too. + // This is necessary because the liveness analysis for the garbage + // collector works at variable granularity, not at word granularity. + // It is fundamental for slice/string/interface: the garbage collector + // needs the whole value, not just some of the words, in order to + // interpret the other bits correctly. Specifically, slice needs a consistent + // ptr and cap, string needs a consistent ptr and len, and interface + // needs a consistent type word and data word. + for(z=0; z= nvar || ((cal.b[z]>>i)&1) == 0) + continue; + v = var+z*64+i; + if(v->node->opt == nil) // v represents fixed register, not Go variable + continue; + + // v->node->opt is the head of a linked list of Vars + // corresponding to tracked words from the Go variable v->node. + // Walk the list and set all the bits. + // For a large struct this could end up being quadratic: + // after the first setting, the outer loop (for z, i) would see a 1 bit + // for all of the remaining words in the struct, and for each such + // word would go through and turn on all the bits again. + // To avoid the quadratic behavior, we only turn on the bits if + // v is the head of the list or if the head's bit is not yet turned on. + // This will set the bits at most twice, keeping the overall loop linear. + v1 = v->node->opt; + j = v1 - var; + if(v == v1 || !btest(&cal, j)) { + for(; v1 != nil; v1 = v1->nextinnode) { + j = v1 - var; + biset(&cal, j); + } + } + } + } + break; + + case ATEXT: + for(z=0; zset.b[z]) | + r1->use1.b[z] | r1->use2.b[z]; + cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); + r1->refbehind.b[z] = ref.b[z]; + r1->calbehind.b[z] = cal.b[z]; + } + if(r1->f.active) + break; + r1->f.active = 1; + } + for(; r != r1; r = (Reg*)r->f.p1) + for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) + prop(r2, r->refbehind, r->calbehind); +} + +void +synch(Reg *r, Bits dif) +{ + Reg *r1; + int z; + + for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { + for(z=0; zrefbehind.b[z] & r1->refahead.b[z])) | + r1->set.b[z] | r1->regdiff.b[z]; + if(dif.b[z] != r1->regdiff.b[z]) { + r1->regdiff.b[z] = dif.b[z]; + change++; + } + } + if(r1->f.active) + break; + r1->f.active = 1; + for(z=0; zcalbehind.b[z] & r1->calahead.b[z]); + if(r1->f.s2 != nil) + synch((Reg*)r1->f.s2, dif); + } +} + +uint64 +allreg(uint64 b, Rgn *r) +{ + Var *v; + int i; + + v = var + r->varno; + r->regno = 0; + switch(v->etype) { + + default: + fatal("unknown etype %d/%E", bitno(b), v->etype); + break; + + case TINT8: + case TUINT8: + case TINT16: + case TUINT16: + case TINT32: + case TUINT32: + case TINT64: + case TUINT64: + case TINT: + case TUINT: + case TUINTPTR: + case TBOOL: + case TPTR32: + case TPTR64: + i = arch.BtoR(~b); + if(i && r->cost > 0) { + r->regno = i; + return arch.RtoB(i); + } + break; + + case TFLOAT32: + case TFLOAT64: + i = arch.BtoF(~b); + if(i && r->cost > 0) { + r->regno = i; + return arch.FtoB(i); + } + break; + } + return 0; +} + +void +paint1(Reg *r, int bn) +{ + Reg *r1; + int z; + uint64 bb; + + z = bn/64; + bb = 1LL<<(bn%64); + if(r->act.b[z] & bb) + return; + for(;;) { + if(!(r->refbehind.b[z] & bb)) + break; + r1 = (Reg*)r->f.p1; + if(r1 == R) + break; + if(!(r1->refahead.b[z] & bb)) + break; + if(r1->act.b[z] & bb) + break; + r = r1; + } + + if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { + change -= CLOAD * r->f.loop; + } + for(;;) { + r->act.b[z] |= bb; + + if(r->f.prog->as != ANOP) { // don't give credit for NOPs + if(r->use1.b[z] & bb) + change += CREF * r->f.loop; + if((r->use2.b[z]|r->set.b[z]) & bb) + change += CREF * r->f.loop; + } + + if(STORE(r) & r->regdiff.b[z] & bb) { + change -= CLOAD * r->f.loop; + } + + if(r->refbehind.b[z] & bb) + for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) + if(r1->refahead.b[z] & bb) + paint1(r1, bn); + + if(!(r->refahead.b[z] & bb)) + break; + r1 = (Reg*)r->f.s2; + if(r1 != R) + if(r1->refbehind.b[z] & bb) + paint1(r1, bn); + r = (Reg*)r->f.s1; + if(r == R) + break; + if(r->act.b[z] & bb) + break; + if(!(r->refbehind.b[z] & bb)) + break; + } +} + +uint64 +paint2(Reg *r, int bn, int depth) +{ + Reg *r1; + int z; + uint64 bb, vreg; + + z = bn/64; + bb = 1LL << (bn%64); + vreg = regbits; + if(!(r->act.b[z] & bb)) + return vreg; + for(;;) { + if(!(r->refbehind.b[z] & bb)) + break; + r1 = (Reg*)r->f.p1; + if(r1 == R) + break; + if(!(r1->refahead.b[z] & bb)) + break; + if(!(r1->act.b[z] & bb)) + break; + r = r1; + } + for(;;) { + if(debug['R'] && debug['v']) + print(" paint2 %d %P\n", depth, r->f.prog); + + r->act.b[z] &= ~bb; + + vreg |= r->regu; + + if(r->refbehind.b[z] & bb) + for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) + if(r1->refahead.b[z] & bb) + vreg |= paint2(r1, bn, depth+1); + + if(!(r->refahead.b[z] & bb)) + break; + r1 = (Reg*)r->f.s2; + if(r1 != R) + if(r1->refbehind.b[z] & bb) + vreg |= paint2(r1, bn, depth+1); + r = (Reg*)r->f.s1; + if(r == R) + break; + if(!(r->act.b[z] & bb)) + break; + if(!(r->refbehind.b[z] & bb)) + break; + } + + return vreg; +} + +void +paint3(Reg *r, int bn, uint64 rb, int rn) +{ + Reg *r1; + Prog *p; + int z; + uint64 bb; + + z = bn/64; + bb = 1LL << (bn%64); + if(r->act.b[z] & bb) + return; + for(;;) { + if(!(r->refbehind.b[z] & bb)) + break; + r1 = (Reg*)r->f.p1; + if(r1 == R) + break; + if(!(r1->refahead.b[z] & bb)) + break; + if(r1->act.b[z] & bb) + break; + r = r1; + } + + if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) + addmove(r, bn, rn, 0); + for(;;) { + r->act.b[z] |= bb; + p = r->f.prog; + + if(r->use1.b[z] & bb) { + if(debug['R'] && debug['v']) + print("%P", p); + addreg(&p->from, rn); + if(debug['R'] && debug['v']) + print(" ===change== %P\n", p); + } + if((r->use2.b[z]|r->set.b[z]) & bb) { + if(debug['R'] && debug['v']) + print("%P", p); + addreg(&p->to, rn); + if(debug['R'] && debug['v']) + print(" ===change== %P\n", p); + } + + if(STORE(r) & r->regdiff.b[z] & bb) + addmove(r, bn, rn, 1); + r->regu |= rb; + + if(r->refbehind.b[z] & bb) + for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) + if(r1->refahead.b[z] & bb) + paint3(r1, bn, rb, rn); + + if(!(r->refahead.b[z] & bb)) + break; + r1 = (Reg*)r->f.s2; + if(r1 != R) + if(r1->refbehind.b[z] & bb) + paint3(r1, bn, rb, rn); + r = (Reg*)r->f.s1; + if(r == R) + break; + if(r->act.b[z] & bb) + break; + if(!(r->refbehind.b[z] & bb)) + break; + } +} + +void +addreg(Adr *a, int rn) +{ + a->sym = nil; + a->node = nil; + a->offset = 0; + a->type = TYPE_REG; + a->reg = rn; + a->name = 0; + + ostats.ncvtreg++; +} + +void +dumpone(Flow *f, int isreg) +{ + int z; + Bits bit; + Reg *r; + + print("%d:%P", f->loop, f->prog); + if(isreg) { + r = (Reg*)f; + for(z=0; zset.b[z] | + r->use1.b[z] | + r->use2.b[z] | + r->refbehind.b[z] | + r->refahead.b[z] | + r->calbehind.b[z] | + r->calahead.b[z] | + r->regdiff.b[z] | + r->act.b[z] | + 0; + if(bany(&bit)) { + print("\t"); + if(bany(&r->set)) + print(" s:%Q", r->set); + if(bany(&r->use1)) + print(" u1:%Q", r->use1); + if(bany(&r->use2)) + print(" u2:%Q", r->use2); + if(bany(&r->refbehind)) + print(" rb:%Q ", r->refbehind); + if(bany(&r->refahead)) + print(" ra:%Q ", r->refahead); + if(bany(&r->calbehind)) + print(" cb:%Q ", r->calbehind); + if(bany(&r->calahead)) + print(" ca:%Q ", r->calahead); + if(bany(&r->regdiff)) + print(" d:%Q ", r->regdiff); + if(bany(&r->act)) + print(" a:%Q ", r->act); + } + } + print("\n"); +} + +void +dumpit(char *str, Flow *r0, int isreg) +{ + Flow *r, *r1; + + print("\n%s\n", str); + for(r = r0; r != nil; r = r->link) { + dumpone(r, isreg); + r1 = r->p2; + if(r1 != nil) { + print(" pred:"); + for(; r1 != nil; r1 = r1->p2link) + print(" %.4ud", (int)r1->prog->pc); + if(r->p1 != nil) + print(" (and %.4ud)", (int)r->p1->prog->pc); + else + print(" (only)"); + print("\n"); + } + // Print successors if it's not just the next one + if(r->s1 != r->link || r->s2 != nil) { + print(" succ:"); + if(r->s1 != nil) + print(" %.4ud", (int)r->s1->prog->pc); + if(r->s2 != nil) + print(" %.4ud", (int)r->s2->prog->pc); + print("\n"); + } + } +}