Now there is only one registerizer shared among all the systems.
There are some unfortunate special cases based on arch.thechar
in reg.c, to preserve bit-for-bit compatibility during the refactoring.
Most are probably bugs one way or another and should be revisited.
Change-Id: I153b435c0eaa05bbbeaf8876822eeb6dedaae3cf
Reviewed-on: https://go-review.googlesource.com/3883
Reviewed-by: Austin Clements <austin@google.com>
arch.ginscall = ginscall;
arch.igen = igen;
arch.linkarchinit = linkarchinit;
+ arch.peep = peep;
arch.proginfo = proginfo;
arch.regalloc = regalloc;
arch.regfree = regfree;
- arch.regopt = regopt;
arch.regtyp = regtyp;
arch.sameaddr = sameaddr;
arch.smallindir = smallindir;
arch.stackaddr = stackaddr;
+ arch.excludedregs = excludedregs;
+ arch.RtoB = RtoB;
+ arch.FtoB = RtoB;
+ arch.BtoR = BtoR;
+ arch.BtoF = BtoF;
+ arch.optoas = optoas;
+ arch.doregbits = doregbits;
+ arch.regnames = regnames;
gcmain(argc, argv);
}
int smallindir(Addr*, Addr*);
int stackaddr(Addr*);
Prog* unpatch(Prog*);
+
+/*
+ * reg.c: hooks supplied to the shared registerizer; the arch setup code in this patch stores each one in the arch field of the same name
+ */
+uint64 excludedregs(void);
+uint64 RtoB(int);
+uint64 FtoB(int);	// NOTE(review): the arch setup in this patch assigns arch.FtoB = RtoB and no FtoB definition appears in the visible reg.c hunks - confirm this prototype is still needed
+int BtoR(uint64);
+int BtoF(uint64);
+uint64 doregbits(int);
+char** regnames(int*);
+
+/*
+ * peep.c: peephole pass entry point, stored in arch.peep by the arch setup code
+ */
+void peep(Prog*);
#include <u.h>
#include <libc.h>
#include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
static Prog* appendpp(Prog*, int, int, int, int32, int, int, int32);
static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *r0);
+++ /dev/null
-// Inferno utils/5c/gc.h
-// http://code.google.com/p/inferno-os/source/browse/utils/5c/gc.h
-//
-// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
-// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
-// Portions Copyright © 1997-1999 Vita Nuova Limited
-// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
-// Portions Copyright © 2004,2006 Bruce Ellis
-// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
-// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
-// Portions Copyright © 2009 The Go Authors. All rights reserved.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-
-#define Z N
-#define Adr Addr
-
-#define D_HI TYPE_NONE
-#define D_LO TYPE_NONE
-
-#define BLOAD(r) band(bnot(r->refbehind), r->refahead)
-#define BSTORE(r) band(bnot(r->calbehind), r->calahead)
-#define LOAD(r) (~r->refbehind.b[z] & r->refahead.b[z])
-#define STORE(r) (~r->calbehind.b[z] & r->calahead.b[z])
-
-#define CLOAD 5
-#define CREF 5
-#define CINF 1000
-#define LOOP 3
-
-typedef struct Reg Reg;
-typedef struct Rgn Rgn;
-
-/*c2go
-extern Node *Z;
-enum
-{
- D_HI = TYPE_NONE,
- D_LO = TYPE_NONE,
- CLOAD = 5,
- CREF = 5,
- CINF = 1000,
- LOOP = 3,
-};
-
-uint32 BLOAD(Reg*);
-uint32 BSTORE(Reg*);
-uint64 LOAD(Reg*);
-uint64 STORE(Reg*);
-*/
-
-// A Reg is a wrapper around a single Prog (one instruction) that holds
-// register optimization information while the optimizer runs.
-// r->prog is the instruction.
-// r->prog->opt points back to r.
-struct Reg
-{
- Flow f;
-
- Bits set; // regopt variables written by this instruction.
- Bits use1; // regopt variables read by prog->from.
- Bits use2; // regopt variables read by prog->to.
-
- // refahead/refbehind are the regopt variables whose current
- // value may be used in the following/preceding instructions
- // up to a CALL (or the value is clobbered).
- Bits refbehind;
- Bits refahead;
- // calahead/calbehind are similar, but for variables in
- // instructions that are reachable after hitting at least one
- // CALL.
- Bits calbehind;
- Bits calahead;
- Bits regdiff;
- Bits act;
-
- int32 regu; // register used bitmap
-};
-#define R ((Reg*)0)
-/*c2go extern Reg *R; */
-
-#define NRGN 600
-/*c2go enum { NRGN = 600 }; */
-
-// A Rgn represents a single regopt variable over a region of code
-// where a register could potentially be dedicated to that variable.
-// The code encompassed by a Rgn is defined by the flow graph,
-// starting at enter, flood-filling forward while varno is refahead
-// and backward while varno is refbehind, and following branches. A
-// single variable may be represented by multiple disjoint Rgns and
-// each Rgn may choose a different register for that variable.
-// Registers are allocated to regions greedily in order of descending
-// cost.
-struct Rgn
-{
- Reg* enter;
- short cost;
- short varno;
- short regno;
-};
-
-EXTERN Reg zreg;
-EXTERN Reg* freer;
-EXTERN Reg** rpo2r;
-EXTERN Rgn region[NRGN];
-EXTERN Rgn* rgp;
-EXTERN int nregion;
-EXTERN int nvar;
-EXTERN int32 regbits;
-EXTERN Bits externs;
-EXTERN Bits params;
-EXTERN Bits consts;
-EXTERN Bits addrs;
-EXTERN Bits ivar;
-EXTERN Bits ovar;
-EXTERN int change;
-EXTERN int32 maxnr;
-EXTERN int32* idom;
-
-EXTERN struct
-{
- int32 ncvtreg;
- int32 nspill;
- int32 nreload;
- int32 ndelmov;
- int32 nvar;
- int32 naddr;
-} ostats;
-
-/*
- * reg.c
- */
-Reg* rega(void);
-int rcmp(const void*, const void*);
-void regopt(Prog*);
-void addmove(Reg*, int, int, int);
-Bits mkvar(Reg *r, Adr *a);
-void prop(Reg*, Bits, Bits);
-void synch(Reg*, Bits);
-uint32 allreg(uint32, Rgn*);
-void paint1(Reg*, int);
-uint32 paint2(Reg*, int, int);
-void paint3(Reg*, int, uint32, int);
-void addreg(Adr*, int);
-void dumpit(char *str, Flow *r0, int);
-
-/*
- * peep.c
- */
-void peep(Prog*);
-void excise(Flow*);
-int copyu(Prog*, Adr*, Adr*);
-
-uint32 RtoB(int);
-uint32 FtoB(int);
-int BtoR(uint32);
-int BtoF(uint32);
-
-/*
- * prog.c
- */
-void proginfo(ProgInfo*, Prog*);
#include <u.h>
#include <libc.h>
#include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
static int xtramodes(Graph*, Flow*, Adr*);
static int shortprop(Flow *r);
static int copyau1(Prog *p, Adr *v);
static int isdconst(Addr *a);
static int isfloatreg(Addr*);
+static int copyu(Prog *p, Adr *v, Adr *s);
static uint32 gactive;
* 4 if set and used
* 0 otherwise (not touched)
*/
-int
+static int
copyu(Prog *p, Adr *v, Adr *s)
{
switch(p->as) {
a->reg == reg->reg &&
0 <= a->offset && a->offset < 4096;
}
+
+void
+excise(Flow *r)	// excise hands the instruction wrapped by flow node r to nopout; moved into this file from reg.c by this change
+{
+ Prog *p;
+
+ p = r->prog;
+ nopout(p);	// presumably rewrites p as a no-op in place (nopout is defined elsewhere) - confirm
+}
#include <u.h>
#include <libc.h>
#include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
enum
{
if(((p->scond & C_SCOND) != C_SCOND_NONE) && (info->flags & RightWrite))
info->flags |= RightRead;
+
+ switch(p->as) {
+ case ADIV:
+ case ADIVU:
+ case AMOD:
+ case AMODU:
+ info->regset |= RtoB(REG_R12);
+ break;
+ }
}
#include <u.h>
#include <libc.h>
#include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
-#define NREGVAR 32
-#define REGBITS ((uint64)0xffffffffull)
-/*c2go enum {
+enum {
NREGVAR = 32,
- REGBITS = 0xffffffff,
};
-*/
-
- void addsplits(void);
-static Reg* firstr;
-static int first = 1;
-
-int
-rcmp(const void *a1, const void *a2)
-{
- Rgn *p1, *p2;
- int c1, c2;
-
- p1 = (Rgn*)a1;
- p2 = (Rgn*)a2;
- c1 = p2->cost;
- c2 = p1->cost;
- if(c1 -= c2)
- return c1;
- return p2->varno - p1->varno;
-}
-
-void
-excise(Flow *r)
-{
- Prog *p;
-
- p = r->prog;
- nopout(p);
-}
-
-static void
-setaddrs(Bits bit)
-{
- int i, n;
- Var *v;
- Node *node;
-
- while(bany(&bit)) {
- // convert each bit to a variable
- i = bnum(bit);
- node = var[i].node;
- n = var[i].name;
- biclr(&bit, i);
-
- // disable all pieces of that variable
- for(i=0; i<nvar; i++) {
- v = var+i;
- if(v->node == node && v->name == n)
- v->addr = 2;
- }
- }
-}
static char* regname[] = {
".R0",
".F15",
};
-static Node* regnodes[NREGVAR];
-
-static void walkvardef(Node *n, Reg *r, int active);
-
-void
-regopt(Prog *firstp)
-{
- Reg *r, *r1;
- Prog *p;
- Graph *g;
- int i, z, active;
- uint32 vreg;
- Bits bit;
- ProgInfo info;
-
- if(first) {
- fmtinstall('Q', Qconv);
- first = 0;
- }
-
- mergetemp(firstp);
-
- /*
- * control flow is more complicated in generated go code
- * than in generated c code. define pseudo-variables for
- * registers, so we have complete register usage information.
- */
- nvar = NREGVAR;
- memset(var, 0, NREGVAR*sizeof var[0]);
- for(i=0; i<NREGVAR; i++) {
- if(regnodes[i] == N)
- regnodes[i] = newname(lookup(regname[i]));
- var[i].node = regnodes[i];
- }
-
- regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC);
- for(z=0; z<BITS; z++) {
- externs.b[z] = 0;
- params.b[z] = 0;
- consts.b[z] = 0;
- addrs.b[z] = 0;
- ivar.b[z] = 0;
- ovar.b[z] = 0;
- }
-
- /*
- * pass 1
- * build aux data structure
- * allocate pcs
- * find use and set of variables
- */
- g = flowstart(firstp, sizeof(Reg));
- if(g == nil) {
- for(i=0; i<nvar; i++)
- var[i].node->opt = nil;
- return;
- }
-
- firstr = (Reg*)g->start;
-
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- p = r->f.prog;
- if(p->as == AVARDEF || p->as == AVARKILL)
- continue;
- proginfo(&info, p);
-
- // Avoid making variables for direct-called functions.
- if(p->as == ABL && p->to.name == NAME_EXTERN)
- continue;
-
- bit = mkvar(r, &p->from);
- if(info.flags & LeftRead)
- for(z=0; z<BITS; z++)
- r->use1.b[z] |= bit.b[z];
- if(info.flags & LeftAddr)
- setaddrs(bit);
-
- if(info.flags & RegRead)
- r->use1.b[0] |= RtoB(p->reg);
-
- if(info.flags & (RightAddr | RightRead | RightWrite)) {
- bit = mkvar(r, &p->to);
- if(info.flags & RightAddr)
- setaddrs(bit);
- if(info.flags & RightRead)
- for(z=0; z<BITS; z++)
- r->use2.b[z] |= bit.b[z];
- if(info.flags & RightWrite)
- for(z=0; z<BITS; z++)
- r->set.b[z] |= bit.b[z];
- }
-
- /* the mod/div runtime routines smash R12 */
- if(p->as == ADIV || p->as == ADIVU || p->as == AMOD || p->as == AMODU)
- r->set.b[0] |= RtoB(REG_R12);
- }
- if(firstr == R)
- return;
-
- for(i=0; i<nvar; i++) {
- Var *v = var+i;
- if(v->addr) {
- bit = blsh(i);
- for(z=0; z<BITS; z++)
- addrs.b[z] |= bit.b[z];
- }
-
- if(debug['R'] && debug['v'])
- print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
- i, v->addr, v->etype, v->width, v->node, v->offset);
- }
-
- if(debug['R'] && debug['v'])
- dumpit("pass1", &firstr->f, 1);
-
- /*
- * pass 2
- * find looping structure
- */
- flowrpo(g);
-
- if(debug['R'] && debug['v'])
- dumpit("pass2", &firstr->f, 1);
-
- /*
- * pass 2.5
- * iterate propagating fat vardef covering forward
- * r->act records vars with a VARDEF since the last CALL.
- * (r->act will be reused in pass 5 for something else,
- * but we'll be done with it by then.)
- */
- active = 0;
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- r->f.active = 0;
- r->act = zbits;
- }
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- p = r->f.prog;
- if(p->as == AVARDEF && isfat(((Node*)(p->to.node))->type) && ((Node*)(p->to.node))->opt != nil) {
- active++;
- walkvardef(p->to.node, r, active);
- }
- }
-
- /*
- * pass 3
- * iterate propagating usage
- * back until flow graph is complete
- */
-loop1:
- change = 0;
- for(r = firstr; r != R; r = (Reg*)r->f.link)
- r->f.active = 0;
- for(r = firstr; r != R; r = (Reg*)r->f.link)
- if(r->f.prog->as == ARET)
- prop(r, zbits, zbits);
-loop11:
- /* pick up unreachable code */
- i = 0;
- for(r = firstr; r != R; r = r1) {
- r1 = (Reg*)r->f.link;
- if(r1 && r1->f.active && !r->f.active) {
- prop(r, zbits, zbits);
- i = 1;
- }
- }
- if(i)
- goto loop11;
- if(change)
- goto loop1;
-
- if(debug['R'] && debug['v'])
- dumpit("pass3", &firstr->f, 1);
-
-
- /*
- * pass 4
- * iterate propagating register/variable synchrony
- * forward until graph is complete
- */
-loop2:
- change = 0;
- for(r = firstr; r != R; r = (Reg*)r->f.link)
- r->f.active = 0;
- synch(firstr, zbits);
- if(change)
- goto loop2;
-
- addsplits();
-
- if(debug['R'] && debug['v'])
- dumpit("pass4", &firstr->f, 1);
-
- if(debug['R'] > 1) {
- print("\nprop structure:\n");
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- print("%d:%P", r->f.loop, r->f.prog);
- for(z=0; z<BITS; z++) {
- bit.b[z] = r->set.b[z] |
- r->refahead.b[z] | r->calahead.b[z] |
- r->refbehind.b[z] | r->calbehind.b[z] |
- r->use1.b[z] | r->use2.b[z];
- bit.b[z] &= ~addrs.b[z];
- }
-
- if(bany(&bit)) {
- print("\t");
- if(bany(&r->use1))
- print(" u1=%Q", r->use1);
- if(bany(&r->use2))
- print(" u2=%Q", r->use2);
- if(bany(&r->set))
- print(" st=%Q", r->set);
- if(bany(&r->refahead))
- print(" ra=%Q", r->refahead);
- if(bany(&r->calahead))
- print(" ca=%Q", r->calahead);
- if(bany(&r->refbehind))
- print(" rb=%Q", r->refbehind);
- if(bany(&r->calbehind))
- print(" cb=%Q", r->calbehind);
- }
- print("\n");
- }
- }
-
- /*
- * pass 4.5
- * move register pseudo-variables into regu.
- */
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
-
- r->set.b[0] &= ~REGBITS;
- r->use1.b[0] &= ~REGBITS;
- r->use2.b[0] &= ~REGBITS;
- r->refbehind.b[0] &= ~REGBITS;
- r->refahead.b[0] &= ~REGBITS;
- r->calbehind.b[0] &= ~REGBITS;
- r->calahead.b[0] &= ~REGBITS;
- r->regdiff.b[0] &= ~REGBITS;
- r->act.b[0] &= ~REGBITS;
- }
-
- if(debug['R'] && debug['v'])
- dumpit("pass4.5", &firstr->f, 1);
-
- /*
- * pass 5
- * isolate regions
- * calculate costs (paint1)
- */
- r = firstr;
- if(r) {
- for(z=0; z<BITS; z++)
- bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
- ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
- if(bany(&bit) && !r->f.refset) {
- // should never happen - all variables are preset
- if(debug['w'])
- print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
- r->f.refset = 1;
- }
- }
-
- for(r = firstr; r != R; r = (Reg*)r->f.link)
- r->act = zbits;
- rgp = region;
- nregion = 0;
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- for(z=0; z<BITS; z++)
- bit.b[z] = r->set.b[z] &
- ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
- if(bany(&bit) && !r->f.refset) {
- if(debug['w'])
- print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
- r->f.refset = 1;
- excise(&r->f);
- }
- for(z=0; z<BITS; z++)
- bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
- while(bany(&bit)) {
- i = bnum(bit);
- rgp->enter = r;
- rgp->varno = i;
- change = 0;
- if(debug['R'] > 1)
- print("\n");
- paint1(r, i);
- biclr(&bit, i);
- if(change <= 0) {
- if(debug['R'])
- print("%L $%d: %Q\n",
- r->f.prog->lineno, change, blsh(i));
- continue;
- }
- rgp->cost = change;
- nregion++;
- if(nregion >= NRGN) {
- if(debug['R'] > 1)
- print("too many regions\n");
- goto brk;
- }
- rgp++;
- }
- }
-brk:
- qsort(region, nregion, sizeof(region[0]), rcmp);
-
- if(debug['R'] && debug['v'])
- dumpit("pass5", &firstr->f, 1);
-
- /*
- * pass 6
- * determine used registers (paint2)
- * replace code (paint3)
- */
- rgp = region;
- if(debug['R'] && debug['v'])
- print("\nregisterizing\n");
- for(i=0; i<nregion; i++) {
- if(debug['R'] && debug['v'])
- print("region %d: cost %d varno %d enter %lld\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc);
- bit = blsh(rgp->varno);
- vreg = paint2(rgp->enter, rgp->varno, 0);
- vreg = allreg(vreg, rgp);
- if(debug['R']) {
- print("%L $%d %R: %Q\n",
- rgp->enter->f.prog->lineno,
- rgp->cost,
- rgp->regno,
- bit);
- }
- if(rgp->regno != 0)
- paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
- rgp++;
- }
-
- /*
- * free aux structures. peep allocates new ones.
- */
- for(i=0; i<nvar; i++)
- var[i].node->opt = nil;
- flowend(g);
- firstr = R;
-
- if(debug['R'] && debug['v']) {
- // Rebuild flow graph, since we inserted instructions
- g = flowstart(firstp, sizeof(Reg));
- firstr = (Reg*)g->start;
- dumpit("pass6", &firstr->f, 1);
- flowend(g);
- firstr = R;
- }
-
- /*
- * pass 7
- * peep-hole on basic block
- */
- if(!debug['R'] || debug['P']) {
- peep(firstp);
- }
-
- if(debug['R'] && debug['v'])
- dumpit("pass7", &firstr->f, 1);
-
- /*
- * last pass
- * eliminate nops
- * free aux structures
- * adjust the stack pointer
- * MOVW.W R1,-12(R13) <<- start
- * MOVW R0,R1
- * MOVW R1,8(R13)
- * MOVW $0,R1
- * MOVW R1,4(R13)
- * BL ,runtime.newproc+0(SB)
- * MOVW &ft+-32(SP),R7 <<- adjust
- * MOVW &j+-40(SP),R6 <<- adjust
- * MOVW autotmp_0003+-24(SP),R5 <<- adjust
- * MOVW $12(R13),R13 <<- finish
- */
- vreg = 0;
- for(p = firstp; p != P; p = p->link) {
- while(p->link != P && p->link->as == ANOP)
- p->link = p->link->link;
- if(p->to.type == TYPE_BRANCH)
- while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
- p->to.u.branch = p->to.u.branch->link;
- if(p->as == AMOVW && p->to.reg == 13) {
- if(p->scond & C_WBIT) {
- vreg = -p->to.offset; // in adjust region
-// print("%P adjusting %d\n", p, vreg);
- continue;
- }
- if(p->from.type == TYPE_CONST && p->to.type == TYPE_REG) {
- if(p->from.offset != vreg)
- print("in and out different\n");
-// print("%P finish %d\n", p, vreg);
- vreg = 0; // done adjust region
- continue;
- }
-
-// print("%P %d %d from type\n", p, p->from.type, TYPE_CONST);
-// print("%P %d %d to type\n\n", p, p->to.type, TYPE_REG);
- }
-
- if(p->as == AMOVW && vreg != 0) {
- if(p->from.sym != nil)
- if(p->from.name == NAME_AUTO || p->from.name == NAME_PARAM) {
- p->from.offset += vreg;
-// print("%P adjusting from %d %d\n", p, vreg, p->from.type);
- }
- if(p->to.sym != nil)
- if(p->to.name == NAME_AUTO || p->to.name == NAME_PARAM) {
- p->to.offset += vreg;
-// print("%P adjusting to %d %d\n", p, vreg, p->from.type);
- }
- }
- }
-}
-
-static void
-walkvardef(Node *n, Reg *r, int active)
-{
- Reg *r1, *r2;
- int bn;
- Var *v;
-
- for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) {
- if(r1->f.active == active)
- break;
- r1->f.active = active;
- if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n)
- break;
- for(v=n->opt; v!=nil; v=v->nextinnode) {
- bn = v - var;
- biset(&r1->act, bn);
- }
- if(r1->f.prog->as == ABL)
- break;
- }
-
- for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1)
- if(r2->f.s2 != nil)
- walkvardef(n, (Reg*)r2->f.s2, active);
-}
-
-void
-addsplits(void)
-{
- Reg *r, *r1;
- int z, i;
- Bits bit;
-
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- if(r->f.loop > 1)
- continue;
- if(r->f.prog->as == ABL)
- continue;
- if(r->f.prog->as == ADUFFZERO)
- continue;
- if(r->f.prog->as == ADUFFCOPY)
- continue;
- for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) {
- if(r1->f.loop <= 1)
- continue;
- for(z=0; z<BITS; z++)
- bit.b[z] = r1->calbehind.b[z] &
- (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
- ~(r->calahead.b[z] & addrs.b[z]);
- while(bany(&bit)) {
- i = bnum(bit);
- biclr(&bit, i);
- }
- }
- }
-}
-
-/*
- * add mov b,rn
- * just after r
- */
-void
-addmove(Reg *r, int bn, int rn, int f)
-{
- Prog *p, *p1, *p2;
- Adr *a;
- Var *v;
-
- p1 = mal(sizeof(*p1));
- *p1 = zprog;
- p = r->f.prog;
-
- // If there's a stack fixup coming (after BL newproc or BL deferproc),
- // delay the load until after the fixup.
- p2 = p->link;
- if(p2 && p2->as == AMOVW && p2->from.type == TYPE_ADDR && p2->from.reg == REGSP && p2->to.reg == REGSP && p2->to.type == TYPE_REG)
- p = p2;
-
- p1->link = p->link;
- p->link = p1;
- p1->lineno = p->lineno;
-
- v = var + bn;
-
- a = &p1->to;
- a->name = v->name;
- a->node = v->node;
- a->sym = linksym(v->node->sym);
- a->offset = v->offset;
- a->etype = v->etype;
- a->type = TYPE_MEM;
- if(a->etype == TARRAY)
- a->type = TYPE_ADDR;
- else if(a->sym == nil)
- a->type = TYPE_CONST;
-
- if(v->addr)
- fatal("addmove: shouldn't be doing this %A\n", a);
-
- switch(v->etype) {
- default:
- print("What is this %E\n", v->etype);
-
- case TINT8:
- p1->as = AMOVBS;
- break;
- case TBOOL:
- case TUINT8:
-//print("movbu %E %d %S\n", v->etype, bn, v->sym);
- p1->as = AMOVBU;
- break;
- case TINT16:
- p1->as = AMOVHS;
- break;
- case TUINT16:
- p1->as = AMOVHU;
- break;
- case TINT32:
- case TUINT32:
- case TPTR32:
- p1->as = AMOVW;
- break;
- case TFLOAT32:
- p1->as = AMOVF;
- break;
- case TFLOAT64:
- p1->as = AMOVD;
- break;
- }
-
- p1->from.type = TYPE_REG;
- p1->from.reg = rn;
- if(!f) {
- p1->from = *a;
- *a = zprog.from;
- a->type = TYPE_REG;
- a->reg = rn;
- if(v->etype == TUINT8 || v->etype == TBOOL)
- p1->as = AMOVBU;
- if(v->etype == TUINT16)
- p1->as = AMOVHU;
- }
- if(debug['R'])
- print("%P\t.a%P\n", p, p1);
-}
-
-static int
-overlap(int32 o1, int w1, int32 o2, int w2)
-{
- int32 t1, t2;
-
- t1 = o1+w1;
- t2 = o2+w2;
-
- if(!(t1 > o2 && t2 > o1))
- return 0;
-
- return 1;
-}
-
-Bits
-mkvar(Reg *r, Adr *a)
-{
- Var *v;
- int i, t, n, et, z, w, flag;
- int32 o;
- Bits bit;
- Node *node;
-
- // mark registers used
- t = a->type;
-
- flag = 0;
- switch(t) {
- default:
- print("mkvar: type %d %d %D\n", t, a->name, a);
- goto none;
-
- case TYPE_NONE:
- case TYPE_FCONST:
- case TYPE_BRANCH:
- break;
-
-
- case TYPE_REGREG:
- case TYPE_REGREG2:
- bit = zbits;
- if(a->offset != 0)
- bit.b[0] |= RtoB(a->offset);
- if(a->reg != 0)
- bit.b[0] |= RtoB(a->reg);
- return bit;
-
- case TYPE_CONST:
- if(a->reg != 0)
- fatal("found CONST instead of ADDR: %D", a);
- break;
-
- case TYPE_ADDR:
- case TYPE_REG:
- case TYPE_SHIFT:
- if(a->reg != 0) {
- bit = zbits;
- bit.b[0] = RtoB(a->reg);
- return bit;
- }
- break;
-
- case TYPE_MEM:
- if(a->reg != 0) {
- if(a == &r->f.prog->from)
- r->use1.b[0] |= RtoB(a->reg);
- else
- r->use2.b[0] |= RtoB(a->reg);
- if(r->f.prog->scond & (C_PBIT|C_WBIT))
- r->set.b[0] |= RtoB(a->reg);
- }
- break;
- }
-
- switch(a->name) {
- default:
- goto none;
-
- case NAME_EXTERN:
- case NAME_STATIC:
- case NAME_AUTO:
- case NAME_PARAM:
- n = a->name;
- break;
- }
-
- node = a->node;
- if(node == N || node->op != ONAME || node->orig == N)
- goto none;
- node = node->orig;
- if(node->orig != node)
- fatal("%D: bad node", a);
- if(node->sym == S || node->sym->name[0] == '.')
- goto none;
- et = a->etype;
- o = a->offset;
- w = a->width;
- if(w < 0)
- fatal("bad width %d for %D", w, a);
-
- for(i=0; i<nvar; i++) {
- v = var+i;
- if(v->node == node && v->name == n) {
- if(v->offset == o)
- if(v->etype == et)
- if(v->width == w)
- if(!flag)
- return blsh(i);
-
- // if they overlap, disable both
- if(overlap(v->offset, v->width, o, w)) {
- v->addr = 1;
- flag = 1;
- }
- }
- }
-
- switch(et) {
- case 0:
- case TFUNC:
- goto none;
- }
-
- if(nvar >= NVAR) {
- if(debug['w'] > 1 && node)
- fatal("variable not optimized: %D", a);
-
- // If we're not tracking a word in a variable, mark the rest as
- // having its address taken, so that we keep the whole thing
- // live at all calls. otherwise we might optimize away part of
- // a variable but not all of it.
- for(i=0; i<nvar; i++) {
- v = var+i;
- if(v->node == node)
- v->addr = 1;
- }
- goto none;
- }
-
- i = nvar;
- nvar++;
-//print("var %d %E %D %S\n", i, et, a, s);
- v = var+i;
- v->offset = o;
- v->name = n;
- v->etype = et;
- v->width = w;
- v->addr = flag; // funny punning
- v->node = node;
-
- // node->opt is the head of a linked list
- // of Vars within the given Node, so that
- // we can start at a Var and find all the other
- // Vars in the same Go variable.
- v->nextinnode = node->opt;
- node->opt = v;
-
- bit = blsh(i);
- if(n == NAME_EXTERN || n == NAME_STATIC)
- for(z=0; z<BITS; z++)
- externs.b[z] |= bit.b[z];
- if(n == NAME_PARAM)
- for(z=0; z<BITS; z++)
- params.b[z] |= bit.b[z];
-
- if(node->class == PPARAM)
- for(z=0; z<BITS; z++)
- ivar.b[z] |= bit.b[z];
- if(node->class == PPARAMOUT)
- for(z=0; z<BITS; z++)
- ovar.b[z] |= bit.b[z];
-
- // Treat values with their address taken as live at calls,
- // because the garbage collector's liveness analysis in ../gc/plive.c does.
- // These must be consistent or else we will elide stores and the garbage
- // collector will see uninitialized data.
- // The typical case where our own analysis is out of sync is when the
- // node appears to have its address taken but that code doesn't actually
- // get generated and therefore doesn't show up as an address being
- // taken when we analyze the instruction stream.
- // One instance of this case is when a closure uses the same name as
- // an outer variable for one of its own variables declared with :=.
- // The parser flags the outer variable as possibly shared, and therefore
- // sets addrtaken, even though it ends up not being actually shared.
- // If we were better about _ elision, _ = &x would suffice too.
- // The broader := in a closure problem is mentioned in a comment in
- // closure.c:/^typecheckclosure and dcl.c:/^oldname.
- if(node->addrtaken)
- v->addr = 1;
-
- // Disable registerization for globals, because:
- // (1) we might panic at any time and we want the recovery code
- // to see the latest values (issue 1304).
- // (2) we don't know what pointers might point at them and we want
- // loads via those pointers to see updated values and vice versa (issue 7995).
- //
- // Disable registerization for results if using defer, because the deferred func
- // might recover and return, causing the current values to be used.
- if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
- v->addr = 1;
-
- if(debug['R'])
- print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
-
- return bit;
-
-none:
- return zbits;
-}
-
-void
-prop(Reg *r, Bits ref, Bits cal)
+char**
+regnames(int *n)	// regnames returns this file's regname table and stores NREGVAR (32) in *n
{
- Reg *r1, *r2;
- int z, i, j;
- Var *v, *v1;
-
- for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
- for(z=0; z<BITS; z++) {
- ref.b[z] |= r1->refahead.b[z];
- if(ref.b[z] != r1->refahead.b[z]) {
- r1->refahead.b[z] = ref.b[z];
- change++;
- }
- cal.b[z] |= r1->calahead.b[z];
- if(cal.b[z] != r1->calahead.b[z]) {
- r1->calahead.b[z] = cal.b[z];
- change++;
- }
- }
- switch(r1->f.prog->as) {
- case ABL:
- if(noreturn(r1->f.prog))
- break;
-
- // Mark all input variables (ivar) as used, because that's what the
- // liveness bitmaps say. The liveness bitmaps say that so that a
- // panic will not show stale values in the parameter dump.
- // Mark variables with a recent VARDEF (r1->act) as used,
- // so that the optimizer flushes initializations to memory,
- // so that if a garbage collection happens during this CALL,
- // the collector will see initialized memory. Again this is to
- // match what the liveness bitmaps say.
- for(z=0; z<BITS; z++) {
- cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
- ref.b[z] = 0;
- }
-
- // cal.b is the current approximation of what's live across the call.
- // Every bit in cal.b is a single stack word. For each such word,
- // find all the other tracked stack words in the same Go variable
- // (struct/slice/string/interface) and mark them live too.
- // This is necessary because the liveness analysis for the garbage
- // collector works at variable granularity, not at word granularity.
- // It is fundamental for slice/string/interface: the garbage collector
- // needs the whole value, not just some of the words, in order to
- // interpret the other bits correctly. Specifically, slice needs a consistent
- // ptr and cap, string needs a consistent ptr and len, and interface
- // needs a consistent type word and data word.
- for(z=0; z<BITS; z++) {
- if(cal.b[z] == 0)
- continue;
- for(i=0; i<64; i++) {
- if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
- continue;
- v = var+z*64+i;
- if(v->node->opt == nil) // v represents fixed register, not Go variable
- continue;
-
- // v->node->opt is the head of a linked list of Vars
- // corresponding to tracked words from the Go variable v->node.
- // Walk the list and set all the bits.
- // For a large struct this could end up being quadratic:
- // after the first setting, the outer loop (for z, i) would see a 1 bit
- // for all of the remaining words in the struct, and for each such
- // word would go through and turn on all the bits again.
- // To avoid the quadratic behavior, we only turn on the bits if
- // v is the head of the list or if the head's bit is not yet turned on.
- // This will set the bits at most twice, keeping the overall loop linear.
- v1 = v->node->opt;
- j = v1 - var;
- if(v == v1 || !btest(&cal, j)) {
- for(; v1 != nil; v1 = v1->nextinnode) {
- j = v1 - var;
- biset(&cal, j);
- }
- }
- }
- }
- break;
-
- case ATEXT:
- for(z=0; z<BITS; z++) {
- cal.b[z] = 0;
- ref.b[z] = 0;
- }
- break;
-
- case ARET:
- for(z=0; z<BITS; z++) {
- cal.b[z] = externs.b[z] | ovar.b[z];
- ref.b[z] = 0;
- }
- break;
- }
- for(z=0; z<BITS; z++) {
- ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
- r1->use1.b[z] | r1->use2.b[z];
- cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
- r1->refbehind.b[z] = ref.b[z];
- r1->calbehind.b[z] = cal.b[z];
- }
- if(r1->f.active)
- break;
- r1->f.active = 1;
- }
- for(; r != r1; r = (Reg*)r->f.p1)
- for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
- prop(r2, r->refbehind, r->calbehind);
+ *n = NREGVAR;	// the removed regopt indexed regname[0..NREGVAR-1] to create its register pseudo-variables
+ return regname;
}
-void
-synch(Reg *r, Bits dif)
+uint64
+excludedregs(void)	// excludedregs returns the OR of the RtoB bits for REGSP, REGLINK and REGPC
{
- Reg *r1;
- int z;
-
- for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
- for(z=0; z<BITS; z++) {
- dif.b[z] = (dif.b[z] &
- ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
- r1->set.b[z] | r1->regdiff.b[z];
- if(dif.b[z] != r1->regdiff.b[z]) {
- r1->regdiff.b[z] = dif.b[z];
- change++;
- }
- }
- if(r1->f.active)
- break;
- r1->f.active = 1;
- for(z=0; z<BITS; z++)
- dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
- if(r1->f.s2 != nil)
- synch((Reg*)r1->f.s2, dif);
- }
+ return RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC);	// same expression the removed regopt stored in regbits; NOTE(review): RtoB returns 0 for registers at or above REGTMP-2 other than R12, so these terms may all be zero - confirm
}
-uint32
-allreg(uint32 b, Rgn *r)
+uint64
+doregbits(int r)	// doregbits ignores its argument and always returns 0 in this implementation
{
- Var *v;
- int i;
-
- v = var + r->varno;
- r->regno = 0;
- switch(v->etype) {
-
- default:
- fatal("unknown etype %d/%E", bitno(b), v->etype);
- break;
-
- case TINT8:
- case TUINT8:
- case TINT16:
- case TUINT16:
- case TINT32:
- case TUINT32:
- case TINT:
- case TUINT:
- case TUINTPTR:
- case TBOOL:
- case TPTR32:
- i = BtoR(~b);
- if(i && r->cost >= 0) {
- r->regno = i;
- return RtoB(i);
- }
- break;
-
- case TFLOAT32:
- case TFLOAT64:
- i = BtoF(~b);
- if(i && r->cost >= 0) {
- r->regno = i;
- return RtoB(i);
- }
- break;
-
- case TINT64:
- case TUINT64:
- case TPTR64:
- case TINTER:
- case TSTRUCT:
- case TARRAY:
- break;
- }
+ USED(r);	// r is intentionally unused; USED presumably silences the unused-parameter diagnostic
return 0;
}
-void
-paint1(Reg *r, int bn)
-{
- Reg *r1;
- Prog *p;
- int z;
- uint64 bb;
-
- z = bn/64;
- bb = 1LL<<(bn%64);
- if(r->act.b[z] & bb)
- return;
- for(;;) {
- if(!(r->refbehind.b[z] & bb))
- break;
- r1 = (Reg*)r->f.p1;
- if(r1 == R)
- break;
- if(!(r1->refahead.b[z] & bb))
- break;
- if(r1->act.b[z] & bb)
- break;
- r = r1;
- }
-
- if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
- change -= CLOAD * r->f.loop;
- if(debug['R'] > 1)
- print("%d%P\td %Q $%d\n", r->f.loop,
- r->f.prog, blsh(bn), change);
- }
- for(;;) {
- r->act.b[z] |= bb;
- p = r->f.prog;
-
-
- if(r->f.prog->as != ANOP) { // don't give credit for NOPs
- if(r->use1.b[z] & bb) {
- change += CREF * r->f.loop;
- if(debug['R'] > 1)
- print("%d%P\tu1 %Q $%d\n", r->f.loop,
- p, blsh(bn), change);
- }
- if((r->use2.b[z]|r->set.b[z]) & bb) {
- change += CREF * r->f.loop;
- if(debug['R'] > 1)
- print("%d%P\tu2 %Q $%d\n", r->f.loop,
- p, blsh(bn), change);
- }
- }
-
- if(STORE(r) & r->regdiff.b[z] & bb) {
- change -= CLOAD * r->f.loop;
- if(debug['R'] > 1)
- print("%d%P\tst %Q $%d\n", r->f.loop,
- p, blsh(bn), change);
- }
-
- if(r->refbehind.b[z] & bb)
- for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
- if(r1->refahead.b[z] & bb)
- paint1(r1, bn);
-
- if(!(r->refahead.b[z] & bb))
- break;
- r1 = (Reg*)r->f.s2;
- if(r1 != R)
- if(r1->refbehind.b[z] & bb)
- paint1(r1, bn);
- r = (Reg*)r->f.s1;
- if(r == R)
- break;
- if(r->act.b[z] & bb)
- break;
- if(!(r->refbehind.b[z] & bb))
- break;
- }
-}
-
-uint32
-paint2(Reg *r, int bn, int depth)
-{
- Reg *r1;
- int z;
- uint64 bb, vreg;
-
- z = bn/64;
- bb = 1LL << (bn%64);
- vreg = regbits;
- if(!(r->act.b[z] & bb))
- return vreg;
- for(;;) {
- if(!(r->refbehind.b[z] & bb))
- break;
- r1 = (Reg*)r->f.p1;
- if(r1 == R)
- break;
- if(!(r1->refahead.b[z] & bb))
- break;
- if(!(r1->act.b[z] & bb))
- break;
- r = r1;
- }
- for(;;) {
- if(debug['R'] && debug['v'])
- print(" paint2 %d %P\n", depth, r->f.prog);
-
- r->act.b[z] &= ~bb;
-
- vreg |= r->regu;
-
- if(r->refbehind.b[z] & bb)
- for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
- if(r1->refahead.b[z] & bb)
- vreg |= paint2(r1, bn, depth+1);
-
- if(!(r->refahead.b[z] & bb))
- break;
- r1 = (Reg*)r->f.s2;
- if(r1 != R)
- if(r1->refbehind.b[z] & bb)
- vreg |= paint2(r1, bn, depth+1);
- r = (Reg*)r->f.s1;
- if(r == R)
- break;
- if(!(r->act.b[z] & bb))
- break;
- if(!(r->refbehind.b[z] & bb))
- break;
- }
- return vreg;
-}
-
-void
-paint3(Reg *r, int bn, uint32 rb, int rn)
-{
- Reg *r1;
- Prog *p;
- int z;
- uint64 bb;
-
- z = bn/64;
- bb = 1LL << (bn%64);
- if(r->act.b[z] & bb)
- return;
- for(;;) {
- if(!(r->refbehind.b[z] & bb))
- break;
- r1 = (Reg*)r->f.p1;
- if(r1 == R)
- break;
- if(!(r1->refahead.b[z] & bb))
- break;
- if(r1->act.b[z] & bb)
- break;
- r = r1;
- }
-
- if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
- addmove(r, bn, rn, 0);
-
- for(;;) {
- r->act.b[z] |= bb;
- p = r->f.prog;
-
- if(r->use1.b[z] & bb) {
- if(debug['R'])
- print("%P", p);
- addreg(&p->from, rn);
- if(debug['R'])
- print("\t.c%P\n", p);
- }
- if((r->use2.b[z]|r->set.b[z]) & bb) {
- if(debug['R'])
- print("%P", p);
- addreg(&p->to, rn);
- if(debug['R'])
- print("\t.c%P\n", p);
- }
-
- if(STORE(r) & r->regdiff.b[z] & bb)
- addmove(r, bn, rn, 1);
- r->regu |= rb;
-
- if(r->refbehind.b[z] & bb)
- for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
- if(r1->refahead.b[z] & bb)
- paint3(r1, bn, rb, rn);
-
- if(!(r->refahead.b[z] & bb))
- break;
- r1 = (Reg*)r->f.s2;
- if(r1 != R)
- if(r1->refbehind.b[z] & bb)
- paint3(r1, bn, rb, rn);
- r = (Reg*)r->f.s1;
- if(r == R)
- break;
- if(r->act.b[z] & bb)
- break;
- if(!(r->refbehind.b[z] & bb))
- break;
- }
-}
-
-void
-addreg(Adr *a, int rn)
-{
- a->sym = nil;
- a->node = nil;
- a->name = NAME_NONE;
- a->type = TYPE_REG;
- a->reg = rn;
-}
-
/*
 * bit reg
 * 0 R0
 * ... ...
 * 31 F15
 */
-uint32
+uint64	// RtoB maps register r to its single bit in the table above, or returns 0 for registers it excludes or does not recognize; result widened from uint32 by this change
RtoB(int r)
{
if(REG_R0 <= r && r <= REG_R15) {
if(r >= REGTMP-2 && r != REG_R12) // excluded R9 and R10 for m and g, but not R12
return 0;
- return 1L << (r - REG_R0);
+ return 1ULL << (r - REG_R0);	// integer registers take the low bits, R0 at bit 0; NOTE(review): the BtoR TODO in this file says only R10 is still reserved, so the R9 exclusion above may be stale - confirm
}
if(REG_F0 <= r && r <= REG_F15) {
if(r < REG_F2 || r > REG_F0+NFREG-1)
return 0;
- return 1L << ((r - REG_F0) + 16);
+ return 1ULL << ((r - REG_F0) + 16);	// float registers follow, F0 at bit 16; F0 and F1 are rejected by the check above
}
return 0;
}
int
-BtoR(uint32 b)
+BtoR(uint64 b)
{
// TODO Allow R0 and R1, but be careful with a 0 return
// TODO Allow R9. Only R10 is reserved now (just g, not m).
}
// BtoF turns a register bit set into one floating-point register number.
// Only bits 18..31 of b are considered (mask 0xfffc0000); with float
// register n stored at bit 16+n, those are the bits for F2..F15.
// Returns 0 when none of those bits is set, otherwise
// bitno(b) - 16 + REG_F0. bitno is defined elsewhere; presumably it
// yields the index of one set bit -- confirm which one.
// This patch widens the parameter from uint32 to uint64.
int
-BtoF(uint32 b)
+BtoF(uint64 b)
{
b &= 0xfffc0000L;
if(b == 0)
return 0;
return bitno(b) - 16 + REG_F0;
}
-
-void
-dumpone(Flow *f, int isreg)
-{
- int z;
- Bits bit;
- Reg *r;
-
- print("%d:%P", f->loop, f->prog);
- if(isreg) {
- r = (Reg*)f;
- for(z=0; z<BITS; z++)
- bit.b[z] =
- r->set.b[z] |
- r->use1.b[z] |
- r->use2.b[z] |
- r->refbehind.b[z] |
- r->refahead.b[z] |
- r->calbehind.b[z] |
- r->calahead.b[z] |
- r->regdiff.b[z] |
- r->act.b[z] |
- 0;
- if(bany(&bit)) {
- print("\t");
- if(bany(&r->set))
- print(" s:%Q", r->set);
- if(bany(&r->use1))
- print(" u1:%Q", r->use1);
- if(bany(&r->use2))
- print(" u2:%Q", r->use2);
- if(bany(&r->refbehind))
- print(" rb:%Q ", r->refbehind);
- if(bany(&r->refahead))
- print(" ra:%Q ", r->refahead);
- if(bany(&r->calbehind))
- print(" cb:%Q ", r->calbehind);
- if(bany(&r->calahead))
- print(" ca:%Q ", r->calahead);
- if(bany(&r->regdiff))
- print(" d:%Q ", r->regdiff);
- if(bany(&r->act))
- print(" a:%Q ", r->act);
- }
- }
- print("\n");
-}
-
-void
-dumpit(char *str, Flow *r0, int isreg)
-{
- Flow *r, *r1;
-
- print("\n%s\n", str);
- for(r = r0; r != nil; r = r->link) {
- dumpone(r, isreg);
- r1 = r->p2;
- if(r1 != nil) {
- print(" pred:");
- for(; r1 != nil; r1 = r1->p2link)
- print(" %.4ud", (int)r1->prog->pc);
- if(r->p1 != nil)
- print(" (and %.4ud)", (int)r->p1->prog->pc);
- else
- print(" (only)");
- print("\n");
- }
- // Print successors if it's not just the next one
- if(r->s1 != r->link || r->s2 != nil) {
- print(" succ:");
- if(r->s1 != nil)
- print(" %.4ud", (int)r->s1->prog->pc);
- if(r->s2 != nil)
- print(" %.4ud", (int)r->s2->prog->pc);
- print("\n");
- }
- }
-}
arch.ginscall = ginscall;
arch.igen = igen;
arch.linkarchinit = linkarchinit;
+ arch.peep = peep;
arch.proginfo = proginfo;
arch.regalloc = regalloc;
arch.regfree = regfree;
- arch.regopt = regopt;
arch.regtyp = regtyp;
arch.sameaddr = sameaddr;
arch.smallindir = smallindir;
arch.stackaddr = stackaddr;
+ arch.excludedregs = excludedregs;
+ arch.RtoB = RtoB;
+ arch.FtoB = FtoB;
+ arch.BtoR = BtoR;
+ arch.BtoF = BtoF;
+ arch.optoas = optoas;
+ arch.doregbits = doregbits;
+ arch.regnames = regnames;
gcmain(argc, argv);
}
int stackaddr(Addr*);
Prog* unpatch(Prog*);
+/*
+ * reg.c
+ */
+uint64 excludedregs(void);
+uint64 RtoB(int);
+uint64 FtoB(int);
+int BtoR(uint64);
+int BtoF(uint64);
+uint64 doregbits(int);
+char** regnames(int*);
+
+/*
+ * peep.c
+ */
+void peep(Prog*);
#include <u.h>
#include <libc.h>
#include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
static Prog *appendpp(Prog*, int, int, int, vlong, int, int, vlong);
static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax);
#include <u.h>
#include <libc.h>
#include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
static void conprop(Flow *r);
static void elimshortmov(Graph *g);
static int copyas(Adr*, Adr*);
static int copyau(Adr*, Adr*);
static int copysub(Adr*, Adr*, Adr*, int);
+static int copyu(Prog*, Adr*, Adr*);
static uint32 gactive;
// exregoffset becomes a compile-time constant here. Before this patch the
// removed regopt() assigned exregoffset = REG_R15 at run time on its first
// call; the value is the same. NOTE(review): how the rest of this file uses
// exregoffset is not visible in this hunk.
+enum
+{
+ exregoffset = REG_R15,
+};
+
// do we need the carry bit
static int
needc(Prog *p)
* 4 if set and used
* 0 otherwise (not touched)
*/
-int
+static int
copyu(Prog *p, Adr *v, Adr *s)
{
ProgInfo info;
#include <u.h>
#include <libc.h>
#include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
// Matches real RtoB but can be used in global initializer.
#define RtoB(r) (1<<((r)-REG_AX))
#include <u.h>
#include <libc.h>
#include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
-#define NREGVAR 32 /* 16 general + 16 floating */
-#define REGBITS ((uint64)0xffffffffull)
-/*c2go enum {
+enum {
NREGVAR = 32,
- REGBITS = 0xffffffff,
};
-*/
-
-static Reg* firstr;
-static int first = 1;
-
-int
-rcmp(const void *a1, const void *a2)
-{
- Rgn *p1, *p2;
- int c1, c2;
-
- p1 = (Rgn*)a1;
- p2 = (Rgn*)a2;
- c1 = p2->cost;
- c2 = p1->cost;
- if(c1 -= c2)
- return c1;
- return p2->varno - p1->varno;
-}
-
-static void
-setaddrs(Bits bit)
-{
- int i, n;
- Var *v;
- Node *node;
-
- while(bany(&bit)) {
- // convert each bit to a variable
- i = bnum(bit);
- node = var[i].node;
- n = var[i].name;
- biclr(&bit, i);
-
- // disable all pieces of that variable
- for(i=0; i<nvar; i++) {
- v = var+i;
- if(v->node == node && v->name == n)
- v->addr = 2;
- }
- }
-}
static char* regname[] = {
".AX",
".X15",
};
-static Node* regnodes[NREGVAR];
-
-static void walkvardef(Node *n, Reg *r, int active);
-
-void
-regopt(Prog *firstp)
-{
- Reg *r, *r1;
- Prog *p;
- Graph *g;
- ProgInfo info;
- int i, z, active;
- uint32 vreg;
- Bits bit;
-
- if(first) {
- fmtinstall('Q', Qconv);
- exregoffset = REG_R15;
- first = 0;
- }
-
- mergetemp(firstp);
-
- /*
- * control flow is more complicated in generated go code
- * than in generated c code. define pseudo-variables for
- * registers, so we have complete register usage information.
- */
- nvar = NREGVAR;
- memset(var, 0, NREGVAR*sizeof var[0]);
- for(i=0; i<NREGVAR; i++) {
- if(regnodes[i] == N)
- regnodes[i] = newname(lookup(regname[i]));
- var[i].node = regnodes[i];
- }
-
- regbits = RtoB(REG_SP);
- for(z=0; z<BITS; z++) {
- externs.b[z] = 0;
- params.b[z] = 0;
- consts.b[z] = 0;
- addrs.b[z] = 0;
- ivar.b[z] = 0;
- ovar.b[z] = 0;
- }
-
- /*
- * pass 1
- * build aux data structure
- * allocate pcs
- * find use and set of variables
- */
- g = flowstart(firstp, sizeof(Reg));
- if(g == nil) {
- for(i=0; i<nvar; i++)
- var[i].node->opt = nil;
- return;
- }
-
- firstr = (Reg*)g->start;
-
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- p = r->f.prog;
- if(p->as == AVARDEF || p->as == AVARKILL)
- continue;
- proginfo(&info, p);
-
- // Avoid making variables for direct-called functions.
- if(p->as == ACALL && p->to.type == TYPE_MEM && p->to.name == NAME_EXTERN)
- continue;
-
- r->use1.b[0] |= info.reguse | info.regindex;
- r->set.b[0] |= info.regset;
-
- bit = mkvar(r, &p->from);
- if(bany(&bit)) {
- if(info.flags & LeftAddr)
- setaddrs(bit);
- if(info.flags & LeftRead)
- for(z=0; z<BITS; z++)
- r->use1.b[z] |= bit.b[z];
- if(info.flags & LeftWrite)
- for(z=0; z<BITS; z++)
- r->set.b[z] |= bit.b[z];
- }
-
- bit = mkvar(r, &p->to);
- if(bany(&bit)) {
- if(info.flags & RightAddr)
- setaddrs(bit);
- if(info.flags & RightRead)
- for(z=0; z<BITS; z++)
- r->use2.b[z] |= bit.b[z];
- if(info.flags & RightWrite)
- for(z=0; z<BITS; z++)
- r->set.b[z] |= bit.b[z];
- }
- }
-
- for(i=0; i<nvar; i++) {
- Var *v = var+i;
- if(v->addr) {
- bit = blsh(i);
- for(z=0; z<BITS; z++)
- addrs.b[z] |= bit.b[z];
- }
-
- if(debug['R'] && debug['v'])
- print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
- i, v->addr, v->etype, v->width, v->node, v->offset);
- }
-
- if(debug['R'] && debug['v'])
- dumpit("pass1", &firstr->f, 1);
-
- /*
- * pass 2
- * find looping structure
- */
- flowrpo(g);
-
- if(debug['R'] && debug['v'])
- dumpit("pass2", &firstr->f, 1);
-
- /*
- * pass 2.5
- * iterate propagating fat vardef covering forward
- * r->act records vars with a VARDEF since the last CALL.
- * (r->act will be reused in pass 5 for something else,
- * but we'll be done with it by then.)
- */
- active = 0;
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- r->f.active = 0;
- r->act = zbits;
- }
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- p = r->f.prog;
- if(p->as == AVARDEF && isfat(((Node*)(p->to.node))->type) && ((Node*)(p->to.node))->opt != nil) {
- active++;
- walkvardef(p->to.node, r, active);
- }
- }
-
- /*
- * pass 3
- * iterate propagating usage
- * back until flow graph is complete
- */
-loop1:
- change = 0;
- for(r = firstr; r != R; r = (Reg*)r->f.link)
- r->f.active = 0;
- for(r = firstr; r != R; r = (Reg*)r->f.link)
- if(r->f.prog->as == ARET)
- prop(r, zbits, zbits);
-loop11:
- /* pick up unreachable code */
- i = 0;
- for(r = firstr; r != R; r = r1) {
- r1 = (Reg*)r->f.link;
- if(r1 && r1->f.active && !r->f.active) {
- prop(r, zbits, zbits);
- i = 1;
- }
- }
- if(i)
- goto loop11;
- if(change)
- goto loop1;
-
- if(debug['R'] && debug['v'])
- dumpit("pass3", &firstr->f, 1);
-
- /*
- * pass 4
- * iterate propagating register/variable synchrony
- * forward until graph is complete
- */
-loop2:
- change = 0;
- for(r = firstr; r != R; r = (Reg*)r->f.link)
- r->f.active = 0;
- synch(firstr, zbits);
- if(change)
- goto loop2;
-
- if(debug['R'] && debug['v'])
- dumpit("pass4", &firstr->f, 1);
-
- /*
- * pass 4.5
- * move register pseudo-variables into regu.
- */
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
-
- r->set.b[0] &= ~REGBITS;
- r->use1.b[0] &= ~REGBITS;
- r->use2.b[0] &= ~REGBITS;
- r->refbehind.b[0] &= ~REGBITS;
- r->refahead.b[0] &= ~REGBITS;
- r->calbehind.b[0] &= ~REGBITS;
- r->calahead.b[0] &= ~REGBITS;
- r->regdiff.b[0] &= ~REGBITS;
- r->act.b[0] &= ~REGBITS;
- }
-
- /*
- * pass 5
- * isolate regions
- * calculate costs (paint1)
- */
- r = firstr;
- if(r) {
- for(z=0; z<BITS; z++)
- bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
- ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
- if(bany(&bit) && !r->f.refset) {
- // should never happen - all variables are preset
- if(debug['w'])
- print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
- r->f.refset = 1;
- }
- }
- for(r = firstr; r != R; r = (Reg*)r->f.link)
- r->act = zbits;
- rgp = region;
- nregion = 0;
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- for(z=0; z<BITS; z++)
- bit.b[z] = r->set.b[z] &
- ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
- if(bany(&bit) && !r->f.refset) {
- if(debug['w'])
- print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
- r->f.refset = 1;
- excise(&r->f);
- }
- for(z=0; z<BITS; z++)
- bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
- while(bany(&bit)) {
- i = bnum(bit);
- rgp->enter = r;
- rgp->varno = i;
- change = 0;
- paint1(r, i);
- biclr(&bit, i);
- if(change <= 0)
- continue;
- rgp->cost = change;
- nregion++;
- if(nregion >= NRGN) {
- if(debug['R'] && debug['v'])
- print("too many regions\n");
- goto brk;
- }
- rgp++;
- }
- }
-brk:
- qsort(region, nregion, sizeof(region[0]), rcmp);
-
- if(debug['R'] && debug['v'])
- dumpit("pass5", &firstr->f, 1);
-
- /*
- * pass 6
- * determine used registers (paint2)
- * replace code (paint3)
- */
- rgp = region;
- if(debug['R'] && debug['v'])
- print("\nregisterizing\n");
- for(i=0; i<nregion; i++) {
- if(debug['R'] && debug['v'])
- print("region %d: cost %d varno %d enter %lld\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc);
- bit = blsh(rgp->varno);
- vreg = paint2(rgp->enter, rgp->varno, 0);
- vreg = allreg(vreg, rgp);
- if(rgp->regno != 0) {
- if(debug['R'] && debug['v']) {
- Var *v;
-
- v = var + rgp->varno;
- print("registerize %N+%lld (bit=%2d et=%2E) in %R\n",
- v->node, v->offset, rgp->varno, v->etype, rgp->regno);
- }
- paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
- }
- rgp++;
- }
-
- /*
- * free aux structures. peep allocates new ones.
- */
- for(i=0; i<nvar; i++)
- var[i].node->opt = nil;
- flowend(g);
- firstr = R;
-
- if(debug['R'] && debug['v']) {
- // Rebuild flow graph, since we inserted instructions
- g = flowstart(firstp, sizeof(Reg));
- firstr = (Reg*)g->start;
- dumpit("pass6", &firstr->f, 1);
- flowend(g);
- firstr = R;
- }
-
- /*
- * pass 7
- * peep-hole on basic block
- */
- if(!debug['R'] || debug['P'])
- peep(firstp);
-
- /*
- * eliminate nops
- */
- for(p=firstp; p!=P; p=p->link) {
- while(p->link != P && p->link->as == ANOP)
- p->link = p->link->link;
- if(p->to.type == TYPE_BRANCH)
- while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
- p->to.u.branch = p->to.u.branch->link;
- }
-
- if(debug['R']) {
- if(ostats.ncvtreg ||
- ostats.nspill ||
- ostats.nreload ||
- ostats.ndelmov ||
- ostats.nvar ||
- ostats.naddr ||
- 0)
- print("\nstats\n");
-
- if(ostats.ncvtreg)
- print(" %4d cvtreg\n", ostats.ncvtreg);
- if(ostats.nspill)
- print(" %4d spill\n", ostats.nspill);
- if(ostats.nreload)
- print(" %4d reload\n", ostats.nreload);
- if(ostats.ndelmov)
- print(" %4d delmov\n", ostats.ndelmov);
- if(ostats.nvar)
- print(" %4d var\n", ostats.nvar);
- if(ostats.naddr)
- print(" %4d addr\n", ostats.naddr);
-
- memset(&ostats, 0, sizeof(ostats));
- }
-}
-
-static void
-walkvardef(Node *n, Reg *r, int active)
// regnames (added by this patch, taking over the braces of the removed
// walkvardef) stores NREGVAR, the number of entries, in *n and returns
// the file-static regname table of register names (".AX" ... ".X15").
// The removed regopt() indexed regname[] directly to create its register
// pseudo-variables; this accessor hands the same table to code outside
// this file.
+char**
+regnames(int *n)
{
- Reg *r1, *r2;
- int bn;
- Var *v;
-
- for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) {
- if(r1->f.active == active)
- break;
- r1->f.active = active;
- if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n)
- break;
- for(v=n->opt; v!=nil; v=v->nextinnode) {
- bn = v - var;
- biset(&r1->act, bn);
- }
- if(r1->f.prog->as == ACALL)
- break;
- }
-
- for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1)
- if(r2->f.s2 != nil)
- walkvardef(n, (Reg*)r2->f.s2, active);
+ *n = NREGVAR;
+ return regname;
}
-/*
- * add mov b,rn
- * just after r
- */
-void
-addmove(Reg *r, int bn, int rn, int f)
// excludedregs (added by this patch, taking over the braces of the removed
// addmove) returns a register bit set containing only the bit for REG_SP.
// The removed regopt() seeded its regbits variable with the same value
// (regbits = RtoB(REG_SP)). Judging by the name, this is the set of
// registers the shared registerizer must treat as unavailable from the
// start -- confirm against the caller.
+uint64
+excludedregs(void)
{
- Prog *p, *p1;
- Adr *a;
- Var *v;
-
- p1 = mal(sizeof(*p1));
- clearp(p1);
- p1->pc = 9999;
-
- p = r->f.prog;
- p1->link = p->link;
- p->link = p1;
- p1->lineno = p->lineno;
-
- v = var + bn;
-
- a = &p1->to;
- a->offset = v->offset;
- a->etype = v->etype;
- a->type = TYPE_MEM;
- a->name = v->name;
- a->node = v->node;
- a->sym = linksym(v->node->sym);
-
- // need to clean this up with wptr and
- // some of the defaults
- p1->as = AMOVL;
- switch(simtype[(uchar)v->etype]) {
- default:
- fatal("unknown type %E", v->etype);
- case TINT8:
- case TUINT8:
- case TBOOL:
- p1->as = AMOVB;
- break;
- case TINT16:
- case TUINT16:
- p1->as = AMOVW;
- break;
- case TINT64:
- case TUINT64:
- case TPTR64:
- p1->as = AMOVQ;
- break;
- case TFLOAT32:
- p1->as = AMOVSS;
- break;
- case TFLOAT64:
- p1->as = AMOVSD;
- break;
- case TINT32:
- case TUINT32:
- case TPTR32:
- break;
- }
-
- p1->from.type = TYPE_REG;
- p1->from.reg = rn;
- p1->from.name = NAME_NONE;
- if(!f) {
- p1->from = *a;
- *a = zprog.from;
- a->type = TYPE_REG;
- a->reg = rn;
- if(v->etype == TUINT8)
- p1->as = AMOVB;
- if(v->etype == TUINT16)
- p1->as = AMOVW;
- }
- if(debug['R'] && debug['v'])
- print("%P ===add=== %P\n", p, p1);
- ostats.nspill++;
+ return RtoB(REG_SP);
}
-uint32
+uint64
doregbits(int r)
{
- uint32 b;
+ uint64 b;
b = 0;
if(r >= REG_AX && r <= REG_R15)
return b;
}
-static int
-overlap(int64 o1, int w1, int64 o2, int w2)
-{
- int64 t1, t2;
-
- t1 = o1+w1;
- t2 = o2+w2;
-
- if(!(t1 > o2 && t2 > o1))
- return 0;
-
- return 1;
-}
-
-Bits
-mkvar(Reg *r, Adr *a)
-{
- Var *v;
- int i, n, et, z, flag;
- int64 w;
- uint32 regu;
- int64 o;
- Bits bit;
- Node *node;
-
- /*
- * mark registers used
- */
- if(a->type == TYPE_NONE)
- goto none;
-
- if(r != R)
- r->use1.b[0] |= doregbits(a->index);
-
- switch(a->type) {
- default:
- regu = doregbits(a->reg);
- if(regu == 0)
- goto none;
- bit = zbits;
- bit.b[0] = regu;
- return bit;
-
- case TYPE_ADDR:
- a->type = TYPE_MEM;
- bit = mkvar(r, a);
- setaddrs(bit);
- a->type = TYPE_ADDR;
- ostats.naddr++;
- goto none;
-
- case TYPE_MEM:
- switch(a->name) {
- default:
- goto none;
- case NAME_EXTERN:
- case NAME_STATIC:
- case NAME_PARAM:
- case NAME_AUTO:
- n = a->name;
- break;
- }
- }
-
- node = a->node;
- if(node == N || node->op != ONAME || node->orig == N)
- goto none;
- node = node->orig;
- if(node->orig != node)
- fatal("%D: bad node", a);
- if(node->sym == S || node->sym->name[0] == '.')
- goto none;
- et = a->etype;
- o = a->offset;
- w = a->width;
- if(w < 0)
- fatal("bad width %lld for %D", w, a);
-
- flag = 0;
- for(i=0; i<nvar; i++) {
- v = var+i;
- if(v->node == node && v->name == n) {
- if(v->offset == o)
- if(v->etype == et)
- if(v->width == w)
- return blsh(i);
-
- // if they overlaps, disable both
- if(overlap(v->offset, v->width, o, w)) {
-// print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et);
- v->addr = 1;
- flag = 1;
- }
- }
- }
- switch(et) {
- case 0:
- case TFUNC:
- goto none;
- }
-
- if(nvar >= NVAR) {
- if(debug['w'] > 1 && node != N)
- fatal("variable not optimized: %#N", node);
-
- // If we're not tracking a word in a variable, mark the rest as
- // having its address taken, so that we keep the whole thing
- // live at all calls. otherwise we might optimize away part of
- // a variable but not all of it.
- for(i=0; i<nvar; i++) {
- v = var+i;
- if(v->node == node)
- v->addr = 1;
- }
- goto none;
- }
-
- i = nvar;
- nvar++;
- v = var+i;
- v->offset = o;
- v->name = n;
- v->etype = et;
- v->width = w;
- v->addr = flag; // funny punning
- v->node = node;
-
- // node->opt is the head of a linked list
- // of Vars within the given Node, so that
- // we can start at a Var and find all the other
- // Vars in the same Go variable.
- v->nextinnode = node->opt;
- node->opt = v;
-
- bit = blsh(i);
- if(n == NAME_EXTERN || n == NAME_STATIC)
- for(z=0; z<BITS; z++)
- externs.b[z] |= bit.b[z];
- if(n == NAME_PARAM)
- for(z=0; z<BITS; z++)
- params.b[z] |= bit.b[z];
-
- if(node->class == PPARAM)
- for(z=0; z<BITS; z++)
- ivar.b[z] |= bit.b[z];
- if(node->class == PPARAMOUT)
- for(z=0; z<BITS; z++)
- ovar.b[z] |= bit.b[z];
-
- // Treat values with their address taken as live at calls,
- // because the garbage collector's liveness analysis in ../gc/plive.c does.
- // These must be consistent or else we will elide stores and the garbage
- // collector will see uninitialized data.
- // The typical case where our own analysis is out of sync is when the
- // node appears to have its address taken but that code doesn't actually
- // get generated and therefore doesn't show up as an address being
- // taken when we analyze the instruction stream.
- // One instance of this case is when a closure uses the same name as
- // an outer variable for one of its own variables declared with :=.
- // The parser flags the outer variable as possibly shared, and therefore
- // sets addrtaken, even though it ends up not being actually shared.
- // If we were better about _ elision, _ = &x would suffice too.
- // The broader := in a closure problem is mentioned in a comment in
- // closure.c:/^typecheckclosure and dcl.c:/^oldname.
- if(node->addrtaken)
- v->addr = 1;
-
- // Disable registerization for globals, because:
- // (1) we might panic at any time and we want the recovery code
- // to see the latest values (issue 1304).
- // (2) we don't know what pointers might point at them and we want
- // loads via those pointers to see updated values and vice versa (issue 7995).
- //
- // Disable registerization for results if using defer, because the deferred func
- // might recover and return, causing the current values to be used.
- if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
- v->addr = 1;
-
- if(debug['R'])
- print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
- ostats.nvar++;
-
- return bit;
-
-none:
- return zbits;
-}
-
-void
-prop(Reg *r, Bits ref, Bits cal)
-{
- Reg *r1, *r2;
- int z, i, j;
- Var *v, *v1;
-
- for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
- for(z=0; z<BITS; z++) {
- ref.b[z] |= r1->refahead.b[z];
- if(ref.b[z] != r1->refahead.b[z]) {
- r1->refahead.b[z] = ref.b[z];
- change++;
- }
- cal.b[z] |= r1->calahead.b[z];
- if(cal.b[z] != r1->calahead.b[z]) {
- r1->calahead.b[z] = cal.b[z];
- change++;
- }
- }
- switch(r1->f.prog->as) {
- case ACALL:
- if(noreturn(r1->f.prog))
- break;
-
- // Mark all input variables (ivar) as used, because that's what the
- // liveness bitmaps say. The liveness bitmaps say that so that a
- // panic will not show stale values in the parameter dump.
- // Mark variables with a recent VARDEF (r1->act) as used,
- // so that the optimizer flushes initializations to memory,
- // so that if a garbage collection happens during this CALL,
- // the collector will see initialized memory. Again this is to
- // match what the liveness bitmaps say.
- for(z=0; z<BITS; z++) {
- cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
- ref.b[z] = 0;
- }
-
- // cal.b is the current approximation of what's live across the call.
- // Every bit in cal.b is a single stack word. For each such word,
- // find all the other tracked stack words in the same Go variable
- // (struct/slice/string/interface) and mark them live too.
- // This is necessary because the liveness analysis for the garbage
- // collector works at variable granularity, not at word granularity.
- // It is fundamental for slice/string/interface: the garbage collector
- // needs the whole value, not just some of the words, in order to
- // interpret the other bits correctly. Specifically, slice needs a consistent
- // ptr and cap, string needs a consistent ptr and len, and interface
- // needs a consistent type word and data word.
- for(z=0; z<BITS; z++) {
- if(cal.b[z] == 0)
- continue;
- for(i=0; i<64; i++) {
- if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
- continue;
- v = var+z*64+i;
- if(v->node->opt == nil) // v represents fixed register, not Go variable
- continue;
-
- // v->node->opt is the head of a linked list of Vars
- // corresponding to tracked words from the Go variable v->node.
- // Walk the list and set all the bits.
- // For a large struct this could end up being quadratic:
- // after the first setting, the outer loop (for z, i) would see a 1 bit
- // for all of the remaining words in the struct, and for each such
- // word would go through and turn on all the bits again.
- // To avoid the quadratic behavior, we only turn on the bits if
- // v is the head of the list or if the head's bit is not yet turned on.
- // This will set the bits at most twice, keeping the overall loop linear.
- v1 = v->node->opt;
- j = v1 - var;
- if(v == v1 || !btest(&cal, j)) {
- for(; v1 != nil; v1 = v1->nextinnode) {
- j = v1 - var;
- biset(&cal, j);
- }
- }
- }
- }
- break;
-
- case ATEXT:
- for(z=0; z<BITS; z++) {
- cal.b[z] = 0;
- ref.b[z] = 0;
- }
- break;
-
- case ARET:
- for(z=0; z<BITS; z++) {
- cal.b[z] = externs.b[z] | ovar.b[z];
- ref.b[z] = 0;
- }
- break;
- }
- for(z=0; z<BITS; z++) {
- ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
- r1->use1.b[z] | r1->use2.b[z];
- cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
- r1->refbehind.b[z] = ref.b[z];
- r1->calbehind.b[z] = cal.b[z];
- }
- if(r1->f.active)
- break;
- r1->f.active = 1;
- }
- for(; r != r1; r = (Reg*)r->f.p1)
- for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
- prop(r2, r->refbehind, r->calbehind);
-}
-
-void
-synch(Reg *r, Bits dif)
-{
- Reg *r1;
- int z;
-
- for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
- for(z=0; z<BITS; z++) {
- dif.b[z] = (dif.b[z] &
- ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
- r1->set.b[z] | r1->regdiff.b[z];
- if(dif.b[z] != r1->regdiff.b[z]) {
- r1->regdiff.b[z] = dif.b[z];
- change++;
- }
- }
- if(r1->f.active)
- break;
- r1->f.active = 1;
- for(z=0; z<BITS; z++)
- dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
- if(r1->f.s2 != nil)
- synch((Reg*)r1->f.s2, dif);
- }
-}
-
-uint32
-allreg(uint32 b, Rgn *r)
-{
- Var *v;
- int i;
-
- v = var + r->varno;
- r->regno = 0;
- switch(v->etype) {
-
- default:
- fatal("unknown etype %d/%E", bitno(b), v->etype);
- break;
-
- case TINT8:
- case TUINT8:
- case TINT16:
- case TUINT16:
- case TINT32:
- case TUINT32:
- case TINT64:
- case TUINT64:
- case TINT:
- case TUINT:
- case TUINTPTR:
- case TBOOL:
- case TPTR32:
- case TPTR64:
- i = BtoR(~b);
- if(i && r->cost > 0) {
- r->regno = i;
- return RtoB(i);
- }
- break;
-
- case TFLOAT32:
- case TFLOAT64:
- i = BtoF(~b);
- if(i && r->cost > 0) {
- r->regno = i;
- return FtoB(i);
- }
- break;
- }
- return 0;
-}
-
-void
-paint1(Reg *r, int bn)
-{
- Reg *r1;
- int z;
- uint64 bb;
-
- z = bn/64;
- bb = 1LL<<(bn%64);
- if(r->act.b[z] & bb)
- return;
- for(;;) {
- if(!(r->refbehind.b[z] & bb))
- break;
- r1 = (Reg*)r->f.p1;
- if(r1 == R)
- break;
- if(!(r1->refahead.b[z] & bb))
- break;
- if(r1->act.b[z] & bb)
- break;
- r = r1;
- }
-
- if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
- change -= CLOAD * r->f.loop;
- }
- for(;;) {
- r->act.b[z] |= bb;
-
- if(r->f.prog->as != ANOP) { // don't give credit for NOPs
- if(r->use1.b[z] & bb)
- change += CREF * r->f.loop;
- if((r->use2.b[z]|r->set.b[z]) & bb)
- change += CREF * r->f.loop;
- }
-
- if(STORE(r) & r->regdiff.b[z] & bb) {
- change -= CLOAD * r->f.loop;
- }
-
- if(r->refbehind.b[z] & bb)
- for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
- if(r1->refahead.b[z] & bb)
- paint1(r1, bn);
-
- if(!(r->refahead.b[z] & bb))
- break;
- r1 = (Reg*)r->f.s2;
- if(r1 != R)
- if(r1->refbehind.b[z] & bb)
- paint1(r1, bn);
- r = (Reg*)r->f.s1;
- if(r == R)
- break;
- if(r->act.b[z] & bb)
- break;
- if(!(r->refbehind.b[z] & bb))
- break;
- }
-}
-
-uint32
-paint2(Reg *r, int bn, int depth)
-{
- Reg *r1;
- int z;
- uint64 bb, vreg;
-
- z = bn/64;
- bb = 1LL << (bn%64);
- vreg = regbits;
- if(!(r->act.b[z] & bb))
- return vreg;
- for(;;) {
- if(!(r->refbehind.b[z] & bb))
- break;
- r1 = (Reg*)r->f.p1;
- if(r1 == R)
- break;
- if(!(r1->refahead.b[z] & bb))
- break;
- if(!(r1->act.b[z] & bb))
- break;
- r = r1;
- }
- for(;;) {
- if(debug['R'] && debug['v'])
- print(" paint2 %d %P\n", depth, r->f.prog);
-
- r->act.b[z] &= ~bb;
-
- vreg |= r->regu;
-
- if(r->refbehind.b[z] & bb)
- for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
- if(r1->refahead.b[z] & bb)
- vreg |= paint2(r1, bn, depth+1);
-
- if(!(r->refahead.b[z] & bb))
- break;
- r1 = (Reg*)r->f.s2;
- if(r1 != R)
- if(r1->refbehind.b[z] & bb)
- vreg |= paint2(r1, bn, depth+1);
- r = (Reg*)r->f.s1;
- if(r == R)
- break;
- if(!(r->act.b[z] & bb))
- break;
- if(!(r->refbehind.b[z] & bb))
- break;
- }
-
- return vreg;
-}
-
-void
-paint3(Reg *r, int bn, uint32 rb, int rn)
-{
- Reg *r1;
- Prog *p;
- int z;
- uint64 bb;
-
- z = bn/64;
- bb = 1LL << (bn%64);
- if(r->act.b[z] & bb)
- return;
- for(;;) {
- if(!(r->refbehind.b[z] & bb))
- break;
- r1 = (Reg*)r->f.p1;
- if(r1 == R)
- break;
- if(!(r1->refahead.b[z] & bb))
- break;
- if(r1->act.b[z] & bb)
- break;
- r = r1;
- }
-
- if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
- addmove(r, bn, rn, 0);
- for(;;) {
- r->act.b[z] |= bb;
- p = r->f.prog;
-
- if(r->use1.b[z] & bb) {
- if(debug['R'] && debug['v'])
- print("%P", p);
- addreg(&p->from, rn);
- if(debug['R'] && debug['v'])
- print(" ===change== %P\n", p);
- }
- if((r->use2.b[z]|r->set.b[z]) & bb) {
- if(debug['R'] && debug['v'])
- print("%P", p);
- addreg(&p->to, rn);
- if(debug['R'] && debug['v'])
- print(" ===change== %P\n", p);
- }
-
- if(STORE(r) & r->regdiff.b[z] & bb)
- addmove(r, bn, rn, 1);
- r->regu |= rb;
-
- if(r->refbehind.b[z] & bb)
- for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
- if(r1->refahead.b[z] & bb)
- paint3(r1, bn, rb, rn);
-
- if(!(r->refahead.b[z] & bb))
- break;
- r1 = (Reg*)r->f.s2;
- if(r1 != R)
- if(r1->refbehind.b[z] & bb)
- paint3(r1, bn, rb, rn);
- r = (Reg*)r->f.s1;
- if(r == R)
- break;
- if(r->act.b[z] & bb)
- break;
- if(!(r->refbehind.b[z] & bb))
- break;
- }
-}
-
-void
-addreg(Adr *a, int rn)
-{
- a->sym = nil;
- a->node = nil;
- a->offset = 0;
- a->type = TYPE_REG;
- a->reg = rn;
- a->name = 0;
-
- ostats.ncvtreg++;
-}
-
// RtoB returns the bit that stands for general-purpose register r in the
// register bit sets: REG_AX..REG_R15 map to bit (r - REG_AX). Any register
// number outside that range yields 0 (no bit).
// This patch widens the result from uint32 to uint64 and the shifted
// constant from 1L to 1ULL; the bit positions are unchanged.
-uint32
+uint64
RtoB(int r)
{
if(r < REG_AX || r > REG_R15)
return 0;
- return 1L << (r-REG_AX);
+ return 1ULL << (r-REG_AX);
}
int
-BtoR(uint32 b)
+BtoR(uint64 b)
{
- b &= 0xffffL;
+ b &= 0xffffULL;
if(nacl)
b &= ~((1<<(REG_BP-REG_AX)) | (1<<(REG_R15-REG_AX)));
else if(framepointer_enabled)
* ...
* 31 X15
*/
// FtoB returns the bit that stands for X register f in the register bit
// sets: REG_X0..REG_X15 map to bit (f - REG_X0) + 16, i.e. the 16 bits
// directly above those RtoB uses for AX..R15. Any register number outside
// that range yields 0 (no bit).
// This patch widens the result from uint32 to uint64 and the shifted
// constant from 1L to 1ULL; the bit positions are unchanged.
-uint32
+uint64
FtoB(int f)
{
if(f < REG_X0 || f > REG_X15)
return 0;
- return 1L << (f - REG_X0 + 16);
+ return 1ULL << (f - REG_X0 + 16);
}
int
-BtoF(uint32 b)
+BtoF(uint64 b)
{
b &= 0xFFFF0000L;
return 0;
return bitno(b) - 16 + REG_X0;
}
-
-void
-dumpone(Flow *f, int isreg)
-{
- int z;
- Bits bit;
- Reg *r;
-
- print("%d:%P", f->loop, f->prog);
- if(isreg) {
- r = (Reg*)f;
- for(z=0; z<BITS; z++)
- bit.b[z] =
- r->set.b[z] |
- r->use1.b[z] |
- r->use2.b[z] |
- r->refbehind.b[z] |
- r->refahead.b[z] |
- r->calbehind.b[z] |
- r->calahead.b[z] |
- r->regdiff.b[z] |
- r->act.b[z] |
- 0;
- if(bany(&bit)) {
- print("\t");
- if(bany(&r->set))
- print(" s:%Q", r->set);
- if(bany(&r->use1))
- print(" u1:%Q", r->use1);
- if(bany(&r->use2))
- print(" u2:%Q", r->use2);
- if(bany(&r->refbehind))
- print(" rb:%Q ", r->refbehind);
- if(bany(&r->refahead))
- print(" ra:%Q ", r->refahead);
- if(bany(&r->calbehind))
- print(" cb:%Q ", r->calbehind);
- if(bany(&r->calahead))
- print(" ca:%Q ", r->calahead);
- if(bany(&r->regdiff))
- print(" d:%Q ", r->regdiff);
- if(bany(&r->act))
- print(" a:%Q ", r->act);
- }
- }
- print("\n");
-}
-
-void
-dumpit(char *str, Flow *r0, int isreg)
-{
- Flow *r, *r1;
-
- print("\n%s\n", str);
- for(r = r0; r != nil; r = r->link) {
- dumpone(r, isreg);
- r1 = r->p2;
- if(r1 != nil) {
- print(" pred:");
- for(; r1 != nil; r1 = r1->p2link)
- print(" %.4ud", (int)r1->prog->pc);
- print("\n");
- }
- // Print successors if it's not just the next one
- if(r->s1 != r->link || r->s2 != nil) {
- print(" succ:");
- if(r->s1 != nil)
- print(" %.4ud", (int)r->s1->prog->pc);
- if(r->s2 != nil)
- print(" %.4ud", (int)r->s2->prog->pc);
- print("\n");
- }
- }
-}
arch.ginscall = ginscall;
arch.igen = igen;
arch.linkarchinit = linkarchinit;
+ arch.peep = peep;
arch.proginfo = proginfo;
arch.regalloc = regalloc;
arch.regfree = regfree;
- arch.regopt = regopt;
arch.regtyp = regtyp;
arch.sameaddr = sameaddr;
arch.smallindir = smallindir;
arch.stackaddr = stackaddr;
+ arch.excludedregs = excludedregs;
+ arch.RtoB = RtoB;
+ arch.FtoB = FtoB;
+ arch.BtoR = BtoR;
+ arch.BtoF = BtoF;
+ arch.optoas = optoas;
+ arch.doregbits = doregbits;
+ arch.regnames = regnames;
gcmain(argc, argv);
}
int smallindir(Addr*, Addr*);
int stackaddr(Addr*);
Prog* unpatch(Prog*);
+
+/*
+ * reg.c
+ */
+uint64 excludedregs(void);
+uint64 RtoB(int);
+uint64 FtoB(int);
+int BtoR(uint64);
+int BtoF(uint64);
+uint64 doregbits(int);
+char** regnames(int*);
+
+/*
+ * peep.c
+ */
+void peep(Prog*);
#include <u.h>
#include <libc.h>
#include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
static Prog *appendpp(Prog*, int, int, int, vlong, int, int, vlong);
static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax);
case CASE(OAS, TPTR32):
a = AMOVL;
break;
+
+ case CASE(OAS, TFLOAT32):
+ a = AMOVSS;
+ break;
+
+ case CASE(OAS, TFLOAT64):
+ a = AMOVSD;
+ break;
case CASE(OADD, TINT8):
case CASE(OADD, TUINT8):
+++ /dev/null
-// Derived from Inferno utils/6c/gc.h
-// http://code.google.com/p/inferno-os/source/browse/utils/6c/gc.h
-//
-// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
-// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
-// Portions Copyright © 1997-1999 Vita Nuova Limited
-// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
-// Portions Copyright © 2004,2006 Bruce Ellis
-// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
-// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
-// Portions Copyright © 2009 The Go Authors. All rights reserved.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-
-#define Z N
-#define Adr Addr
-
-#define BLOAD(r) band(bnot(r->refbehind), r->refahead)
-#define BSTORE(r) band(bnot(r->calbehind), r->calahead)
-#define LOAD(r) (~r->refbehind.b[z] & r->refahead.b[z])
-#define STORE(r) (~r->calbehind.b[z] & r->calahead.b[z])
-
-#define CLOAD 5
-#define CREF 5
-#define CINF 1000
-#define LOOP 3
-
-typedef struct Reg Reg;
-typedef struct Rgn Rgn;
-
-/*c2go
-extern Node *Z;
-enum
-{
- CLOAD = 5,
- CREF = 5,
- CINF = 1000,
- LOOP = 3,
-};
-
-uint32 BLOAD(Reg*);
-uint32 BSTORE(Reg*);
-uint64 LOAD(Reg*);
-uint64 STORE(Reg*);
-*/
-
-// A Reg is a wrapper around a single Prog (one instruction) that holds
-// register optimization information while the optimizer runs.
-// r->prog is the instruction.
-// r->prog->opt points back to r.
-struct Reg
-{
- Flow f;
-
- Bits set; // regopt variables written by this instruction.
- Bits use1; // regopt variables read by prog->from.
- Bits use2; // regopt variables read by prog->to.
-
- // refahead/refbehind are the regopt variables whose current
- // value may be used in the following/preceding instructions
- // up to a CALL (or the value is clobbered).
- Bits refbehind;
- Bits refahead;
- // calahead/calbehind are similar, but for variables in
- // instructions that are reachable after hitting at least one
- // CALL.
- Bits calbehind;
- Bits calahead;
- Bits regdiff;
- Bits act;
-
- int32 regu; // register used bitmap
- int32 rpo; // reverse post ordering
- int32 active;
-
- uint16 loop; // x5 for every loop
- uchar refset; // diagnostic generated
-
- Reg* p1; // predecessors of this instruction: p1,
- Reg* p2; // and then p2 linked though p2link.
- Reg* p2link;
- Reg* s1; // successors of this instruction (at most two: s1 and s2).
- Reg* s2;
- Reg* link; // next instruction in function code
- Prog* prog; // actual instruction
-};
-#define R ((Reg*)0)
-/*c2go extern Reg *R; */
-
-#define NRGN 600
-/*c2go enum { NRGN = 600 }; */
-
-// A Rgn represents a single regopt variable over a region of code
-// where a register could potentially be dedicated to that variable.
-// The code encompassed by a Rgn is defined by the flow graph,
-// starting at enter, flood-filling forward while varno is refahead
-// and backward while varno is refbehind, and following branches. A
-// single variable may be represented by multiple disjoint Rgns and
-// each Rgn may choose a different register for that variable.
-// Registers are allocated to regions greedily in order of descending
-// cost.
-struct Rgn
-{
- Reg* enter;
- short cost;
- short varno;
- short regno;
-};
-
-EXTERN int32 exregoffset; // not set
-EXTERN int32 exfregoffset; // not set
-EXTERN Reg zreg;
-EXTERN Reg* freer;
-EXTERN Reg** rpo2r;
-EXTERN Rgn region[NRGN];
-EXTERN Rgn* rgp;
-EXTERN int nregion;
-EXTERN int nvar;
-EXTERN int32 regbits;
-EXTERN int32 exregbits;
-EXTERN Bits externs;
-EXTERN Bits params;
-EXTERN Bits consts;
-EXTERN Bits addrs;
-EXTERN Bits ivar;
-EXTERN Bits ovar;
-EXTERN int change;
-EXTERN int32 maxnr;
-EXTERN int32* idom;
-
-EXTERN struct
-{
- int32 ncvtreg;
- int32 nspill;
- int32 nreload;
- int32 ndelmov;
- int32 nvar;
- int32 naddr;
-} ostats;
-
-/*
- * reg.c
- */
-Reg* rega(void);
-int rcmp(const void*, const void*);
-void regopt(Prog*);
-void addmove(Reg*, int, int, int);
-Bits mkvar(Reg*, Adr*);
-void prop(Reg*, Bits, Bits);
-void loopit(Reg*, int32);
-void synch(Reg*, Bits);
-uint32 allreg(uint32, Rgn*);
-void paint1(Reg*, int);
-uint32 paint2(Reg*, int, int);
-void paint3(Reg*, int, uint32, int);
-void addreg(Adr*, int);
-void dumpone(Flow*, int);
-void dumpit(char*, Flow*, int);
-
-/*
- * peep.c
- */
-void peep(Prog*);
-void excise(Flow*);
-int copyu(Prog*, Adr*, Adr*);
-
-uint32 RtoB(int);
-uint32 FtoB(int);
-int BtoR(uint32);
-int BtoF(uint32);
-
-/*
- * prog.c
- */
-void proginfo(ProgInfo*, Prog*);
#include <u.h>
#include <libc.h>
#include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
enum {
REGEXT = 0,
+ exregoffset = REG_DI,
};
static void conprop(Flow *r);
static int copyas(Adr*, Adr*);
static int copyau(Adr*, Adr*);
static int copysub(Adr*, Adr*, Adr*, int);
+static int copyu(Prog*, Adr*, Adr*);
static uint32 gactive;
* 4 if set and used
* 0 otherwise (not touched)
*/
-int
+static int
copyu(Prog *p, Adr *v, Adr *s)
{
ProgInfo info;
#include <u.h>
#include <libc.h>
#include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
// Matches real RtoB but can be used in global initializer.
#define RtoB(r) (1<<((r)-REG_AX))
#include <u.h>
#include <libc.h>
#include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
-#define NREGVAR 16 /* 8 integer + 8 floating */
-#define REGBITS ((uint64)0xffffull)
-/*c2go enum {
- NREGVAR = 16,
- REGBITS = (1<<NREGVAR) - 1,
+enum {
+ NREGVAR = 16, /* 8 integer + 8 floating */
};
-*/
-
-static Reg* firstr;
-static int first = 1;
-
-int
-rcmp(const void *a1, const void *a2)
-{
- Rgn *p1, *p2;
- int c1, c2;
-
- p1 = (Rgn*)a1;
- p2 = (Rgn*)a2;
- c1 = p2->cost;
- c2 = p1->cost;
- if(c1 -= c2)
- return c1;
- return p2->varno - p1->varno;
-}
-
-static void
-setaddrs(Bits bit)
-{
- int i, n;
- Var *v;
- Node *node;
-
- while(bany(&bit)) {
- // convert each bit to a variable
- i = bnum(bit);
- node = var[i].node;
- n = var[i].name;
- biclr(&bit, i);
-
- // disable all pieces of that variable
- for(i=0; i<nvar; i++) {
- v = var+i;
- if(v->node == node && v->name == n)
- v->addr = 2;
- }
- }
-}
static char* regname[] = {
".ax", ".cx", ".dx", ".bx", ".sp", ".bp", ".si", ".di",
".x0", ".x1", ".x2", ".x3", ".x4", ".x5", ".x6", ".x7",
};
-static Node* regnodes[NREGVAR];
-
-static void walkvardef(Node *n, Reg *r, int active);
-
-void
-regopt(Prog *firstp)
+char**
+regnames(int *n)
{
- Reg *r, *r1;
- Prog *p;
- Graph *g;
- ProgInfo info;
- int i, z, active;
- uint32 vreg;
- Bits bit;
-
- if(first) {
- fmtinstall('Q', Qconv);
- exregoffset = REG_DI; // no externals
- first = 0;
- }
-
- mergetemp(firstp);
-
- /*
- * control flow is more complicated in generated go code
- * than in generated c code. define pseudo-variables for
- * registers, so we have complete register usage information.
- */
- nvar = NREGVAR;
- memset(var, 0, NREGVAR*sizeof var[0]);
- for(i=0; i<NREGVAR; i++) {
- if(regnodes[i] == N)
- regnodes[i] = newname(lookup(regname[i]));
- var[i].node = regnodes[i];
- }
-
- regbits = RtoB(REG_SP);
- for(z=0; z<BITS; z++) {
- externs.b[z] = 0;
- params.b[z] = 0;
- consts.b[z] = 0;
- addrs.b[z] = 0;
- ivar.b[z] = 0;
- ovar.b[z] = 0;
- }
-
- /*
- * pass 1
- * build aux data structure
- * allocate pcs
- * find use and set of variables
- */
- g = flowstart(firstp, sizeof(Reg));
- if(g == nil) {
- for(i=0; i<nvar; i++)
- var[i].node->opt = nil;
- return;
- }
-
- firstr = (Reg*)g->start;
-
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- p = r->f.prog;
- if(p->as == AVARDEF || p->as == AVARKILL)
- continue;
- proginfo(&info, p);
-
- // Avoid making variables for direct-called functions.
- if(p->as == ACALL && p->to.type == TYPE_MEM && p->to.name == NAME_EXTERN)
- continue;
-
- r->use1.b[0] |= info.reguse | info.regindex;
- r->set.b[0] |= info.regset;
-
- bit = mkvar(r, &p->from);
- if(bany(&bit)) {
- if(info.flags & LeftAddr)
- setaddrs(bit);
- if(info.flags & LeftRead)
- for(z=0; z<BITS; z++)
- r->use1.b[z] |= bit.b[z];
- if(info.flags & LeftWrite)
- for(z=0; z<BITS; z++)
- r->set.b[z] |= bit.b[z];
- }
-
- bit = mkvar(r, &p->to);
- if(bany(&bit)) {
- if(info.flags & RightAddr)
- setaddrs(bit);
- if(info.flags & RightRead)
- for(z=0; z<BITS; z++)
- r->use2.b[z] |= bit.b[z];
- if(info.flags & RightWrite)
- for(z=0; z<BITS; z++)
- r->set.b[z] |= bit.b[z];
- }
- }
- if(firstr == R)
- return;
-
- for(i=0; i<nvar; i++) {
- Var *v = var+i;
- if(v->addr) {
- bit = blsh(i);
- for(z=0; z<BITS; z++)
- addrs.b[z] |= bit.b[z];
- }
-
- if(debug['R'] && debug['v'])
- print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
- i, v->addr, v->etype, v->width, v->node, v->offset);
- }
-
- if(debug['R'] && debug['v'])
- dumpit("pass1", &firstr->f, 1);
-
- /*
- * pass 2
- * find looping structure
- */
- flowrpo(g);
-
- if(debug['R'] && debug['v'])
- dumpit("pass2", &firstr->f, 1);
-
- /*
- * pass 2.5
- * iterate propagating fat vardef covering forward
- * r->act records vars with a VARDEF since the last CALL.
- * (r->act will be reused in pass 5 for something else,
- * but we'll be done with it by then.)
- */
- active = 0;
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- r->f.active = 0;
- r->act = zbits;
- }
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- p = r->f.prog;
- if(p->as == AVARDEF && isfat(((Node*)(p->to.node))->type) && ((Node*)(p->to.node))->opt != nil) {
- active++;
- walkvardef(p->to.node, r, active);
- }
- }
-
- /*
- * pass 3
- * iterate propagating usage
- * back until flow graph is complete
- */
-loop1:
- change = 0;
- for(r = firstr; r != R; r = (Reg*)r->f.link)
- r->f.active = 0;
- for(r = firstr; r != R; r = (Reg*)r->f.link)
- if(r->f.prog->as == ARET)
- prop(r, zbits, zbits);
-loop11:
- /* pick up unreachable code */
- i = 0;
- for(r = firstr; r != R; r = r1) {
- r1 = (Reg*)r->f.link;
- if(r1 && r1->f.active && !r->f.active) {
- prop(r, zbits, zbits);
- i = 1;
- }
- }
- if(i)
- goto loop11;
- if(change)
- goto loop1;
-
- if(debug['R'] && debug['v'])
- dumpit("pass3", &firstr->f, 1);
-
- /*
- * pass 4
- * iterate propagating register/variable synchrony
- * forward until graph is complete
- */
-loop2:
- change = 0;
- for(r = firstr; r != R; r = (Reg*)r->f.link)
- r->f.active = 0;
- synch(firstr, zbits);
- if(change)
- goto loop2;
-
- if(debug['R'] && debug['v'])
- dumpit("pass4", &firstr->f, 1);
-
- /*
- * pass 4.5
- * move register pseudo-variables into regu.
- */
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
-
- r->set.b[0] &= ~REGBITS;
- r->use1.b[0] &= ~REGBITS;
- r->use2.b[0] &= ~REGBITS;
- r->refbehind.b[0] &= ~REGBITS;
- r->refahead.b[0] &= ~REGBITS;
- r->calbehind.b[0] &= ~REGBITS;
- r->calahead.b[0] &= ~REGBITS;
- r->regdiff.b[0] &= ~REGBITS;
- r->act.b[0] &= ~REGBITS;
- }
-
- /*
- * pass 5
- * isolate regions
- * calculate costs (paint1)
- */
- r = firstr;
- if(r) {
- for(z=0; z<BITS; z++)
- bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
- ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
- if(bany(&bit) && !r->f.refset) {
- // should never happen - all variables are preset
- if(debug['w'])
- print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
- r->f.refset = 1;
- }
- }
- for(r = firstr; r != R; r = (Reg*)r->f.link)
- r->act = zbits;
- rgp = region;
- nregion = 0;
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- for(z=0; z<BITS; z++)
- bit.b[z] = r->set.b[z] &
- ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
- if(bany(&bit) && !r->f.refset) {
- if(debug['w'])
- print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
- r->f.refset = 1;
- excise(&r->f);
- }
- for(z=0; z<BITS; z++)
- bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
- while(bany(&bit)) {
- i = bnum(bit);
- rgp->enter = r;
- rgp->varno = i;
- change = 0;
- paint1(r, i);
- biclr(&bit, i);
- if(change <= 0)
- continue;
- rgp->cost = change;
- nregion++;
- if(nregion >= NRGN) {
- if(debug['R'] && debug['v'])
- print("too many regions\n");
- goto brk;
- }
- rgp++;
- }
- }
-brk:
- qsort(region, nregion, sizeof(region[0]), rcmp);
-
- /*
- * pass 6
- * determine used registers (paint2)
- * replace code (paint3)
- */
- rgp = region;
- if(debug['R'] && debug['v'])
- print("\nregisterizing\n");
- for(i=0; i<nregion; i++) {
- if(debug['R'] && debug['v'])
- print("region %d: cost %d varno %d enter %lld\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc);
- bit = blsh(rgp->varno);
- vreg = paint2(rgp->enter, rgp->varno, 0);
- vreg = allreg(vreg, rgp);
- if(rgp->regno != 0)
- paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
- rgp++;
- }
-
- /*
- * free aux structures. peep allocates new ones.
- */
- for(i=0; i<nvar; i++)
- var[i].node->opt = nil;
- flowend(g);
- firstr = R;
-
- if(debug['R'] && debug['v']) {
- // Rebuild flow graph, since we inserted instructions
- g = flowstart(firstp, sizeof(Reg));
- firstr = (Reg*)g->start;
- dumpit("pass6", &firstr->f, 1);
- flowend(g);
- firstr = R;
- }
-
- /*
- * pass 7
- * peep-hole on basic block
- */
- if(!debug['R'] || debug['P'])
- peep(firstp);
-
- /*
- * eliminate nops
- */
- for(p=firstp; p!=P; p=p->link) {
- while(p->link != P && p->link->as == ANOP)
- p->link = p->link->link;
- if(p->to.type == TYPE_BRANCH)
- while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
- p->to.u.branch = p->to.u.branch->link;
- }
-
- if(!use_sse)
- for(p=firstp; p!=P; p=p->link) {
- if(p->from.reg >= REG_X0 && p->from.reg <= REG_X7)
- fatal("invalid use of %R with GO386=387: %P", p->from.reg, p);
- if(p->to.reg >= REG_X0 && p->to.reg <= REG_X7)
- fatal("invalid use of %R with GO386=387: %P", p->to.reg, p);
- }
-
- if(debug['R']) {
- if(ostats.ncvtreg ||
- ostats.nspill ||
- ostats.nreload ||
- ostats.ndelmov ||
- ostats.nvar ||
- ostats.naddr ||
- 0)
- print("\nstats\n");
-
- if(ostats.ncvtreg)
- print(" %4d cvtreg\n", ostats.ncvtreg);
- if(ostats.nspill)
- print(" %4d spill\n", ostats.nspill);
- if(ostats.nreload)
- print(" %4d reload\n", ostats.nreload);
- if(ostats.ndelmov)
- print(" %4d delmov\n", ostats.ndelmov);
- if(ostats.nvar)
- print(" %4d var\n", ostats.nvar);
- if(ostats.naddr)
- print(" %4d addr\n", ostats.naddr);
-
- memset(&ostats, 0, sizeof(ostats));
- }
+ *n = NREGVAR;
+ return regname;
}
-static void
-walkvardef(Node *n, Reg *r, int active)
+uint64
+excludedregs(void) // reports REG_SP (as an RtoB bit) as the excluded-register bitmap
{
- Reg *r1, *r2;
- int bn;
- Var *v;
-
- for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) {
- if(r1->f.active == active)
- break;
- r1->f.active = active;
- if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n)
- break;
- for(v=n->opt; v!=nil; v=v->nextinnode) {
- bn = v - var;
- biset(&r1->act, bn);
- }
- if(r1->f.prog->as == ACALL)
- break;
- }
-
- for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1)
- if(r2->f.s2 != nil)
- walkvardef(n, (Reg*)r2->f.s2, active);
+ return RtoB(REG_SP); // same value the removed regopt stored in regbits
}
-/*
- * add mov b,rn
- * just after r
- */
-void
-addmove(Reg *r, int bn, int rn, int f)
-{
- Prog *p, *p1;
- Adr *a;
- Var *v;
-
- p1 = mal(sizeof(*p1));
- clearp(p1);
- p1->pc = 9999;
-
- p = r->f.prog;
- p1->link = p->link;
- p->link = p1;
- p1->lineno = p->lineno;
-
- v = var + bn;
-
- a = &p1->to;
- a->offset = v->offset;
- a->etype = v->etype;
- a->type = TYPE_MEM;
- a->name = v->name;
- a->node = v->node;
- a->sym = linksym(v->node->sym);
-
- // need to clean this up with wptr and
- // some of the defaults
- p1->as = AMOVL;
- switch(v->etype) {
- default:
- fatal("unknown type %E", v->etype);
- case TINT8:
- case TUINT8:
- case TBOOL:
- p1->as = AMOVB;
- break;
- case TINT16:
- case TUINT16:
- p1->as = AMOVW;
- break;
- case TFLOAT32:
- p1->as = AMOVSS;
- break;
- case TFLOAT64:
- p1->as = AMOVSD;
- break;
- case TINT:
- case TUINT:
- case TINT32:
- case TUINT32:
- case TPTR32:
- break;
- }
-
- p1->from.type = TYPE_REG;
- p1->from.reg = rn;
- p1->from.name = 0;
- if(!f) {
- p1->from = *a;
- *a = zprog.from;
- a->type = TYPE_REG;
- a->reg = rn;
- if(v->etype == TUINT8)
- p1->as = AMOVB;
- if(v->etype == TUINT16)
- p1->as = AMOVW;
- }
- if(debug['R'] && debug['v'])
- print("%P ===add=== %P\n", p, p1);
- ostats.nspill++;
-}
-
-uint32
+uint64
doregbits(int r)
{
- uint32 b;
+ uint64 b;
b = 0;
if(r >= REG_AX && r <= REG_DI)
return b;
}
-static int
-overlap(int32 o1, int w1, int32 o2, int w2)
-{
- int32 t1, t2;
-
- t1 = o1+w1;
- t2 = o2+w2;
-
- if(!(t1 > o2 && t2 > o1))
- return 0;
-
- return 1;
-}
-
-Bits
-mkvar(Reg *r, Adr *a)
-{
- Var *v;
- int i, n, et, z, w, flag, regu;
- int32 o;
- Bits bit;
- Node *node;
-
- /*
- * mark registers used
- */
- if(a->type == TYPE_NONE)
- goto none;
-
- if(r != R)
- r->use1.b[0] |= doregbits(a->index);
-
- switch(a->type) {
- default:
- regu = doregbits(a->reg);
- if(regu == 0)
- goto none;
- bit = zbits;
- bit.b[0] = regu;
- return bit;
-
- case TYPE_ADDR:
- a->type = TYPE_MEM;
- bit = mkvar(r, a);
- setaddrs(bit);
- a->type = TYPE_ADDR;
- ostats.naddr++;
- goto none;
-
- case TYPE_MEM:
- switch(a->name) {
- default:
- goto none;
- case NAME_EXTERN:
- case NAME_STATIC:
- case NAME_PARAM:
- case NAME_AUTO:
- n = a->name;
- break;
- }
- }
-
- node = a->node;
- if(node == N || node->op != ONAME || node->orig == N)
- goto none;
- node = node->orig;
- if(node->orig != node)
- fatal("%D: bad node", a);
- if(node->sym == S || node->sym->name[0] == '.')
- goto none;
- et = a->etype;
- o = a->offset;
- w = a->width;
- if(w < 0)
- fatal("bad width %d for %D", w, a);
-
- flag = 0;
- for(i=0; i<nvar; i++) {
- v = var+i;
- if(v->node == node && v->name == n) {
- if(v->offset == o)
- if(v->etype == et)
- if(v->width == w)
- return blsh(i);
-
- // if they overlap, disable both
- if(overlap(v->offset, v->width, o, w)) {
- if(debug['R'])
- print("disable %s\n", node->sym->name);
- v->addr = 1;
- flag = 1;
- }
- }
- }
-
- switch(et) {
- case 0:
- case TFUNC:
- goto none;
- }
-
- if(nvar >= NVAR) {
- if(debug['w'] > 1 && node != N)
- fatal("variable not optimized: %D", a);
-
- // If we're not tracking a word in a variable, mark the rest as
- // having its address taken, so that we keep the whole thing
- // live at all calls. otherwise we might optimize away part of
- // a variable but not all of it.
- for(i=0; i<nvar; i++) {
- v = var+i;
- if(v->node == node)
- v->addr = 1;
- }
- goto none;
- }
-
- i = nvar;
- nvar++;
- v = var+i;
- v->offset = o;
- v->name = n;
- v->etype = et;
- v->width = w;
- v->addr = flag; // funny punning
- v->node = node;
-
- // node->opt is the head of a linked list
- // of Vars within the given Node, so that
- // we can start at a Var and find all the other
- // Vars in the same Go variable.
- v->nextinnode = node->opt;
- node->opt = v;
-
- bit = blsh(i);
- if(n == NAME_EXTERN || n == NAME_STATIC)
- for(z=0; z<BITS; z++)
- externs.b[z] |= bit.b[z];
- if(n == NAME_PARAM)
- for(z=0; z<BITS; z++)
- params.b[z] |= bit.b[z];
-
- if(node->class == PPARAM)
- for(z=0; z<BITS; z++)
- ivar.b[z] |= bit.b[z];
- if(node->class == PPARAMOUT)
- for(z=0; z<BITS; z++)
- ovar.b[z] |= bit.b[z];
-
- // Treat values with their address taken as live at calls,
- // because the garbage collector's liveness analysis in ../gc/plive.c does.
- // These must be consistent or else we will elide stores and the garbage
- // collector will see uninitialized data.
- // The typical case where our own analysis is out of sync is when the
- // node appears to have its address taken but that code doesn't actually
- // get generated and therefore doesn't show up as an address being
- // taken when we analyze the instruction stream.
- // One instance of this case is when a closure uses the same name as
- // an outer variable for one of its own variables declared with :=.
- // The parser flags the outer variable as possibly shared, and therefore
- // sets addrtaken, even though it ends up not being actually shared.
- // If we were better about _ elision, _ = &x would suffice too.
- // The broader := in a closure problem is mentioned in a comment in
- // closure.c:/^typecheckclosure and dcl.c:/^oldname.
- if(node->addrtaken)
- v->addr = 1;
-
- // Disable registerization for globals, because:
- // (1) we might panic at any time and we want the recovery code
- // to see the latest values (issue 1304).
- // (2) we don't know what pointers might point at them and we want
- // loads via those pointers to see updated values and vice versa (issue 7995).
- //
- // Disable registerization for results if using defer, because the deferred func
- // might recover and return, causing the current values to be used.
- if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
- v->addr = 1;
-
- if(debug['R'])
- print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
- ostats.nvar++;
-
- return bit;
-
-none:
- return zbits;
-}
-
-void
-prop(Reg *r, Bits ref, Bits cal)
-{
- Reg *r1, *r2;
- int z, i, j;
- Var *v, *v1;
-
- for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
- for(z=0; z<BITS; z++) {
- ref.b[z] |= r1->refahead.b[z];
- if(ref.b[z] != r1->refahead.b[z]) {
- r1->refahead.b[z] = ref.b[z];
- change++;
- }
- cal.b[z] |= r1->calahead.b[z];
- if(cal.b[z] != r1->calahead.b[z]) {
- r1->calahead.b[z] = cal.b[z];
- change++;
- }
- }
- switch(r1->f.prog->as) {
- case ACALL:
- if(noreturn(r1->f.prog))
- break;
-
- // Mark all input variables (ivar) as used, because that's what the
- // liveness bitmaps say. The liveness bitmaps say that so that a
- // panic will not show stale values in the parameter dump.
- // Mark variables with a recent VARDEF (r1->act) as used,
- // so that the optimizer flushes initializations to memory,
- // so that if a garbage collection happens during this CALL,
- // the collector will see initialized memory. Again this is to
- // match what the liveness bitmaps say.
- for(z=0; z<BITS; z++) {
- cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
- ref.b[z] = 0;
- }
-
- // cal.b is the current approximation of what's live across the call.
- // Every bit in cal.b is a single stack word. For each such word,
- // find all the other tracked stack words in the same Go variable
- // (struct/slice/string/interface) and mark them live too.
- // This is necessary because the liveness analysis for the garbage
- // collector works at variable granularity, not at word granularity.
- // It is fundamental for slice/string/interface: the garbage collector
- // needs the whole value, not just some of the words, in order to
- // interpret the other bits correctly. Specifically, slice needs a consistent
- // ptr and cap, string needs a consistent ptr and len, and interface
- // needs a consistent type word and data word.
- for(z=0; z<BITS; z++) {
- if(cal.b[z] == 0)
- continue;
- for(i=0; i<64; i++) {
- if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
- continue;
- v = var+z*64+i;
- if(v->node->opt == nil) // v represents fixed register, not Go variable
- continue;
-
- // v->node->opt is the head of a linked list of Vars
- // corresponding to tracked words from the Go variable v->node.
- // Walk the list and set all the bits.
- // For a large struct this could end up being quadratic:
- // after the first setting, the outer loop (for z, i) would see a 1 bit
- // for all of the remaining words in the struct, and for each such
- // word would go through and turn on all the bits again.
- // To avoid the quadratic behavior, we only turn on the bits if
- // v is the head of the list or if the head's bit is not yet turned on.
- // This will set the bits at most twice, keeping the overall loop linear.
- v1 = v->node->opt;
- j = v1 - var;
- if(v == v1 || !btest(&cal, j)) {
- for(; v1 != nil; v1 = v1->nextinnode) {
- j = v1 - var;
- biset(&cal, j);
- }
- }
- }
- }
- break;
-
- case ATEXT:
- for(z=0; z<BITS; z++) {
- cal.b[z] = 0;
- ref.b[z] = 0;
- }
- break;
-
- case ARET:
- for(z=0; z<BITS; z++) {
- cal.b[z] = externs.b[z] | ovar.b[z];
- ref.b[z] = 0;
- }
- break;
- }
- for(z=0; z<BITS; z++) {
- ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
- r1->use1.b[z] | r1->use2.b[z];
- cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
- r1->refbehind.b[z] = ref.b[z];
- r1->calbehind.b[z] = cal.b[z];
- }
- if(r1->f.active)
- break;
- r1->f.active = 1;
- }
- for(; r != r1; r = (Reg*)r->f.p1)
- for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
- prop(r2, r->refbehind, r->calbehind);
-}
-
-void
-synch(Reg *r, Bits dif)
-{
- Reg *r1;
- int z;
-
- for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
- for(z=0; z<BITS; z++) {
- dif.b[z] = (dif.b[z] &
- ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
- r1->set.b[z] | r1->regdiff.b[z];
- if(dif.b[z] != r1->regdiff.b[z]) {
- r1->regdiff.b[z] = dif.b[z];
- change++;
- }
- }
- if(r1->f.active)
- break;
- r1->f.active = 1;
- for(z=0; z<BITS; z++)
- dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
- if((Reg*)r1->f.s2 != R)
- synch((Reg*)r1->f.s2, dif);
- }
-}
-
-uint32
-allreg(uint32 b, Rgn *r)
-{
- Var *v;
- int i;
-
- v = var + r->varno;
- r->regno = 0;
- switch(v->etype) {
-
- default:
- fatal("unknown etype %d/%E", bitno(b), v->etype);
- break;
-
- case TINT8:
- case TUINT8:
- case TINT16:
- case TUINT16:
- case TINT32:
- case TUINT32:
- case TINT64:
- case TINT:
- case TUINT:
- case TUINTPTR:
- case TBOOL:
- case TPTR32:
- i = BtoR(~b);
- if(i && r->cost > 0) {
- r->regno = i;
- return RtoB(i);
- }
- break;
-
- case TFLOAT32:
- case TFLOAT64:
- if(!use_sse)
- break;
- i = BtoF(~b);
- if(i && r->cost > 0) {
- r->regno = i;
- return FtoB(i);
- }
- break;
- }
- return 0;
-}
-
-void
-paint1(Reg *r, int bn)
-{
- Reg *r1;
- Prog *p;
- int z;
- uint64 bb, rbz;
-
- z = bn/64;
- bb = 1LL<<(bn%64);
- if(r->act.b[z] & bb)
- return;
- for(;;) {
- if(!(r->refbehind.b[z] & bb))
- break;
- r1 = (Reg*)r->f.p1;
- if(r1 == R)
- break;
- if(!(r1->refahead.b[z] & bb))
- break;
- if(r1->act.b[z] & bb)
- break;
- r = r1;
- }
-
- rbz = ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z]));
- if(LOAD(r) & rbz & bb) {
- change -= CLOAD * r->f.loop;
- }
- for(;;) {
- r->act.b[z] |= bb;
- p = r->f.prog;
-
- if(r->f.prog->as != ANOP) { // don't give credit for NOPs
- if(r->use1.b[z] & bb) {
- change += CREF * r->f.loop;
- if(p->as == AFMOVL || p->as == AFMOVW)
- if(BtoR(bb) != REG_F0)
- change = -CINF;
- }
- if((r->use2.b[z]|r->set.b[z]) & bb) {
- change += CREF * r->f.loop;
- if(p->as == AFMOVL || p->as == AFMOVW)
- if(BtoR(bb) != REG_F0)
- change = -CINF;
- }
- }
-
- if(STORE(r) & r->regdiff.b[z] & bb) {
- change -= CLOAD * r->f.loop;
- if(p->as == AFMOVL || p->as == AFMOVW)
- if(BtoR(bb) != REG_F0)
- change = -CINF;
- }
-
- if(r->refbehind.b[z] & bb)
- for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
- if(r1->refahead.b[z] & bb)
- paint1(r1, bn);
-
- if(!(r->refahead.b[z] & bb))
- break;
- r1 = (Reg*)r->f.s2;
- if(r1 != R)
- if(r1->refbehind.b[z] & bb)
- paint1(r1, bn);
- r = (Reg*)r->f.s1;
- if(r == R)
- break;
- if(r->act.b[z] & bb)
- break;
- if(!(r->refbehind.b[z] & bb))
- break;
- }
-}
-
-uint32
-paint2(Reg *r, int bn, int depth)
-{
- Reg *r1;
- int z;
- uint64 bb, vreg;
-
- z = bn/64;
- bb = 1LL << (bn%64);
- vreg = regbits;
- if(!(r->act.b[z] & bb))
- return vreg;
- for(;;) {
- if(!(r->refbehind.b[z] & bb))
- break;
- r1 = (Reg*)r->f.p1;
- if(r1 == R)
- break;
- if(!(r1->refahead.b[z] & bb))
- break;
- if(!(r1->act.b[z] & bb))
- break;
- r = r1;
- }
- for(;;) {
- if(debug['R'] && debug['v'])
- print(" paint2 %d %P\n", depth, r->f.prog);
-
- r->act.b[z] &= ~bb;
-
- vreg |= r->regu;
-
- if(r->refbehind.b[z] & bb)
- for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
- if(r1->refahead.b[z] & bb)
- vreg |= paint2(r1, bn, depth+1);
-
- if(!(r->refahead.b[z] & bb))
- break;
- r1 = (Reg*)r->f.s2;
- if(r1 != R)
- if(r1->refbehind.b[z] & bb)
- vreg |= paint2(r1, bn, depth+1);
- r = (Reg*)r->f.s1;
- if(r == R)
- break;
- if(!(r->act.b[z] & bb))
- break;
- if(!(r->refbehind.b[z] & bb))
- break;
- }
-
- return vreg;
-}
-
-void
-paint3(Reg *r, int bn, uint32 rb, int rn)
-{
- Reg *r1;
- Prog *p;
- int z;
- uint64 bb, rbz;
-
- z = bn/64;
- bb = 1LL << (bn%64);
- if(r->act.b[z] & bb)
- return;
- for(;;) {
- if(!(r->refbehind.b[z] & bb))
- break;
- r1 = (Reg*)r->f.p1;
- if(r1 == R)
- break;
- if(!(r1->refahead.b[z] & bb))
- break;
- if(r1->act.b[z] & bb)
- break;
- r = r1;
- }
-
- rbz = ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z]));
- if(LOAD(r) & rbz & bb)
- addmove(r, bn, rn, 0);
- for(;;) {
- r->act.b[z] |= bb;
- p = r->f.prog;
-
- if(r->use1.b[z] & bb) {
- if(debug['R'] && debug['v'])
- print("%P", p);
- addreg(&p->from, rn);
- if(debug['R'] && debug['v'])
- print(" ===change== %P\n", p);
- }
- if((r->use2.b[z]|r->set.b[z]) & bb) {
- if(debug['R'] && debug['v'])
- print("%P", p);
- addreg(&p->to, rn);
- if(debug['R'] && debug['v'])
- print(" ===change== %P\n", p);
- }
-
- if(STORE(r) & r->regdiff.b[z] & bb)
- addmove(r, bn, rn, 1);
- r->regu |= rb;
-
- if(r->refbehind.b[z] & bb)
- for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
- if(r1->refahead.b[z] & bb)
- paint3(r1, bn, rb, rn);
-
- if(!(r->refahead.b[z] & bb))
- break;
- r1 = (Reg*)r->f.s2;
- if(r1 != R)
- if(r1->refbehind.b[z] & bb)
- paint3(r1, bn, rb, rn);
- r = (Reg*)r->f.s1;
- if(r == R)
- break;
- if(r->act.b[z] & bb)
- break;
- if(!(r->refbehind.b[z] & bb))
- break;
- }
-}
-
-void
-addreg(Adr *a, int rn)
-{
- a->sym = nil;
- a->node = nil;
- a->offset = 0;
- a->type = TYPE_REG;
- a->reg = rn;
- a->name = 0;
-
- ostats.ncvtreg++;
-}
-
-uint32
+uint64 // bitmap bit for integer register r; 0 when r is outside REG_AX..REG_DI
RtoB(int r)
{
if(r < REG_AX || r > REG_DI)
return 0;
- return 1L << (r-REG_AX);
+ return 1ULL << (r-REG_AX); // unsigned long long constant to match the uint64 return type
}
int
-BtoR(uint32 b)
+BtoR(uint64 b)
{
b &= 0xffL;
return bitno(b) + REG_AX;
}
-uint32
+uint64 // bitmap bit for X register f; 0 when f is outside REG_X0..REG_X7
FtoB(int f)
{
if(f < REG_X0 || f > REG_X7)
return 0;
- return 1L << (f - REG_X0 + 8);
+ return 1ULL << (f - REG_X0 + 8); // +8 keeps X registers clear of the low byte that BtoR masks with 0xff
}
int
-BtoF(uint32 b)
+BtoF(uint64 b) // maps bitno() of bits 8-15 of b back to REG_X0-based numbering; 0 when none of those bits is set
{
b &= 0xFF00L;
if(b == 0)
return 0;
return bitno(b) - 8 + REG_X0;
}
-
-void
-dumpone(Flow *f, int isreg)
-{
- int z;
- Bits bit;
- Reg *r;
-
- print("%d:%P", f->loop, f->prog);
- if(isreg) {
- r = (Reg*)f;
- for(z=0; z<BITS; z++)
- bit.b[z] =
- r->set.b[z] |
- r->use1.b[z] |
- r->use2.b[z] |
- r->refbehind.b[z] |
- r->refahead.b[z] |
- r->calbehind.b[z] |
- r->calahead.b[z] |
- r->regdiff.b[z] |
- r->act.b[z] |
- 0;
- if(bany(&bit)) {
- print("\t");
- if(bany(&r->set))
- print(" s:%Q", r->set);
- if(bany(&r->use1))
- print(" u1:%Q", r->use1);
- if(bany(&r->use2))
- print(" u2:%Q", r->use2);
- if(bany(&r->refbehind))
- print(" rb:%Q ", r->refbehind);
- if(bany(&r->refahead))
- print(" ra:%Q ", r->refahead);
- if(bany(&r->calbehind))
- print(" cb:%Q ", r->calbehind);
- if(bany(&r->calahead))
- print(" ca:%Q ", r->calahead);
- if(bany(&r->regdiff))
- print(" d:%Q ", r->regdiff);
- if(bany(&r->act))
- print(" a:%Q ", r->act);
- }
- }
- print("\n");
-}
-
-void
-dumpit(char *str, Flow *r0, int isreg)
-{
- Flow *r, *r1;
-
- print("\n%s\n", str);
- for(r = r0; r != nil; r = r->link) {
- dumpone(r, isreg);
- r1 = r->p2;
- if(r1 != nil) {
- print(" pred:");
- for(; r1 != nil; r1 = r1->p2link)
- print(" %.4ud", (int)r1->prog->pc);
- print("\n");
- }
- // Print successors if it's not just the next one
- if(r->s1 != r->link || r->s2 != nil) {
- print(" succ:");
- if(r->s1 != nil)
- print(" %.4ud", (int)r->s1->prog->pc);
- if(r->s2 != nil)
- print(" %.4ud", (int)r->s2->prog->pc);
- print("\n");
- }
- }
-}
arch.ginscall = ginscall;
arch.igen = igen;
arch.linkarchinit = linkarchinit;
+ arch.peep = peep;
arch.proginfo = proginfo;
arch.regalloc = regalloc;
arch.regfree = regfree;
- arch.regopt = regopt;
arch.regtyp = regtyp;
arch.sameaddr = sameaddr;
arch.smallindir = smallindir;
arch.stackaddr = stackaddr;
+ arch.excludedregs = excludedregs;
+ arch.RtoB = RtoB;
+ arch.FtoB = RtoB; // NOTE(review): installs RtoB as the float hook although FtoB is declared; confirm this is intentional
+ arch.BtoR = BtoR;
+ arch.BtoF = BtoF;
+ arch.optoas = optoas;
+ arch.doregbits = doregbits;
+ arch.regnames = regnames;
gcmain(argc, argv);
}
int stackaddr(Addr*);
Prog* unpatch(Prog*);
+
+/*
+ * reg.c
+ */
+uint64 excludedregs(void); // mask of registers excluded from allocation: fixed-function registers plus FP registers holding fixed constants
+uint64 RtoB(int); // register number -> its bit in the uint64 register mask; reg.c also passes FP registers (e.g. REG_F27) through it
+uint64 FtoB(int); // NOTE(review): galign.c installs RtoB as arch.FtoB, so this prototype looks unused here - confirm a definition exists
+int BtoR(uint64); // register mask -> a general register number; presumably 0 when no usable bit is set - verify in reg.c
+int BtoF(uint64); // register mask -> a floating-point register number (bitno(b) + REG_F0); returns 0 on its early-out path
+uint64 doregbits(int); // this port's reg.c ignores the argument and returns 0
+char** regnames(int*); // stores the register pseudo-variable count (NREGVAR) through the pointer and returns the name table
+
+/*
+ * peep.c
+ */
+void peep(Prog*); // peephole pass; registered as arch.peep in galign.c
#include <u.h>
#include <libc.h>
#include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
static Prog *appendpp(Prog *p, int as, int ftype, int freg, vlong foffset, int ttype, int treg, vlong toffset);
static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi);
-// Derived from Inferno utils/6c/gc.h
-// http://code.google.com/p/inferno-os/source/browse/utils/6c/gc.h
-//
-// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
-// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
-// Portions Copyright © 1997-1999 Vita Nuova Limited
-// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
-// Portions Copyright © 2004,2006 Bruce Ellis
-// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
-// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
-// Portions Copyright © 2009 The Go Authors. All rights reserved.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-
-#define Z N
-#define Adr Addr
-
-#define BLOAD(r) band(bnot(r->refbehind), r->refahead)
-#define BSTORE(r) band(bnot(r->calbehind), r->calahead)
-#define LOAD(r) (~r->refbehind.b[z] & r->refahead.b[z])
-#define STORE(r) (~r->calbehind.b[z] & r->calahead.b[z])
-
-#define CLOAD 5
-#define CREF 5
-#define CINF 1000
-#define LOOP 3
-
-typedef struct Reg Reg;
-typedef struct Rgn Rgn;
-
-/*c2go
-extern Node *Z;
-enum
-{
- CLOAD = 5,
- CREF = 5,
- CINF = 1000,
- LOOP = 3,
-};
-
-uint32 BLOAD(Reg*);
-uint32 BSTORE(Reg*);
-uint32 LOAD(Reg*);
-uint32 STORE(Reg*);
-*/
-
-// A Reg is a wrapper around a single Prog (one instruction) that holds
-// register optimization information while the optimizer runs.
-// r->prog is the instruction.
-// r->prog->opt points back to r.
-struct Reg
-{
- Flow f;
-
- Bits set; // regopt variables written by this instruction.
- Bits use1; // regopt variables read by prog->from.
- Bits use2; // regopt variables read by prog->to.
-
- // refahead/refbehind are the regopt variables whose current
- // value may be used in the following/preceding instructions
- // up to a CALL (or the value is clobbered).
- Bits refbehind;
- Bits refahead;
- // calahead/calbehind are similar, but for variables in
- // instructions that are reachable after hitting at least one
- // CALL.
- Bits calbehind;
- Bits calahead;
- Bits regdiff;
- Bits act;
-
- uint64 regu; // register used bitmap
-};
-#define R ((Reg*)0)
-/*c2go extern Reg *R; */
-
-#define NRGN 600
-/*c2go enum { NRGN = 600 }; */
-
-// A Rgn represents a single regopt variable over a region of code
-// where a register could potentially be dedicated to that variable.
-// The code encompassed by a Rgn is defined by the flow graph,
-// starting at enter, flood-filling forward while varno is refahead
-// and backward while varno is refbehind, and following branches. A
-// single variable may be represented by multiple disjoint Rgns and
-// each Rgn may choose a different register for that variable.
-// Registers are allocated to regions greedily in order of descending
-// cost.
-struct Rgn
-{
- Reg* enter;
- short cost;
- short varno;
- short regno;
-};
-
-EXTERN int32 exregoffset; // not set
-EXTERN int32 exfregoffset; // not set
-EXTERN Reg zreg;
-EXTERN Rgn region[NRGN];
-EXTERN Rgn* rgp;
-EXTERN int nregion;
-EXTERN int nvar;
-EXTERN int32 regbits;
-EXTERN int32 exregbits; // TODO(austin) not used; remove
-EXTERN Bits externs;
-EXTERN Bits params;
-EXTERN Bits consts;
-EXTERN Bits addrs;
-EXTERN Bits ivar;
-EXTERN Bits ovar;
-EXTERN int change;
-EXTERN int32 maxnr;
-
-EXTERN struct
-{
- int32 ncvtreg;
- int32 nspill;
- int32 ndelmov;
- int32 nvar;
-} ostats;
-
-/*
- * reg.c
- */
-int rcmp(const void*, const void*);
-void regopt(Prog*);
-void addmove(Reg*, int, int, int);
-Bits mkvar(Reg*, Adr*);
-void prop(Reg*, Bits, Bits);
-void synch(Reg*, Bits);
-uint64 allreg(uint64, Rgn*);
-void paint1(Reg*, int);
-uint64 paint2(Reg*, int, int);
-void paint3(Reg*, int, uint64, int);
-void addreg(Adr*, int);
-void dumpone(Flow*, int);
-void dumpit(char*, Flow*, int);
-
-/*
- * peep.c
- */
-void peep(Prog*);
-void excise(Flow*);
-int copyu(Prog*, Adr*, Adr*);
-
-uint64 RtoB(int);
-uint64 FtoB(int);
-int BtoR(uint64);
-int BtoF(uint64);
-
-/*
- * prog.c
- */
-void proginfo(ProgInfo*, Prog*);
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
// Many Power ISA arithmetic and logical instructions come in four
// standard variants. These bits let us map between variants.
#include <u.h>
#include <libc.h>
#include "gg.h"
+#include "../gc/popt.h"
#include "opt.h"
static int regzer(Addr *a);
static int copysub(Addr*, Addr*, Addr*, int);
static int copysub1(Prog*, Addr*, Addr*, int);
static int copyau1(Prog *p, Addr *v);
+static int copyu(Prog *p, Addr *v, Addr *s);
static uint32 gactive;
// 4 if v is set in one address and used in another (so addresses
// can be rewritten independently)
// 0 otherwise (not touched)
-int
+static int
copyu(Prog *p, Addr *v, Addr *s)
{
if(p->from3.type != TYPE_NONE)
#include <u.h>
#include <libc.h>
#include "gg.h"
+#include "../gc/popt.h"
#include "opt.h"
enum {
#include <u.h>
#include <libc.h>
#include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
-#define NREGVAR 64 /* 32 general + 32 floating */
-#define REGBITS ((uint64)0xffffffffffffffffull)
-/*c2go enum {
- NREGVAR = 64,
- REGBITS = 0xffffffffffffffff,
+enum {
+ NREGVAR = 64, /* 32 general + 32 floating */
};
-*/
-static Reg* firstr;
-static int first = 1;
-
-int
-rcmp(const void *a1, const void *a2)
-{
- Rgn *p1, *p2;
- int c1, c2;
-
- p1 = (Rgn*)a1;
- p2 = (Rgn*)a2;
- c1 = p2->cost;
- c2 = p1->cost;
- if(c1 -= c2)
- return c1;
- return p2->varno - p1->varno;
-}
-
-static void
-setaddrs(Bits bit)
-{
- int i, n;
- Var *v;
- Node *node;
-
- while(bany(&bit)) {
- // convert each bit to a variable
- i = bnum(bit);
- node = var[i].node;
- n = var[i].name;
- biclr(&bit, i);
-
- // disable all pieces of that variable
- for(i=0; i<nvar; i++) {
- v = var+i;
- if(v->node == node && v->name == n)
- v->addr = 2;
- }
- }
-}
static char* regname[] = {
".R0",
".F31",
};
-static Node* regnodes[NREGVAR];
-
-static void walkvardef(Node *n, Reg *r, int active);
-
-void
-regopt(Prog *firstp)
+char**
+regnames(int *n)
{
- Reg *r, *r1;
- Prog *p;
- Graph *g;
- ProgInfo info;
- int i, z, active;
- uint64 vreg, usedreg;
- Bits bit;
-
- if(first) {
- fmtinstall('Q', Qconv);
- first = 0;
- }
-
- mergetemp(firstp);
+ *n = NREGVAR;
+ return regname;
+}
- /*
- * control flow is more complicated in generated go code
- * than in generated c code. define pseudo-variables for
- * registers, so we have complete register usage information.
- */
- nvar = NREGVAR;
- memset(var, 0, NREGVAR*sizeof var[0]);
- for(i=0; i<NREGVAR; i++) {
- if(regnodes[i] == N)
- regnodes[i] = newname(lookup(regname[i]));
- var[i].node = regnodes[i];
- }
+uint64
+excludedregs(void)
+{
+ uint64 regbits;
// Exclude registers with fixed functions
regbits = (1<<0)|RtoB(REGSP)|RtoB(REGG)|RtoB(REGTLS);
// Also exclude floating point registers with fixed constants
regbits |= RtoB(REG_F27)|RtoB(REG_F28)|RtoB(REG_F29)|RtoB(REG_F30)|RtoB(REG_F31);
- externs = zbits;
- params = zbits;
- consts = zbits;
- addrs = zbits;
- ivar = zbits;
- ovar = zbits;
-
- /*
- * pass 1
- * build aux data structure
- * allocate pcs
- * find use and set of variables
- */
- g = flowstart(firstp, sizeof(Reg));
- if(g == nil) {
- for(i=0; i<nvar; i++)
- var[i].node->opt = nil;
- return;
- }
-
- firstr = (Reg*)g->start;
-
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- p = r->f.prog;
- if(p->as == AVARDEF || p->as == AVARKILL)
- continue;
- proginfo(&info, p);
-
- // Avoid making variables for direct-called functions.
- if(p->as == ABL && p->to.name == NAME_EXTERN)
- continue;
-
- // from vs to doesn't matter for registers
- r->use1.b[0] |= info.reguse | info.regindex;
- r->set.b[0] |= info.regset;
-
- // Compute used register for from
- bit = mkvar(r, &p->from);
- if(info.flags & LeftAddr)
- setaddrs(bit);
- if(info.flags & LeftRead)
- for(z=0; z<BITS; z++)
- r->use1.b[z] |= bit.b[z];
-
- // Compute used register for reg
- if(info.flags & RegRead)
- r->use1.b[0] |= RtoB(p->reg);
-
- // Currently we never generate three register forms.
- // If we do, this will need to change.
- if(p->from3.type != TYPE_NONE)
- fatal("regopt not implemented for from3");
-
- // Compute used register for to
- bit = mkvar(r, &p->to);
- if(info.flags & RightAddr)
- setaddrs(bit);
- if(info.flags & RightRead)
- for(z=0; z<BITS; z++)
- r->use2.b[z] |= bit.b[z];
- if(info.flags & RightWrite)
- for(z=0; z<BITS; z++)
- r->set.b[z] |= bit.b[z];
- }
-
- for(i=0; i<nvar; i++) {
- Var *v = var+i;
- if(v->addr) {
- bit = blsh(i);
- for(z=0; z<BITS; z++)
- addrs.b[z] |= bit.b[z];
- }
-
- if(debug['R'] && debug['v'])
- print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
- i, v->addr, v->etype, v->width, v->node, v->offset);
- }
-
- if(debug['R'] && debug['v'])
- dumpit("pass1", &firstr->f, 1);
-
- /*
- * pass 2
- * find looping structure
- */
- flowrpo(g);
-
- if(debug['R'] && debug['v'])
- dumpit("pass2", &firstr->f, 1);
-
- /*
- * pass 2.5
- * iterate propagating fat vardef covering forward
- * r->act records vars with a VARDEF since the last CALL.
- * (r->act will be reused in pass 5 for something else,
- * but we'll be done with it by then.)
- */
- active = 0;
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- r->f.active = 0;
- r->act = zbits;
- }
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- p = r->f.prog;
- if(p->as == AVARDEF && isfat(((Node*)(p->to.node))->type) && ((Node*)(p->to.node))->opt != nil) {
- active++;
- walkvardef(p->to.node, r, active);
- }
- }
-
- /*
- * pass 3
- * iterate propagating usage
- * back until flow graph is complete
- */
-loop1:
- change = 0;
- for(r = firstr; r != R; r = (Reg*)r->f.link)
- r->f.active = 0;
- for(r = firstr; r != R; r = (Reg*)r->f.link)
- if(r->f.prog->as == ARET)
- prop(r, zbits, zbits);
-loop11:
- /* pick up unreachable code */
- i = 0;
- for(r = firstr; r != R; r = r1) {
- r1 = (Reg*)r->f.link;
- if(r1 && r1->f.active && !r->f.active) {
- prop(r, zbits, zbits);
- i = 1;
- }
- }
- if(i)
- goto loop11;
- if(change)
- goto loop1;
-
- if(debug['R'] && debug['v'])
- dumpit("pass3", &firstr->f, 1);
-
- /*
- * pass 4
- * iterate propagating register/variable synchrony
- * forward until graph is complete
- */
-loop2:
- change = 0;
- for(r = firstr; r != R; r = (Reg*)r->f.link)
- r->f.active = 0;
- synch(firstr, zbits);
- if(change)
- goto loop2;
-
- if(debug['R'] && debug['v'])
- dumpit("pass4", &firstr->f, 1);
-
- /*
- * pass 4.5
- * move register pseudo-variables into regu.
- */
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
-
- r->set.b[0] &= ~REGBITS;
- r->use1.b[0] &= ~REGBITS;
- r->use2.b[0] &= ~REGBITS;
- r->refbehind.b[0] &= ~REGBITS;
- r->refahead.b[0] &= ~REGBITS;
- r->calbehind.b[0] &= ~REGBITS;
- r->calahead.b[0] &= ~REGBITS;
- r->regdiff.b[0] &= ~REGBITS;
- r->act.b[0] &= ~REGBITS;
- }
-
- if(debug['R'] && debug['v'])
- dumpit("pass4.5", &firstr->f, 1);
-
- /*
- * pass 5
- * isolate regions
- * calculate costs (paint1)
- */
- r = firstr;
- if(r) {
- for(z=0; z<BITS; z++)
- bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
- ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
- if(bany(&bit) && !r->f.refset) {
- // should never happen - all variables are preset
- if(debug['w'])
- print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
- r->f.refset = 1;
- }
- }
- for(r = firstr; r != R; r = (Reg*)r->f.link)
- r->act = zbits;
- rgp = region;
- nregion = 0;
- for(r = firstr; r != R; r = (Reg*)r->f.link) {
- for(z=0; z<BITS; z++)
- bit.b[z] = r->set.b[z] &
- ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
- if(bany(&bit) && !r->f.refset) {
- if(debug['w'])
- print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
- r->f.refset = 1;
- excise(&r->f);
- }
- for(z=0; z<BITS; z++)
- bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
- while(bany(&bit)) {
- i = bnum(bit);
- rgp->enter = r;
- rgp->varno = i;
- change = 0;
- paint1(r, i);
- biclr(&bit, i);
- if(change <= 0)
- continue;
- rgp->cost = change;
- nregion++;
- if(nregion >= NRGN) {
- if(debug['R'] && debug['v'])
- print("too many regions\n");
- goto brk;
- }
- rgp++;
- }
- }
-brk:
- qsort(region, nregion, sizeof(region[0]), rcmp);
-
- if(debug['R'] && debug['v'])
- dumpit("pass5", &firstr->f, 1);
-
- /*
- * pass 6
- * determine used registers (paint2)
- * replace code (paint3)
- */
- rgp = region;
- if(debug['R'] && debug['v'])
- print("\nregisterizing\n");
- for(i=0; i<nregion; i++) {
- if(debug['R'] && debug['v'])
- print("region %d: cost %d varno %d enter %lld\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc);
- bit = blsh(rgp->varno);
- usedreg = paint2(rgp->enter, rgp->varno, 0);
- vreg = allreg(usedreg, rgp);
- if(rgp->regno != 0) {
- if(debug['R'] && debug['v']) {
- Var *v;
-
- v = var + rgp->varno;
- print("registerize %N+%lld (bit=%2d et=%2E) in %R usedreg=%llx vreg=%llx\n",
- v->node, v->offset, rgp->varno, v->etype, rgp->regno, usedreg, vreg);
- }
- paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
- }
- rgp++;
- }
-
- /*
- * free aux structures. peep allocates new ones.
- */
- for(i=0; i<nvar; i++)
- var[i].node->opt = nil;
- flowend(g);
- firstr = R;
-
- if(debug['R'] && debug['v']) {
- // Rebuild flow graph, since we inserted instructions
- g = flowstart(firstp, sizeof(Reg));
- firstr = (Reg*)g->start;
- dumpit("pass6", &firstr->f, 1);
- flowend(g);
- firstr = R;
- }
-
- /*
- * pass 7
- * peep-hole on basic block
- */
- if(!debug['R'] || debug['P'])
- peep(firstp);
-
- /*
- * eliminate nops
- */
- for(p=firstp; p!=P; p=p->link) {
- while(p->link != P && p->link->as == ANOP)
- p->link = p->link->link;
- if(p->to.type == TYPE_BRANCH)
- while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
- p->to.u.branch = p->to.u.branch->link;
- }
-
- if(debug['R']) {
- if(ostats.ncvtreg ||
- ostats.nspill ||
- ostats.ndelmov ||
- ostats.nvar ||
- 0)
- print("\nstats\n");
-
- if(ostats.ncvtreg)
- print(" %4d cvtreg\n", ostats.ncvtreg);
- if(ostats.nspill)
- print(" %4d spill\n", ostats.nspill);
- if(ostats.ndelmov)
- print(" %4d delmov\n", ostats.ndelmov);
- if(ostats.nvar)
- print(" %4d var\n", ostats.nvar);
-
- memset(&ostats, 0, sizeof(ostats));
- }
-
- return;
-}
-
-static void
-walkvardef(Node *n, Reg *r, int active)
-{
- Reg *r1, *r2;
- int bn;
- Var *v;
-
- for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) {
- if(r1->f.active == active)
- break;
- r1->f.active = active;
- if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n)
- break;
- for(v=n->opt; v!=nil; v=v->nextinnode) {
- bn = v - var;
- biset(&r1->act, bn);
- }
- if(r1->f.prog->as == ABL)
- break;
- }
-
- for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1)
- if(r2->f.s2 != nil)
- walkvardef(n, (Reg*)r2->f.s2, active);
-}
-
-/*
- * add mov b,rn
- * just after r
- */
-void
-addmove(Reg *r, int bn, int rn, int f)
-{
- Prog *p, *p1, *p2;
- Adr *a;
- Var *v;
-
- p1 = mal(sizeof(*p1));
- *p1 = zprog;
- p = r->f.prog;
-
- // If there's a stack fixup coming (ADD $n,R1 after BL newproc or BL deferproc),
- // delay the load until after the fixup.
- p2 = p->link;
- if(p2 && p2->as == AADD && p2->to.reg == REGSP && p2->to.type == TYPE_REG)
- p = p2;
-
- p1->link = p->link;
- p->link = p1;
- p1->lineno = p->lineno;
-
- v = var + bn;
-
- a = &p1->to;
- a->name = v->name;
- a->node = v->node;
- a->sym = linksym(v->node->sym);
- a->offset = v->offset;
- a->etype = v->etype;
- a->type = TYPE_MEM;
- if(a->etype == TARRAY)
- a->type = TYPE_ADDR;
- else if(a->sym == nil)
- a->type = TYPE_CONST;
-
- if(v->addr)
- fatal("addmove: shouldn't be doing this %A\n", a);
-
- switch(v->etype) {
- default:
- print("What is this %E\n", v->etype);
-
- case TINT8:
- p1->as = AMOVB;
- break;
- case TBOOL:
- case TUINT8:
-//print("movbu %E %d %S\n", v->etype, bn, v->sym);
- p1->as = AMOVBZ;
- break;
- case TINT16:
- p1->as = AMOVH;
- break;
- case TUINT16:
- p1->as = AMOVHZ;
- break;
- case TINT32:
- p1->as = AMOVW;
- break;
- case TUINT32:
- case TPTR32:
- p1->as = AMOVWZ;
- break;
- case TINT64:
- case TUINT64:
- case TPTR64:
- p1->as = AMOVD;
- break;
- case TFLOAT32:
- p1->as = AFMOVS;
- break;
- case TFLOAT64:
- p1->as = AFMOVD;
- break;
- }
-
- p1->from.type = TYPE_REG;
- p1->from.reg = rn;
- if(!f) {
- p1->from = *a;
- *a = zprog.from;
- a->type = TYPE_REG;
- a->reg = rn;
- if(v->etype == TUINT8 || v->etype == TBOOL)
- p1->as = AMOVBZ;
- if(v->etype == TUINT16)
- p1->as = AMOVHZ;
- }
- if(debug['R'])
- print("%P\t.a%P\n", p, p1);
- ostats.nspill++;
-}
-
-static int
-overlap(int64 o1, int w1, int64 o2, int w2)
-{
- int64 t1, t2;
-
- t1 = o1+w1;
- t2 = o2+w2;
-
- if(!(t1 > o2 && t2 > o1))
- return 0;
-
- return 1;
-}
-
-Bits
-mkvar(Reg *r, Adr *a)
-{
- USED(r);
- Var *v;
- int i, t, n, et, z, flag;
- int64 w;
- int64 o;
- Bits bit;
- Node *node;
-
- // mark registers used
- t = a->type;
- switch(t) {
- default:
- print("type %d %d %D\n", t, a->name, a);
- goto none;
-
- case TYPE_NONE:
- goto none;
-
- case TYPE_BRANCH:
- case TYPE_CONST:
- case TYPE_FCONST:
- case TYPE_SCONST:
- case TYPE_MEM:
- case TYPE_ADDR:
- break;
-
- case TYPE_REG:
- if(a->reg != 0) {
- bit = zbits;
- bit.b[0] = RtoB(a->reg);
- return bit;
- }
- break;
- }
-
- switch(a->name) {
- default:
- goto none;
-
- case NAME_EXTERN:
- case NAME_STATIC:
- case NAME_AUTO:
- case NAME_PARAM:
- n = a->name;
- break;
- }
-
- node = a->node;
- if(node == N || node->op != ONAME || node->orig == N)
- goto none;
- node = node->orig;
- if(node->orig != node)
- fatal("%D: bad node", a);
- if(node->sym == S || node->sym->name[0] == '.')
- goto none;
- et = a->etype;
- o = a->offset;
- w = a->width;
- if(w < 0)
- fatal("bad width %lld for %D", w, a);
-
- flag = 0;
- for(i=0; i<nvar; i++) {
- v = var+i;
- if(v->node == node && v->name == n) {
- if(v->offset == o)
- if(v->etype == et)
- if(v->width == w)
- return blsh(i);
-
- // if they overlap, disable both
- if(overlap(v->offset, v->width, o, w)) {
- v->addr = 1;
- flag = 1;
- }
- }
- }
-
- switch(et) {
- case 0:
- case TFUNC:
- goto none;
- }
-
- if(nvar >= NVAR) {
- if(debug['w'] > 1 && node != N)
- fatal("variable not optimized: %#N", node);
-
- // If we're not tracking a word in a variable, mark the rest as
- // having its address taken, so that we keep the whole thing
- // live at all calls. otherwise we might optimize away part of
- // a variable but not all of it.
- for(i=0; i<nvar; i++) {
- v = var+i;
- if(v->node == node)
- v->addr = 1;
- }
- goto none;
- }
-
- i = nvar;
- nvar++;
- v = var+i;
- v->offset = o;
- v->name = n;
- v->etype = et;
- v->width = w;
- v->addr = flag; // funny punning
- v->node = node;
-
- // node->opt is the head of a linked list
- // of Vars within the given Node, so that
- // we can start at a Var and find all the other
- // Vars in the same Go variable.
- v->nextinnode = node->opt;
- node->opt = v;
-
- bit = blsh(i);
- if(n == NAME_EXTERN || n == NAME_STATIC)
- for(z=0; z<BITS; z++)
- externs.b[z] |= bit.b[z];
- if(n == NAME_PARAM)
- for(z=0; z<BITS; z++)
- params.b[z] |= bit.b[z];
-
- if(node->class == PPARAM)
- for(z=0; z<BITS; z++)
- ivar.b[z] |= bit.b[z];
- if(node->class == PPARAMOUT)
- for(z=0; z<BITS; z++)
- ovar.b[z] |= bit.b[z];
-
- // Treat values with their address taken as live at calls,
- // because the garbage collector's liveness analysis in ../gc/plive.c does.
- // These must be consistent or else we will elide stores and the garbage
- // collector will see uninitialized data.
- // The typical case where our own analysis is out of sync is when the
- // node appears to have its address taken but that code doesn't actually
- // get generated and therefore doesn't show up as an address being
- // taken when we analyze the instruction stream.
- // One instance of this case is when a closure uses the same name as
- // an outer variable for one of its own variables declared with :=.
- // The parser flags the outer variable as possibly shared, and therefore
- // sets addrtaken, even though it ends up not being actually shared.
- // If we were better about _ elision, _ = &x would suffice too.
- // The broader := in a closure problem is mentioned in a comment in
- // closure.c:/^typecheckclosure and dcl.c:/^oldname.
- if(node->addrtaken)
- v->addr = 1;
-
- // Disable registerization for globals, because:
- // (1) we might panic at any time and we want the recovery code
- // to see the latest values (issue 1304).
- // (2) we don't know what pointers might point at them and we want
- // loads via those pointers to see updated values and vice versa (issue 7995).
- //
- // Disable registerization for results if using defer, because the deferred func
- // might recover and return, causing the current values to be used.
- if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
- v->addr = 1;
-
- if(debug['R'])
- print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
- ostats.nvar++;
-
- return bit;
-
-none:
- return zbits;
-}
-
-void
-prop(Reg *r, Bits ref, Bits cal)
-{
- Reg *r1, *r2;
- int z, i, j;
- Var *v, *v1;
-
- for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
- for(z=0; z<BITS; z++) {
- ref.b[z] |= r1->refahead.b[z];
- if(ref.b[z] != r1->refahead.b[z]) {
- r1->refahead.b[z] = ref.b[z];
- change++;
- }
- cal.b[z] |= r1->calahead.b[z];
- if(cal.b[z] != r1->calahead.b[z]) {
- r1->calahead.b[z] = cal.b[z];
- change++;
- }
- }
- switch(r1->f.prog->as) {
- case ABL:
- if(noreturn(r1->f.prog))
- break;
-
- // Mark all input variables (ivar) as used, because that's what the
- // liveness bitmaps say. The liveness bitmaps say that so that a
- // panic will not show stale values in the parameter dump.
- // Mark variables with a recent VARDEF (r1->act) as used,
- // so that the optimizer flushes initializations to memory,
- // so that if a garbage collection happens during this CALL,
- // the collector will see initialized memory. Again this is to
- // match what the liveness bitmaps say.
- for(z=0; z<BITS; z++) {
- cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
- ref.b[z] = 0;
- }
-
- // cal.b is the current approximation of what's live across the call.
- // Every bit in cal.b is a single stack word. For each such word,
- // find all the other tracked stack words in the same Go variable
- // (struct/slice/string/interface) and mark them live too.
- // This is necessary because the liveness analysis for the garbage
- // collector works at variable granularity, not at word granularity.
- // It is fundamental for slice/string/interface: the garbage collector
- // needs the whole value, not just some of the words, in order to
- // interpret the other bits correctly. Specifically, slice needs a consistent
- // ptr and cap, string needs a consistent ptr and len, and interface
- // needs a consistent type word and data word.
- for(z=0; z<BITS; z++) {
- if(cal.b[z] == 0)
- continue;
- for(i=0; i<64; i++) {
- if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
- continue;
- v = var+z*64+i;
- if(v->node->opt == nil) // v represents fixed register, not Go variable
- continue;
-
- // v->node->opt is the head of a linked list of Vars
- // corresponding to tracked words from the Go variable v->node.
- // Walk the list and set all the bits.
- // For a large struct this could end up being quadratic:
- // after the first setting, the outer loop (for z, i) would see a 1 bit
- // for all of the remaining words in the struct, and for each such
- // word would go through and turn on all the bits again.
- // To avoid the quadratic behavior, we only turn on the bits if
- // v is the head of the list or if the head's bit is not yet turned on.
- // This will set the bits at most twice, keeping the overall loop linear.
- v1 = v->node->opt;
- j = v1 - var;
- if(v == v1 || !btest(&cal, j)) {
- for(; v1 != nil; v1 = v1->nextinnode) {
- j = v1 - var;
- biset(&cal, j);
- }
- }
- }
- }
- break;
-
- case ATEXT:
- for(z=0; z<BITS; z++) {
- cal.b[z] = 0;
- ref.b[z] = 0;
- }
- break;
-
- case ARET:
- for(z=0; z<BITS; z++) {
- cal.b[z] = externs.b[z] | ovar.b[z];
- ref.b[z] = 0;
- }
- break;
- }
- for(z=0; z<BITS; z++) {
- ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
- r1->use1.b[z] | r1->use2.b[z];
- cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
- r1->refbehind.b[z] = ref.b[z];
- r1->calbehind.b[z] = cal.b[z];
- }
- if(r1->f.active)
- break;
- r1->f.active = 1;
- }
- for(; r != r1; r = (Reg*)r->f.p1)
- for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
- prop(r2, r->refbehind, r->calbehind);
-}
-
-void
-synch(Reg *r, Bits dif)
-{
- Reg *r1;
- int z;
-
- for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
- for(z=0; z<BITS; z++) {
- dif.b[z] = (dif.b[z] &
- ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
- r1->set.b[z] | r1->regdiff.b[z];
- if(dif.b[z] != r1->regdiff.b[z]) {
- r1->regdiff.b[z] = dif.b[z];
- change++;
- }
- }
- if(r1->f.active)
- break;
- r1->f.active = 1;
- for(z=0; z<BITS; z++)
- dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
- if(r1->f.s2 != nil)
- synch((Reg*)r1->f.s2, dif);
- }
+ return regbits;
}
uint64
-allreg(uint64 b, Rgn *r)
+doregbits(int r)
{
- Var *v;
- int i;
-
- v = var + r->varno;
- r->regno = 0;
- switch(v->etype) {
-
- default:
- fatal("unknown etype %d/%E", bitno(b), v->etype);
- break;
-
- case TINT8:
- case TUINT8:
- case TINT16:
- case TUINT16:
- case TINT32:
- case TUINT32:
- case TINT64:
- case TUINT64:
- case TINT:
- case TUINT:
- case TUINTPTR:
- case TBOOL:
- case TPTR32:
- case TPTR64:
- i = BtoR(~b);
- if(i && r->cost > 0) {
- r->regno = i;
- return RtoB(i);
- }
- break;
-
- case TFLOAT32:
- case TFLOAT64:
- i = BtoF(~b);
- if(i && r->cost > 0) {
- r->regno = i;
- return RtoB(i);
- }
- break;
- }
+ USED(r);
return 0;
}
-void
-paint1(Reg *r, int bn)
-{
- Reg *r1;
- int z;
- uint64 bb;
-
- z = bn/64;
- bb = 1LL<<(bn%64);
- if(r->act.b[z] & bb)
- return;
- for(;;) {
- if(!(r->refbehind.b[z] & bb))
- break;
- r1 = (Reg*)r->f.p1;
- if(r1 == R)
- break;
- if(!(r1->refahead.b[z] & bb))
- break;
- if(r1->act.b[z] & bb)
- break;
- r = r1;
- }
-
- if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
- change -= CLOAD * r->f.loop;
- }
- for(;;) {
- r->act.b[z] |= bb;
-
- if(r->f.prog->as != ANOP) { // don't give credit for NOPs
- if(r->use1.b[z] & bb)
- change += CREF * r->f.loop;
- if((r->use2.b[z]|r->set.b[z]) & bb)
- change += CREF * r->f.loop;
- }
-
- if(STORE(r) & r->regdiff.b[z] & bb) {
- change -= CLOAD * r->f.loop;
- }
-
- if(r->refbehind.b[z] & bb)
- for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
- if(r1->refahead.b[z] & bb)
- paint1(r1, bn);
-
- if(!(r->refahead.b[z] & bb))
- break;
- r1 = (Reg*)r->f.s2;
- if(r1 != R)
- if(r1->refbehind.b[z] & bb)
- paint1(r1, bn);
- r = (Reg*)r->f.s1;
- if(r == R)
- break;
- if(r->act.b[z] & bb)
- break;
- if(!(r->refbehind.b[z] & bb))
- break;
- }
-}
-
-uint64
-paint2(Reg *r, int bn, int depth)
-{
- Reg *r1;
- int z;
- uint64 bb, vreg;
-
- z = bn/64;
- bb = 1LL << (bn%64);
- vreg = regbits;
- if(!(r->act.b[z] & bb))
- return vreg;
- for(;;) {
- if(!(r->refbehind.b[z] & bb))
- break;
- r1 = (Reg*)r->f.p1;
- if(r1 == R)
- break;
- if(!(r1->refahead.b[z] & bb))
- break;
- if(!(r1->act.b[z] & bb))
- break;
- r = r1;
- }
- for(;;) {
- if(debug['R'] && debug['v'])
- print(" paint2 %d %P\n", depth, r->f.prog);
-
- r->act.b[z] &= ~bb;
-
- vreg |= r->regu;
-
- if(r->refbehind.b[z] & bb)
- for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
- if(r1->refahead.b[z] & bb)
- vreg |= paint2(r1, bn, depth+1);
-
- if(!(r->refahead.b[z] & bb))
- break;
- r1 = (Reg*)r->f.s2;
- if(r1 != R)
- if(r1->refbehind.b[z] & bb)
- vreg |= paint2(r1, bn, depth+1);
- r = (Reg*)r->f.s1;
- if(r == R)
- break;
- if(!(r->act.b[z] & bb))
- break;
- if(!(r->refbehind.b[z] & bb))
- break;
- }
- return vreg;
-}
-
-void
-paint3(Reg *r, int bn, uint64 rb, int rn)
-{
- Reg *r1;
- Prog *p;
- int z;
- uint64 bb;
-
- z = bn/64;
- bb = 1LL << (bn%64);
- if(r->act.b[z] & bb)
- return;
- for(;;) {
- if(!(r->refbehind.b[z] & bb))
- break;
- r1 = (Reg*)r->f.p1;
- if(r1 == R)
- break;
- if(!(r1->refahead.b[z] & bb))
- break;
- if(r1->act.b[z] & bb)
- break;
- r = r1;
- }
-
- if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
- addmove(r, bn, rn, 0);
- for(;;) {
- r->act.b[z] |= bb;
- p = r->f.prog;
-
- if(r->use1.b[z] & bb) {
- if(debug['R'] && debug['v'])
- print("%P", p);
- addreg(&p->from, rn);
- if(debug['R'] && debug['v'])
- print(" ===change== %P\n", p);
- }
- if((r->use2.b[z]|r->set.b[z]) & bb) {
- if(debug['R'] && debug['v'])
- print("%P", p);
- addreg(&p->to, rn);
- if(debug['R'] && debug['v'])
- print(" ===change== %P\n", p);
- }
-
- if(STORE(r) & r->regdiff.b[z] & bb)
- addmove(r, bn, rn, 1);
- r->regu |= rb;
-
- if(r->refbehind.b[z] & bb)
- for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
- if(r1->refahead.b[z] & bb)
- paint3(r1, bn, rb, rn);
-
- if(!(r->refahead.b[z] & bb))
- break;
- r1 = (Reg*)r->f.s2;
- if(r1 != R)
- if(r1->refbehind.b[z] & bb)
- paint3(r1, bn, rb, rn);
- r = (Reg*)r->f.s1;
- if(r == R)
- break;
- if(r->act.b[z] & bb)
- break;
- if(!(r->refbehind.b[z] & bb))
- break;
- }
-}
-
-void
-addreg(Adr *a, int rn)
-{
- a->sym = nil;
- a->node = nil;
- a->name = NAME_NONE;
- a->type = TYPE_REG;
- a->reg = rn;
-
- ostats.ncvtreg++;
-}
-
/*
* track register variables including external registers:
* bit reg
return 0;
return bitno(b) + REG_F0;
}
-
-void
-dumpone(Flow *f, int isreg)
-{
- int z;
- Bits bit;
- Reg *r;
-
- print("%d:%P", f->loop, f->prog);
- if(isreg) {
- r = (Reg*)f;
- for(z=0; z<BITS; z++)
- bit.b[z] =
- r->set.b[z] |
- r->use1.b[z] |
- r->use2.b[z] |
- r->refbehind.b[z] |
- r->refahead.b[z] |
- r->calbehind.b[z] |
- r->calahead.b[z] |
- r->regdiff.b[z] |
- r->act.b[z] |
- 0;
- if(bany(&bit)) {
- print("\t");
- if(bany(&r->set))
- print(" s:%Q", r->set);
- if(bany(&r->use1))
- print(" u1:%Q", r->use1);
- if(bany(&r->use2))
- print(" u2:%Q", r->use2);
- if(bany(&r->refbehind))
- print(" rb:%Q ", r->refbehind);
- if(bany(&r->refahead))
- print(" ra:%Q ", r->refahead);
- if(bany(&r->calbehind))
- print(" cb:%Q ", r->calbehind);
- if(bany(&r->calahead))
- print(" ca:%Q ", r->calahead);
- if(bany(&r->regdiff))
- print(" d:%Q ", r->regdiff);
- if(bany(&r->act))
- print(" a:%Q ", r->act);
- }
- }
- print("\n");
-}
-
-
-void
-dumpit(char *str, Flow *r0, int isreg)
-{
- Flow *r, *r1;
-
- print("\n%s\n", str);
- for(r = r0; r != nil; r = r->link) {
- dumpone(r, isreg);
- r1 = r->p2;
- if(r1 != nil) {
- print(" pred:");
- for(; r1 != nil; r1 = r1->p2link)
- print(" %.4ud", (int)r1->prog->pc);
- print("\n");
- }
- // Print successors if it's not just the next one
- if(r->s1 != r->link || r->s2 != nil) {
- print(" succ:");
- if(r->s1 != nil)
- print(" %.4ud", (int)r->s1->prog->pc);
- if(r->s2 != nil)
- print(" %.4ud", (int)r->s2->prog->pc);
- print("\n");
- }
- }
-}
void (*ginscall)(Node*, int);
void (*igen)(Node*, Node*, Node*);
void (*linkarchinit)(void);
+ void (*peep)(Prog*);
void (*proginfo)(ProgInfo*, Prog*);
void (*regalloc)(Node*, Type*, Node*);
void (*regfree)(Node*);
- void (*regopt)(Prog*);
int (*regtyp)(Addr*);
int (*sameaddr)(Addr*, Addr*);
int (*smallindir)(Addr*, Addr*);
int (*stackaddr)(Addr*);
+ uint64 (*excludedregs)(void);
+ uint64 (*RtoB)(int);
+ uint64 (*FtoB)(int);
+ int (*BtoR)(uint64);
+ int (*BtoF)(uint64);
+ int (*optoas)(int, Type*);
+ uint64 (*doregbits)(int);
+ char **(*regnames)(int*);
};
void afunclit(Addr*, Node*);
void datagostring(Strlit *sval, Addr *a);
int ismem(Node*);
int samereg(Node*, Node*);
+void regopt(Prog*);
EXTERN int32 pcloc;
fixjmp(ptxt);
if(!debug['N'] || debug['R'] || debug['P']) {
- arch.regopt(ptxt);
+ regopt(ptxt);
nilopt(ptxt);
}
arch.expandchecks(ptxt);
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
-
#define Z N
#define Adr Addr
Bits regdiff;
Bits act;
- int32 regu; // register used bitmap
+ uint64 regu; // register used bitmap
};
#define R ((Reg*)0)
/*c2go extern Reg *R; */
short regno;
};
-EXTERN int32 exregoffset; // not set
-EXTERN int32 exfregoffset; // not set
EXTERN Reg zreg;
EXTERN Rgn region[NRGN];
EXTERN Rgn* rgp;
EXTERN int nregion;
EXTERN int nvar;
-EXTERN int32 regbits;
-EXTERN int32 exregbits;
+EXTERN uint64 regbits;
EXTERN Bits externs;
EXTERN Bits params;
EXTERN Bits consts;
Bits mkvar(Reg*, Adr*);
void prop(Reg*, Bits, Bits);
void synch(Reg*, Bits);
-uint32 allreg(uint32, Rgn*);
+uint64 allreg(uint64, Rgn*);
void paint1(Reg*, int);
-uint32 paint2(Reg*, int, int);
-void paint3(Reg*, int, uint32, int);
+uint64 paint2(Reg*, int, int);
+void paint3(Reg*, int, uint64, int);
void addreg(Adr*, int);
void dumpone(Flow*, int);
void dumpit(char*, Flow*, int);
/*
* peep.c
- */
void peep(Prog*);
void excise(Flow*);
int copyu(Prog*, Adr*, Adr*);
-
-uint32 RtoB(int);
-uint32 FtoB(int);
-int BtoR(uint32);
-int BtoF(uint32);
+ */
/*
* prog.c
- */
void proginfo(ProgInfo*, Prog*);
+ */
--- /dev/null
+// Derived from Inferno utils/6c/reg.c
+// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
+// Portions Copyright © 1997-1999 Vita Nuova Limited
+// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
+// Portions Copyright © 2004,2006 Bruce Ellis
+// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
+// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
+// Portions Copyright © 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include <u.h>
+#include <libc.h>
+#include "go.h"
+#include "popt.h"
+
+static Reg* firstr; // first Reg of the flow graph being optimized; set in regopt and reset to R once the graph is freed
+static int first = 1; // nonzero until regopt has installed the 'Q' (Bits) print verb
+
+/*
+ * qsort comparison function for Rgn entries.
+ * Orders regions by decreasing cost; regions of equal cost
+ * are ordered by decreasing varno.
+ */
+int
+rcmp(const void *a1, const void *a2)
+{
+	Rgn *x, *y;
+	int xcost, ycost;
+
+	x = (Rgn*)a1;
+	y = (Rgn*)a2;
+	xcost = x->cost;
+	ycost = y->cost;
+	if(ycost != xcost)
+		return ycost - xcost;
+	return y->varno - x->varno;
+}
+
+/*
+ * For every variable bit set in bit, set addr = 2 on all Var entries
+ * that share that variable's node and name (i.e. every tracked piece
+ * of the same variable).
+ */
+static void
+setaddrs(Bits bit)
+{
+	int bn, name;
+	Var *v, *end;
+	Node *node;
+
+	while(bany(&bit)) {
+		// take the next bit and identify the variable it stands for
+		bn = bnum(bit);
+		biclr(&bit, bn);
+		node = var[bn].node;
+		name = var[bn].name;
+
+		// disable all pieces of that variable
+		end = var + nvar;
+		for(v = var; v < end; v++)
+			if(v->node == node && v->name == name)
+				v->addr = 2;
+	}
+}
+
+static Node* regnodes[64]; // name nodes for the register pseudo-variables; filled lazily by regopt and reused on later calls
+
+static void walkvardef(Node *n, Reg *r, int active); // forward declaration; used by regopt pass 2.5
+
+/*
+ * regopt is the registerizer entry point for one function whose
+ * instruction list starts at firstp.  It runs these passes:
+ *	1	build the flow graph and record per-instruction use/set bits
+ *	2	find the loop structure (flowrpo)
+ *	2.5	propagate fat VARDEFs forward (walkvardef)
+ *	3	propagate usage backward (prop) until nothing changes
+ *	4	propagate register/variable synchrony forward (synch)
+ *	4.5	move the register pseudo-variable bits into Reg.regu
+ *	5	isolate regions and compute their cost (paint1)
+ *	6	pick registers and rewrite the code (paint2, allreg, paint3)
+ *	7	run the architecture peephole optimizer, then unlink ANOPs
+ * If flowstart cannot build a graph, the function is left unchanged.
+ * With debug['R'] set, statistics from ostats are printed and reset.
+ */
+void
+regopt(Prog *firstp)
+{
+	Reg *r, *r1;
+	Prog *p;
+	Graph *g;
+	ProgInfo info;
+	int i, z, active;
+	uint64 vreg, usedreg;
+	uint64 mask;
+	int nreg;
+	char **regnames;
+	Bits bit;
+
+	if(first) {
+		fmtinstall('Q', Qconv);
+		first = 0;
+	}
+
+	mergetemp(firstp);
+
+	/*
+	 * control flow is more complicated in generated go code
+	 * than in generated c code.  define pseudo-variables for
+	 * registers, so we have complete register usage information.
+	 */
+	regnames = arch.regnames(&nreg);
+	nvar = nreg;
+	memset(var, 0, nreg*sizeof var[0]);
+	for(i=0; i<nreg; i++) {
+		if(regnodes[i] == N)
+			regnodes[i] = newname(lookup(regnames[i]));
+		var[i].node = regnodes[i];
+	}
+
+	regbits = arch.excludedregs();
+	externs = zbits;
+	params = zbits;
+	consts = zbits;
+	addrs = zbits;
+	ivar = zbits;
+	ovar = zbits;
+
+	/*
+	 * pass 1
+	 * build aux data structure
+	 * allocate pcs
+	 * find use and set of variables
+	 */
+	g = flowstart(firstp, sizeof(Reg));
+	if(g == nil) {
+		// no flow graph: undo the node->opt links and give up
+		for(i=0; i<nvar; i++)
+			var[i].node->opt = nil;
+		return;
+	}
+
+	firstr = (Reg*)g->start;
+
+	for(r = firstr; r != R; r = (Reg*)r->f.link) {
+		p = r->f.prog;
+		if(p->as == AVARDEF || p->as == AVARKILL)
+			continue;
+		arch.proginfo(&info, p);
+
+		// Avoid making variables for direct-called functions.
+		if(p->as == ACALL && p->to.type == TYPE_MEM && p->to.name == NAME_EXTERN)
+			continue;
+
+		// from vs to doesn't matter for registers.
+		r->use1.b[0] |= info.reguse | info.regindex;
+		r->set.b[0] |= info.regset;
+
+		bit = mkvar(r, &p->from);
+		if(bany(&bit)) {
+			if(info.flags & LeftAddr)
+				setaddrs(bit);
+			if(info.flags & LeftRead)
+				for(z=0; z<BITS; z++)
+					r->use1.b[z] |= bit.b[z];
+			if(info.flags & LeftWrite)
+				for(z=0; z<BITS; z++)
+					r->set.b[z] |= bit.b[z];
+		}
+
+		// Compute used register for reg
+		if(info.flags & RegRead)
+			r->use1.b[0] |= arch.RtoB(p->reg);
+
+		// Currently we never generate three register forms.
+		// If we do, this will need to change.
+		if(p->from3.type != TYPE_NONE)
+			fatal("regopt not implemented for from3");
+
+		bit = mkvar(r, &p->to);
+		if(bany(&bit)) {
+			if(info.flags & RightAddr)
+				setaddrs(bit);
+			if(info.flags & RightRead)
+				for(z=0; z<BITS; z++)
+					r->use2.b[z] |= bit.b[z];
+			if(info.flags & RightWrite)
+				for(z=0; z<BITS; z++)
+					r->set.b[z] |= bit.b[z];
+		}
+	}
+
+	// collect every variable flagged addr into the addrs set
+	for(i=0; i<nvar; i++) {
+		Var *v = var+i;
+		if(v->addr) {
+			bit = blsh(i);
+			for(z=0; z<BITS; z++)
+				addrs.b[z] |= bit.b[z];
+		}
+
+		if(debug['R'] && debug['v'])
+			print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
+				i, v->addr, v->etype, v->width, v->node, v->offset);
+	}
+
+	if(debug['R'] && debug['v'])
+		dumpit("pass1", &firstr->f, 1);
+
+	/*
+	 * pass 2
+	 * find looping structure
+	 */
+	flowrpo(g);
+
+	if(debug['R'] && debug['v'])
+		dumpit("pass2", &firstr->f, 1);
+
+	/*
+	 * pass 2.5
+	 * iterate propagating fat vardef covering forward
+	 * r->act records vars with a VARDEF since the last CALL.
+	 * (r->act will be reused in pass 5 for something else,
+	 * but we'll be done with it by then.)
+	 */
+	active = 0;
+	for(r = firstr; r != R; r = (Reg*)r->f.link) {
+		r->f.active = 0;
+		r->act = zbits;
+	}
+	for(r = firstr; r != R; r = (Reg*)r->f.link) {
+		p = r->f.prog;
+		if(p->as == AVARDEF && isfat(((Node*)(p->to.node))->type) && ((Node*)(p->to.node))->opt != nil) {
+			active++;
+			walkvardef(p->to.node, r, active);
+		}
+	}
+
+	/*
+	 * pass 3
+	 * iterate propagating usage
+	 * back until flow graph is complete
+	 */
+loop1:
+	change = 0;
+	for(r = firstr; r != R; r = (Reg*)r->f.link)
+		r->f.active = 0;
+	for(r = firstr; r != R; r = (Reg*)r->f.link)
+		if(r->f.prog->as == ARET)
+			prop(r, zbits, zbits);
+loop11:
+	/* pick up unreachable code */
+	i = 0;
+	for(r = firstr; r != R; r = r1) {
+		r1 = (Reg*)r->f.link;
+		if(r1 && r1->f.active && !r->f.active) {
+			prop(r, zbits, zbits);
+			i = 1;
+		}
+	}
+	if(i)
+		goto loop11;
+	if(change)
+		goto loop1;
+
+	if(debug['R'] && debug['v'])
+		dumpit("pass3", &firstr->f, 1);
+
+	/*
+	 * pass 4
+	 * iterate propagating register/variable synchrony
+	 * forward until graph is complete
+	 */
+loop2:
+	change = 0;
+	for(r = firstr; r != R; r = (Reg*)r->f.link)
+		r->f.active = 0;
+	synch(firstr, zbits);
+	if(change)
+		goto loop2;
+
+	if(debug['R'] && debug['v'])
+		dumpit("pass4", &firstr->f, 1);
+
+	/*
+	 * pass 4.5
+	 * move register pseudo-variables into regu.
+	 */
+	if(nreg == 64)
+		mask = ~0ULL; // can't rely on C to shift by 64
+	else
+		mask = (1ULL<<nreg) - 1;
+	for(r = firstr; r != R; r = (Reg*)r->f.link) {
+		r->regu = (r->refbehind.b[0] | r->set.b[0]) & mask;
+		r->set.b[0] &= ~mask;
+		r->use1.b[0] &= ~mask;
+		r->use2.b[0] &= ~mask;
+		r->refbehind.b[0] &= ~mask;
+		r->refahead.b[0] &= ~mask;
+		r->calbehind.b[0] &= ~mask;
+		r->calahead.b[0] &= ~mask;
+		r->regdiff.b[0] &= ~mask;
+		r->act.b[0] &= ~mask;
+	}
+
+	if(debug['R'] && debug['v'])
+		dumpit("pass4.5", &firstr->f, 1);
+
+	/*
+	 * pass 5
+	 * isolate regions
+	 * calculate costs (paint1)
+	 */
+	r = firstr;
+	if(r) {
+		for(z=0; z<BITS; z++)
+			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
+				~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
+		if(bany(&bit) && !r->f.refset) {
+			// should never happen - all variables are preset
+			if(debug['w'])
+				print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
+			r->f.refset = 1;
+		}
+	}
+	for(r = firstr; r != R; r = (Reg*)r->f.link)
+		r->act = zbits;
+	rgp = region;
+	nregion = 0;
+	for(r = firstr; r != R; r = (Reg*)r->f.link) {
+		for(z=0; z<BITS; z++)
+			bit.b[z] = r->set.b[z] &
+				~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
+		if(bany(&bit) && !r->f.refset) {
+			if(debug['w'])
+				print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
+			r->f.refset = 1;
+			arch.excise(&r->f);
+		}
+		for(z=0; z<BITS; z++)
+			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
+		while(bany(&bit)) {
+			i = bnum(bit);
+			rgp->enter = r;
+			rgp->varno = i;
+			change = 0;
+			paint1(r, i);
+			biclr(&bit, i);
+			if(change <= 0)
+				continue;
+			rgp->cost = change;
+			nregion++;
+			if(nregion >= NRGN) {
+				if(debug['R'] && debug['v'])
+					print("too many regions\n");
+				goto brk;
+			}
+			rgp++;
+		}
+	}
+brk:
+	qsort(region, nregion, sizeof(region[0]), rcmp);
+
+	if(debug['R'] && debug['v'])
+		dumpit("pass5", &firstr->f, 1);
+
+	/*
+	 * pass 6
+	 * determine used registers (paint2)
+	 * replace code (paint3)
+	 */
+	rgp = region;
+	if(debug['R'] && debug['v'])
+		print("\nregisterizing\n");
+	for(i=0; i<nregion; i++) {
+		if(debug['R'] && debug['v'])
+			print("region %d: cost %d varno %d enter %lld\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc);
+		bit = blsh(rgp->varno);
+		usedreg = paint2(rgp->enter, rgp->varno, 0);
+		vreg = allreg(usedreg, rgp);
+		if(rgp->regno != 0) {
+			if(debug['R'] && debug['v']) {
+				Var *v;
+
+				v = var + rgp->varno;
+				print("registerize %N+%lld (bit=%2d et=%2E) in %R usedreg=%#llx vreg=%#llx\n",
+					v->node, v->offset, rgp->varno, v->etype, rgp->regno, usedreg, vreg);
+			}
+			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
+		}
+		rgp++;
+	}
+
+	/*
+	 * free aux structures. peep allocates new ones.
+	 */
+	for(i=0; i<nvar; i++)
+		var[i].node->opt = nil;
+	flowend(g);
+	firstr = R;
+
+	if(debug['R'] && debug['v']) {
+		// Rebuild flow graph, since we inserted instructions.
+		// flowstart may return nil (as handled in pass 1); this dump is
+		// only diagnostic, so skip it rather than dereference nil.
+		g = flowstart(firstp, sizeof(Reg));
+		if(g != nil) {
+			firstr = (Reg*)g->start;
+			dumpit("pass6", &firstr->f, 1);
+			flowend(g);
+			firstr = R;
+		}
+	}
+
+	/*
+	 * pass 7
+	 * peep-hole on basic block
+	 */
+	if(!debug['R'] || debug['P'])
+		arch.peep(firstp);
+
+	/*
+	 * eliminate nops
+	 */
+	for(p=firstp; p!=P; p=p->link) {
+		while(p->link != P && p->link->as == ANOP)
+			p->link = p->link->link;
+		if(p->to.type == TYPE_BRANCH)
+			while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
+				p->to.u.branch = p->to.u.branch->link;
+	}
+
+	if(debug['R']) {
+		if(ostats.ncvtreg ||
+		   ostats.nspill ||
+		   ostats.nreload ||
+		   ostats.ndelmov ||
+		   ostats.nvar ||
+		   ostats.naddr ||
+		   0)
+			print("\nstats\n");
+
+		if(ostats.ncvtreg)
+			print(" %4d cvtreg\n", ostats.ncvtreg);
+		if(ostats.nspill)
+			print(" %4d spill\n", ostats.nspill);
+		if(ostats.nreload)
+			print(" %4d reload\n", ostats.nreload);
+		if(ostats.ndelmov)
+			print(" %4d delmov\n", ostats.ndelmov);
+		if(ostats.nvar)
+			print(" %4d var\n", ostats.nvar);
+		if(ostats.naddr)
+			print(" %4d addr\n", ostats.naddr);
+
+		memset(&ostats, 0, sizeof(ostats));
+	}
+}
+
+/*
+ * Starting at r, follow s1 links and set in each Reg's act set the
+ * bits of every Var on n's opt list.  The walk stops at a Reg already
+ * stamped with this active value, at the AVARKILL of n (not marked),
+ * or after an ACALL (marked).  The s2 successors of every Reg walked
+ * are then visited recursively.
+ */
+static void
+walkvardef(Node *n, Reg *r, int active)
+{
+	Reg *stop, *cur;
+	Prog *p;
+	int bn;
+	Var *v;
+
+	stop = r;
+	while(stop != R) {
+		if(stop->f.active == active)
+			break;
+		stop->f.active = active;
+		p = stop->f.prog;
+		if(p->as == AVARKILL && p->to.node == n)
+			break;
+		for(v=n->opt; v!=nil; v=v->nextinnode) {
+			bn = v - var;
+			biset(&stop->act, bn);
+		}
+		if(p->as == ACALL)
+			break;
+		stop = (Reg*)stop->f.s1;
+	}
+
+	// branch targets of everything visited above
+	cur = r;
+	while(cur != stop) {
+		if(cur->f.s2 != nil)
+			walkvardef(n, (Reg*)cur->f.s2, active);
+		cur = (Reg*)cur->f.s1;
+	}
+}
+
+/*
+ * insert a move between variable bn and register rn
+ * immediately after r's instruction.
+ * f != 0: move rn to the variable's memory location.
+ * f == 0: move the variable's memory location to rn.
+ */
+void
+addmove(Reg *r, int bn, int rn, int f)
+{
+	Prog *prev, *mov;
+	Adr *mem;
+	Var *v;
+
+	mov = mal(sizeof(*mov));
+	clearp(mov);
+	mov->pc = 9999;
+
+	// splice the new instruction in after r's instruction
+	prev = r->f.prog;
+	mov->link = prev->link;
+	prev->link = mov;
+	mov->lineno = prev->lineno;
+
+	v = var + bn;
+
+	// describe the variable's memory location in the destination operand
+	mem = &mov->to;
+	mem->offset = v->offset;
+	mem->etype = v->etype;
+	mem->type = TYPE_MEM;
+	mem->name = v->name;
+	mem->node = v->node;
+	mem->sym = linksym(v->node->sym);
+	/* NOTE(rsc): 9g did
+	if(a->etype == TARRAY)
+		a->type = TYPE_ADDR;
+	else if(a->sym == nil)
+		a->type = TYPE_CONST;
+	*/
+
+	mov->as = arch.optoas(OAS, types[(uchar)v->etype]);
+	// TODO(rsc): Remove special case here.
+	if((arch.thechar == '9' || arch.thechar == '5') && v->etype == TBOOL)
+		mov->as = arch.optoas(OAS, types[TUINT8]);
+
+	if(f) {
+		// register -> memory
+		mov->from.type = TYPE_REG;
+		mov->from.reg = rn;
+		mov->from.name = NAME_NONE;
+	} else {
+		// memory -> register: the memory operand becomes the source
+		mov->from = *mem;
+		*mem = zprog.from;
+		mem->type = TYPE_REG;
+		mem->reg = rn;
+	}
+	if(debug['R'] && debug['v'])
+		print("%P ===add=== %P\n", prev, mov);
+	ostats.nspill++;
+}
+
+/*
+ * Report (1 or 0) whether the ranges [o1, o1+w1) and [o2, o2+w2)
+ * have any position in common.
+ */
+static int
+overlap(int64 o1, int w1, int64 o2, int w2)
+{
+	int64 end1, end2;
+
+	end1 = o1+w1;
+	end2 = o2+w2;
+	return end1 > o2 && end2 > o1;
+}
+/*
+ * mkvar maps the operand a to a set of variable bits.
+ *
+ * Register-like operands (the default case) yield the register bits
+ * from arch.doregbits/arch.RtoB in word 0.  TYPE_MEM operands naming an
+ * EXTERN, STATIC, PARAM or AUTO yield the bit of the matching Var in
+ * var[], creating a new entry if no Var has the same node, name,
+ * offset, etype and width.  Everything else yields zbits.
+ *
+ * TYPE_ADDR operands (except on '9' and '5', which treat them like
+ * TYPE_MEM) are resolved as memory, passed to setaddrs, and then
+ * reported as zbits.
+ *
+ * Side effects: when r != R, registers used by the operand are ORed
+ * into r->use1.b[0]; existing Vars that overlap the operand without
+ * matching it get addr = 1; a newly created Var is linked onto
+ * node->opt and recorded in externs/params/ivar/ovar as appropriate.
+ */
+Bits
+mkvar(Reg *r, Adr *a)
+{
+ Var *v;
+ int i, n, et, z, flag;
+ int64 w;
+ uint64 regu;
+ int64 o;
+ Bits bit;
+ Node *node;
+
+ /*
+ * mark registers used
+ */
+ if(a->type == TYPE_NONE)
+ goto none;
+
+ if(r != R)
+ r->use1.b[0] |= arch.doregbits(a->index); // TODO: Use RtoB
+
+ switch(a->type) {
+ default:
+ regu = arch.doregbits(a->reg) | arch.RtoB(a->reg); // TODO: Use RtoB
+ if(regu == 0)
+ goto none;
+ bit = zbits;
+ bit.b[0] = regu;
+ return bit;
+
+ case TYPE_ADDR:
+ // TODO(rsc): Remove special case here.
+ if(arch.thechar == '9' || arch.thechar == '5')
+ goto memcase;
+ a->type = TYPE_MEM;
+ bit = mkvar(r, a);
+ setaddrs(bit);
+ a->type = TYPE_ADDR;
+ ostats.naddr++;
+ goto none;
+
+ case TYPE_MEM:
+ memcase:
+ if(r != R) {
+ r->use1.b[0] |= arch.RtoB(a->reg);
+ /* NOTE: 5g did
+ if(r->f.prog->scond & (C_PBIT|C_WBIT))
+ r->set.b[0] |= RtoB(a->reg);
+ */
+ }
+ switch(a->name) {
+ default:
+ goto none;
+ case NAME_EXTERN:
+ case NAME_STATIC:
+ case NAME_PARAM:
+ case NAME_AUTO:
+ n = a->name;
+ break;
+ }
+ }
+
+ node = a->node;
+ if(node == N || node->op != ONAME || node->orig == N)
+ goto none;
+ node = node->orig;
+ if(node->orig != node)
+ fatal("%D: bad node", a);
+ if(node->sym == S || node->sym->name[0] == '.')
+ goto none;
+ et = a->etype;
+ o = a->offset;
+ w = a->width;
+ if(w < 0)
+ fatal("bad width %lld for %D", w, a);
+
+ flag = 0; // becomes 1 once an overlapping, non-identical Var has been seen
+ for(i=0; i<nvar; i++) {
+ v = var+i;
+ if(v->node == node && v->name == n) {
+ if(v->offset == o)
+ if(v->etype == et)
+ if(v->width == w) {
+ // TODO(rsc): Remove special case for arm here.
+ if(!flag || arch.thechar != '5')
+ return blsh(i);
+ }
+
+ // if they overlap, disable both
+ if(overlap(v->offset, v->width, o, w)) {
+// print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et);
+ v->addr = 1;
+ flag = 1;
+ }
+ }
+ }
+
+ switch(et) {
+ case 0:
+ case TFUNC:
+ goto none;
+ }
+
+ if(nvar >= NVAR) {
+ if(debug['w'] > 1 && node != N)
+ fatal("variable not optimized: %#N", node);
+
+ // If we're not tracking a word in a variable, mark the rest as
+ // having its address taken, so that we keep the whole thing
+ // live at all calls. otherwise we might optimize away part of
+ // a variable but not all of it.
+ for(i=0; i<nvar; i++) {
+ v = var+i;
+ if(v->node == node)
+ v->addr = 1;
+ }
+ goto none;
+ }
+
+ i = nvar;
+ nvar++;
+ v = var+i;
+ v->offset = o;
+ v->name = n;
+ v->etype = et;
+ v->width = w;
+ v->addr = flag; // funny punning
+ v->node = node;
+
+ // node->opt is the head of a linked list
+ // of Vars within the given Node, so that
+ // we can start at a Var and find all the other
+ // Vars in the same Go variable.
+ v->nextinnode = node->opt;
+ node->opt = v;
+
+ bit = blsh(i);
+ if(n == NAME_EXTERN || n == NAME_STATIC)
+ for(z=0; z<BITS; z++)
+ externs.b[z] |= bit.b[z];
+ if(n == NAME_PARAM)
+ for(z=0; z<BITS; z++)
+ params.b[z] |= bit.b[z];
+
+ if(node->class == PPARAM)
+ for(z=0; z<BITS; z++)
+ ivar.b[z] |= bit.b[z];
+ if(node->class == PPARAMOUT)
+ for(z=0; z<BITS; z++)
+ ovar.b[z] |= bit.b[z];
+
+ // Treat values with their address taken as live at calls,
+ // because the garbage collector's liveness analysis in ../gc/plive.c does.
+ // These must be consistent or else we will elide stores and the garbage
+ // collector will see uninitialized data.
+ // The typical case where our own analysis is out of sync is when the
+ // node appears to have its address taken but that code doesn't actually
+ // get generated and therefore doesn't show up as an address being
+ // taken when we analyze the instruction stream.
+ // One instance of this case is when a closure uses the same name as
+ // an outer variable for one of its own variables declared with :=.
+ // The parser flags the outer variable as possibly shared, and therefore
+ // sets addrtaken, even though it ends up not being actually shared.
+ // If we were better about _ elision, _ = &x would suffice too.
+ // The broader := in a closure problem is mentioned in a comment in
+ // closure.c:/^typecheckclosure and dcl.c:/^oldname.
+ if(node->addrtaken)
+ v->addr = 1;
+
+ // Disable registerization for globals, because:
+ // (1) we might panic at any time and we want the recovery code
+ // to see the latest values (issue 1304).
+ // (2) we don't know what pointers might point at them and we want
+ // loads via those pointers to see updated values and vice versa (issue 7995).
+ //
+ // Disable registerization for results if using defer, because the deferred func
+ // might recover and return, causing the current values to be used.
+ if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
+ v->addr = 1;
+
+ if(debug['R'])
+ print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
+ ostats.nvar++;
+
+ return bit;
+
+none:
+ return zbits;
+}
+/*
+ * prop propagates usage information backward through the flow graph.
+ *
+ * Starting at r it follows p1 links.  At each Reg it merges ref and
+ * cal into refahead/calahead (incrementing the global change counter
+ * whenever those sets grow), applies the special handling for ACALL,
+ * ATEXT and ARET below, and then derives refbehind/calbehind from the
+ * instruction's set/use1/use2 bits.  The walk stops after the first
+ * Reg that was already marked active.
+ *
+ * Afterwards, for every Reg visited before the stopping point, prop
+ * recurses into each Reg on its p2/p2link list, passing that Reg's
+ * refbehind and calbehind.
+ */
+void
+prop(Reg *r, Bits ref, Bits cal)
+{
+ Reg *r1, *r2;
+ int z, i, j;
+ Var *v, *v1;
+
+ for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
+ for(z=0; z<BITS; z++) {
+ ref.b[z] |= r1->refahead.b[z];
+ if(ref.b[z] != r1->refahead.b[z]) {
+ r1->refahead.b[z] = ref.b[z];
+ change++;
+ }
+ cal.b[z] |= r1->calahead.b[z];
+ if(cal.b[z] != r1->calahead.b[z]) {
+ r1->calahead.b[z] = cal.b[z];
+ change++;
+ }
+ }
+ switch(r1->f.prog->as) {
+ case ACALL:
+ if(noreturn(r1->f.prog))
+ break;
+
+ // Mark all input variables (ivar) as used, because that's what the
+ // liveness bitmaps say. The liveness bitmaps say that so that a
+ // panic will not show stale values in the parameter dump.
+ // Mark variables with a recent VARDEF (r1->act) as used,
+ // so that the optimizer flushes initializations to memory,
+ // so that if a garbage collection happens during this CALL,
+ // the collector will see initialized memory. Again this is to
+ // match what the liveness bitmaps say.
+ for(z=0; z<BITS; z++) {
+ cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
+ ref.b[z] = 0;
+ }
+
+ // cal.b is the current approximation of what's live across the call.
+ // Every bit in cal.b is a single stack word. For each such word,
+ // find all the other tracked stack words in the same Go variable
+ // (struct/slice/string/interface) and mark them live too.
+ // This is necessary because the liveness analysis for the garbage
+ // collector works at variable granularity, not at word granularity.
+ // It is fundamental for slice/string/interface: the garbage collector
+ // needs the whole value, not just some of the words, in order to
+ // interpret the other bits correctly. Specifically, slice needs a consistent
+ // ptr and cap, string needs a consistent ptr and len, and interface
+ // needs a consistent type word and data word.
+ for(z=0; z<BITS; z++) {
+ if(cal.b[z] == 0)
+ continue;
+ for(i=0; i<64; i++) {
+ if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
+ continue;
+ v = var+z*64+i;
+ if(v->node->opt == nil) // v represents fixed register, not Go variable
+ continue;
+
+ // v->node->opt is the head of a linked list of Vars
+ // corresponding to tracked words from the Go variable v->node.
+ // Walk the list and set all the bits.
+ // For a large struct this could end up being quadratic:
+ // after the first setting, the outer loop (for z, i) would see a 1 bit
+ // for all of the remaining words in the struct, and for each such
+ // word would go through and turn on all the bits again.
+ // To avoid the quadratic behavior, we only turn on the bits if
+ // v is the head of the list or if the head's bit is not yet turned on.
+ // This will set the bits at most twice, keeping the overall loop linear.
+ v1 = v->node->opt;
+ j = v1 - var;
+ if(v == v1 || !btest(&cal, j)) {
+ for(; v1 != nil; v1 = v1->nextinnode) {
+ j = v1 - var;
+ biset(&cal, j);
+ }
+ }
+ }
+ }
+ break;
+
+ case ATEXT:
+ for(z=0; z<BITS; z++) {
+ cal.b[z] = 0;
+ ref.b[z] = 0;
+ }
+ break;
+
+ case ARET:
+ for(z=0; z<BITS; z++) {
+ cal.b[z] = externs.b[z] | ovar.b[z];
+ ref.b[z] = 0;
+ }
+ break;
+ }
+ for(z=0; z<BITS; z++) {
+ ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
+ r1->use1.b[z] | r1->use2.b[z];
+ cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
+ r1->refbehind.b[z] = ref.b[z];
+ r1->calbehind.b[z] = cal.b[z];
+ }
+ if(r1->f.active)
+ break;
+ r1->f.active = 1;
+ }
+ for(; r != r1; r = (Reg*)r->f.p1)
+ for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
+ prop(r2, r->refbehind, r->calbehind);
+}
+
+/*
+ * synch propagates the dif set forward along s1 links starting at r,
+ * merging it into each Reg's regdiff and counting every growth in the
+ * global change counter.  It stops after the first Reg that is already
+ * marked active, and recurses into s2 successors of newly marked Regs.
+ */
+void
+synch(Reg *r, Bits dif)
+{
+	Reg *cur;
+	int z;
+
+	cur = r;
+	while(cur != R) {
+		for(z=0; z<BITS; z++) {
+			// drop bits that are referenced ahead but not behind,
+			// then add what this instruction sets or already carries
+			dif.b[z] &= cur->refbehind.b[z] | ~cur->refahead.b[z];
+			dif.b[z] |= cur->set.b[z] | cur->regdiff.b[z];
+			if(cur->regdiff.b[z] != dif.b[z]) {
+				cur->regdiff.b[z] = dif.b[z];
+				change++;
+			}
+		}
+		if(cur->f.active)
+			break;
+		cur->f.active = 1;
+		for(z=0; z<BITS; z++)
+			dif.b[z] &= cur->calbehind.b[z] | ~cur->calahead.b[z];
+		if(cur->f.s2 != nil)
+			synch((Reg*)cur->f.s2, dif);
+		cur = (Reg*)cur->f.s1;
+	}
+}
+
+/*
+ * allreg picks a register for region r given b, the set of register
+ * bits already in use.  Integer, boolean and pointer variables ask
+ * arch.BtoR for a register outside b; float variables ask arch.BtoF.
+ * On success (a register was found and r->cost > 0) r->regno is set
+ * and the register's bit is returned; otherwise r->regno stays 0 and
+ * the result is 0.  An unexpected etype is fatal.
+ */
+uint64
+allreg(uint64 b, Rgn *r)
+{
+	Var *v;
+	int reg, isfloat;
+
+	v = var + r->varno;
+	r->regno = 0;
+
+	switch(v->etype) {
+	case TINT8:
+	case TUINT8:
+	case TINT16:
+	case TUINT16:
+	case TINT32:
+	case TUINT32:
+	case TINT64:
+	case TUINT64:
+	case TINT:
+	case TUINT:
+	case TUINTPTR:
+	case TBOOL:
+	case TPTR32:
+	case TPTR64:
+		isfloat = 0;
+		reg = arch.BtoR(~b);
+		break;
+
+	case TFLOAT32:
+	case TFLOAT64:
+		isfloat = 1;
+		reg = arch.BtoF(~b);
+		break;
+
+	default:
+		fatal("unknown etype %d/%E", bitno(b), v->etype);
+		return 0;
+	}
+
+	if(reg == 0 || r->cost <= 0)
+		return 0;
+	r->regno = reg;
+	if(isfloat)
+		return arch.FtoB(reg);
+	return arch.RtoB(reg);
+}
+
+/*
+ * paint1 walks the region of the flow graph in which variable bit bn
+ * is live around r, setting bn in each visited Reg's act set and
+ * accumulating the estimated benefit of registerizing it in the global
+ * change counter: CREF * loop weight for each use or set (ANOPs
+ * excluded), minus CLOAD * loop weight for each load or store that
+ * would have to be inserted.  Regs whose act bit is already set are
+ * not revisited.
+ */
+void
+paint1(Reg *r, int bn)
+{
+	Reg *r1;
+	int z;
+	uint64 bb;
+
+	z = bn/64;
+	// unsigned constant: shifting a signed 1 by 63 would be undefined
+	bb = 1ULL<<(bn%64);
+	if(r->act.b[z] & bb)
+		return;
+	// back up along p1 to the start of the region
+	for(;;) {
+		if(!(r->refbehind.b[z] & bb))
+			break;
+		r1 = (Reg*)r->f.p1;
+		if(r1 == R)
+			break;
+		if(!(r1->refahead.b[z] & bb))
+			break;
+		if(r1->act.b[z] & bb)
+			break;
+		r = r1;
+	}
+
+	if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
+		change -= CLOAD * r->f.loop;
+	}
+	for(;;) {
+		r->act.b[z] |= bb;
+
+		if(r->f.prog->as != ANOP) { // don't give credit for NOPs
+			if(r->use1.b[z] & bb)
+				change += CREF * r->f.loop;
+			if((r->use2.b[z]|r->set.b[z]) & bb)
+				change += CREF * r->f.loop;
+		}
+
+		if(STORE(r) & r->regdiff.b[z] & bb) {
+			change -= CLOAD * r->f.loop;
+		}
+
+		if(r->refbehind.b[z] & bb)
+			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
+				if(r1->refahead.b[z] & bb)
+					paint1(r1, bn);
+
+		if(!(r->refahead.b[z] & bb))
+			break;
+		r1 = (Reg*)r->f.s2;
+		if(r1 != R)
+			if(r1->refbehind.b[z] & bb)
+				paint1(r1, bn);
+		r = (Reg*)r->f.s1;
+		if(r == R)
+			break;
+		if(r->act.b[z] & bb)
+			break;
+		if(!(r->refbehind.b[z] & bb))
+			break;
+	}
+}
+
+/*
+ * paint2 walks the region previously marked for variable bit bn
+ * (act bit set), clearing the act bit as it goes, and returns the
+ * union of regbits and the regu sets of every Reg in the region.
+ * If r is not in the marked region the result is just regbits.
+ * depth is only used in debug output.
+ */
+uint64
+paint2(Reg *r, int bn, int depth)
+{
+	Reg *r1;
+	int z;
+	uint64 bb, vreg;
+
+	z = bn/64;
+	// unsigned constant: shifting a signed 1 by 63 would be undefined
+	bb = 1ULL << (bn%64);
+	vreg = regbits;
+	if(!(r->act.b[z] & bb))
+		return vreg;
+	// back up along p1 to the start of the region
+	for(;;) {
+		if(!(r->refbehind.b[z] & bb))
+			break;
+		r1 = (Reg*)r->f.p1;
+		if(r1 == R)
+			break;
+		if(!(r1->refahead.b[z] & bb))
+			break;
+		if(!(r1->act.b[z] & bb))
+			break;
+		r = r1;
+	}
+	for(;;) {
+		if(debug['R'] && debug['v'])
+			print(" paint2 %d %P\n", depth, r->f.prog);
+
+		r->act.b[z] &= ~bb;
+
+		vreg |= r->regu;
+
+		if(r->refbehind.b[z] & bb)
+			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
+				if(r1->refahead.b[z] & bb)
+					vreg |= paint2(r1, bn, depth+1);
+
+		if(!(r->refahead.b[z] & bb))
+			break;
+		r1 = (Reg*)r->f.s2;
+		if(r1 != R)
+			if(r1->refbehind.b[z] & bb)
+				vreg |= paint2(r1, bn, depth+1);
+		r = (Reg*)r->f.s1;
+		if(r == R)
+			break;
+		if(!(r->act.b[z] & bb))
+			break;
+		if(!(r->refbehind.b[z] & bb))
+			break;
+	}
+
+	return vreg;
+}
+
+/*
+ * paint3 rewrites the region for variable bit bn to use register rn.
+ * Operands that use or set the variable are converted with addreg,
+ * loads and stores are inserted with addmove where the region is
+ * entered or must be flushed, the register bits rb are ORed into each
+ * visited Reg's regu, and the act bit is set so Regs are not revisited.
+ */
+void
+paint3(Reg *r, int bn, uint64 rb, int rn)
+{
+	Reg *r1;
+	Prog *p;
+	int z;
+	uint64 bb;
+
+	z = bn/64;
+	// unsigned constant: shifting a signed 1 by 63 would be undefined
+	bb = 1ULL << (bn%64);
+	if(r->act.b[z] & bb)
+		return;
+	// back up along p1 to the start of the region
+	for(;;) {
+		if(!(r->refbehind.b[z] & bb))
+			break;
+		r1 = (Reg*)r->f.p1;
+		if(r1 == R)
+			break;
+		if(!(r1->refahead.b[z] & bb))
+			break;
+		if(r1->act.b[z] & bb)
+			break;
+		r = r1;
+	}
+
+	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
+		addmove(r, bn, rn, 0);
+	for(;;) {
+		r->act.b[z] |= bb;
+		p = r->f.prog;
+
+		if(r->use1.b[z] & bb) {
+			if(debug['R'] && debug['v'])
+				print("%P", p);
+			addreg(&p->from, rn);
+			if(debug['R'] && debug['v'])
+				print(" ===change== %P\n", p);
+		}
+		if((r->use2.b[z]|r->set.b[z]) & bb) {
+			if(debug['R'] && debug['v'])
+				print("%P", p);
+			addreg(&p->to, rn);
+			if(debug['R'] && debug['v'])
+				print(" ===change== %P\n", p);
+		}
+
+		if(STORE(r) & r->regdiff.b[z] & bb)
+			addmove(r, bn, rn, 1);
+		r->regu |= rb;
+
+		if(r->refbehind.b[z] & bb)
+			for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
+				if(r1->refahead.b[z] & bb)
+					paint3(r1, bn, rb, rn);
+
+		if(!(r->refahead.b[z] & bb))
+			break;
+		r1 = (Reg*)r->f.s2;
+		if(r1 != R)
+			if(r1->refbehind.b[z] & bb)
+				paint3(r1, bn, rb, rn);
+		r = (Reg*)r->f.s1;
+		if(r == R)
+			break;
+		if(r->act.b[z] & bb)
+			break;
+		if(!(r->refbehind.b[z] & bb))
+			break;
+	}
+}
+
+/*
+ * Turn operand a into a plain reference to register rn, clearing its
+ * symbol, node, offset and name, and count the conversion in ostats.
+ */
+void
+addreg(Adr *a, int rn)
+{
+	a->type = TYPE_REG;
+	a->reg = rn;
+	a->name = 0;
+	a->offset = 0;
+	a->sym = nil;
+	a->node = nil;
+
+	ostats.ncvtreg++;
+}
+
+/*
+ * Print one flow node: its loop weight and instruction and, when
+ * isreg is set and f is really a Reg, every non-empty bit set it
+ * carries.  Always ends the line with a newline.
+ */
+void
+dumpone(Flow *f, int isreg)
+{
+	int i;
+	Bits any;
+	Reg *r;
+
+	print("%d:%P", f->loop, f->prog);
+	if(!isreg) {
+		print("\n");
+		return;
+	}
+
+	// union of all sets, to decide whether there is anything to show
+	r = (Reg*)f;
+	for(i=0; i<BITS; i++) {
+		any.b[i] = r->set.b[i] | r->use1.b[i] | r->use2.b[i];
+		any.b[i] |= r->refbehind.b[i] | r->refahead.b[i];
+		any.b[i] |= r->calbehind.b[i] | r->calahead.b[i];
+		any.b[i] |= r->regdiff.b[i] | r->act.b[i];
+	}
+	if(!bany(&any)) {
+		print("\n");
+		return;
+	}
+
+	print("\t");
+	if(bany(&r->set))
+		print(" s:%Q", r->set);
+	if(bany(&r->use1))
+		print(" u1:%Q", r->use1);
+	if(bany(&r->use2))
+		print(" u2:%Q", r->use2);
+	if(bany(&r->refbehind))
+		print(" rb:%Q ", r->refbehind);
+	if(bany(&r->refahead))
+		print(" ra:%Q ", r->refahead);
+	if(bany(&r->calbehind))
+		print(" cb:%Q ", r->calbehind);
+	if(bany(&r->calahead))
+		print(" ca:%Q ", r->calahead);
+	if(bany(&r->regdiff))
+		print(" d:%Q ", r->regdiff);
+	if(bany(&r->act))
+		print(" a:%Q ", r->act);
+	print("\n");
+}
+
+/*
+ * Print the heading str followed by every flow node on the link list
+ * starting at r0.  Each node is shown with dumpone, then its p2
+ * predecessors (plus p1, or "(only)" when there is none), then its
+ * successors unless the only successor is simply the next node.
+ */
+void
+dumpit(char *str, Flow *r0, int isreg)
+{
+	Flow *f, *pred;
+
+	print("\n%s\n", str);
+	for(f = r0; f != nil; f = f->link) {
+		dumpone(f, isreg);
+
+		if(f->p2 != nil) {
+			print(" pred:");
+			for(pred = f->p2; pred != nil; pred = pred->p2link)
+				print(" %.4ud", (int)pred->prog->pc);
+			if(f->p1 == nil)
+				print(" (only)");
+			else
+				print(" (and %.4ud)", (int)f->p1->prog->pc);
+			print("\n");
+		}
+
+		// Successors are omitted when control just falls to the next node.
+		if(f->s1 == f->link && f->s2 == nil)
+			continue;
+		print(" succ:");
+		if(f->s1 != nil)
+			print(" %.4ud", (int)f->s1->prog->pc);
+		if(f->s2 != nil)
+			print(" %.4ud", (int)f->s2->prog->pc);
+		print("\n");
+	}
+}