]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/gc: move reg.c into portable code
author Russ Cox <rsc@golang.org>
Wed, 4 Feb 2015 00:23:18 +0000 (19:23 -0500)
committer Russ Cox <rsc@golang.org>
Wed, 11 Feb 2015 20:37:38 +0000 (20:37 +0000)
Now there is only one registerizer shared among all the systems.
There are some unfortunate special cases based on arch.thechar
in reg.c, to preserve bit-for-bit compatibility during the refactoring.
Most are probably bugs one way or another and should be revisited.

Change-Id: I153b435c0eaa05bbbeaf8876822eeb6dedaae3cf
Reviewed-on: https://go-review.googlesource.com/3883
Reviewed-by: Austin Clements <austin@google.com>
32 files changed:
src/cmd/5g/galign.c
src/cmd/5g/gg.h
src/cmd/5g/ggen.c
src/cmd/5g/opt.h [deleted file]
src/cmd/5g/peep.c
src/cmd/5g/prog.c
src/cmd/5g/reg.c
src/cmd/6g/galign.c
src/cmd/6g/gg.h
src/cmd/6g/ggen.c
src/cmd/6g/peep.c
src/cmd/6g/prog.c
src/cmd/6g/reg.c
src/cmd/8g/galign.c
src/cmd/8g/gg.h
src/cmd/8g/ggen.c
src/cmd/8g/gsubr.c
src/cmd/8g/opt.h [deleted file]
src/cmd/8g/peep.c
src/cmd/8g/prog.c
src/cmd/8g/reg.c
src/cmd/9g/galign.c
src/cmd/9g/gg.h
src/cmd/9g/ggen.c
src/cmd/9g/opt.h
src/cmd/9g/peep.c
src/cmd/9g/prog.c
src/cmd/9g/reg.c
src/cmd/gc/go.h
src/cmd/gc/pgen.c
src/cmd/gc/popt.h [moved from src/cmd/6g/opt.h with 93% similarity]
src/cmd/gc/reg.c [new file with mode: 0644]

index c136406a79a5b8d088c04587bad02fdc763b7fe0..394b36d84eb959814f3dc7344bda463e403663c9 100644 (file)
@@ -66,14 +66,22 @@ main(int argc, char **argv)
        arch.ginscall = ginscall;
        arch.igen = igen;
        arch.linkarchinit = linkarchinit;
+       arch.peep = peep;
        arch.proginfo = proginfo;
        arch.regalloc = regalloc;
        arch.regfree = regfree;
-       arch.regopt = regopt;
        arch.regtyp = regtyp;
        arch.sameaddr = sameaddr;
        arch.smallindir = smallindir;
        arch.stackaddr = stackaddr;
+       arch.excludedregs = excludedregs;
+       arch.RtoB = RtoB;
+       arch.FtoB = RtoB;
+       arch.BtoR = BtoR;
+       arch.BtoF = BtoF;
+       arch.optoas = optoas;
+       arch.doregbits = doregbits;
+       arch.regnames = regnames;
        
        gcmain(argc, argv);
 }
index 210c9c2cc74e952793ae29d11091a844b9db3033..b12c7e2561ba091d56808af227e74f7c1683f82d 100644 (file)
@@ -159,3 +159,19 @@ int sameaddr(Addr*, Addr*);
 int smallindir(Addr*, Addr*);
 int stackaddr(Addr*);
 Prog* unpatch(Prog*);
+
+/*
+ * reg.c
+ */
+uint64 excludedregs(void);
+uint64 RtoB(int);
+uint64 FtoB(int);
+int BtoR(uint64);
+int BtoF(uint64);
+uint64 doregbits(int);
+char** regnames(int*);
+
+/*
+ * peep.c
+ */
+void peep(Prog*);
index f91cd7151818daa0daf86aff322c2f70ed0613ac..62b9beadb0a7929b8b3972ffe0bd1117ef190372 100644 (file)
@@ -7,7 +7,7 @@
 #include <u.h>
 #include <libc.h>
 #include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
 
 static Prog* appendpp(Prog*, int, int, int, int32, int, int, int32);
 static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *r0);
diff --git a/src/cmd/5g/opt.h b/src/cmd/5g/opt.h
deleted file mode 100644 (file)
index 5246074..0000000
+++ /dev/null
@@ -1,179 +0,0 @@
-// Inferno utils/5c/gc.h
-// http://code.google.com/p/inferno-os/source/browse/utils/5c/gc.h
-//
-//     Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
-//     Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
-//     Portions Copyright © 1997-1999 Vita Nuova Limited
-//     Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
-//     Portions Copyright © 2004,2006 Bruce Ellis
-//     Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
-//     Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
-//     Portions Copyright © 2009 The Go Authors.  All rights reserved.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-
-#define        Z       N
-#define        Adr     Addr
-
-#define        D_HI    TYPE_NONE
-#define        D_LO    TYPE_NONE
-
-#define        BLOAD(r)        band(bnot(r->refbehind), r->refahead)
-#define        BSTORE(r)       band(bnot(r->calbehind), r->calahead)
-#define        LOAD(r)         (~r->refbehind.b[z] & r->refahead.b[z])
-#define        STORE(r)        (~r->calbehind.b[z] & r->calahead.b[z])
-
-#define        CLOAD   5
-#define        CREF    5
-#define        CINF    1000
-#define        LOOP    3
-
-typedef        struct  Reg     Reg;
-typedef        struct  Rgn     Rgn;
-
-/*c2go
-extern Node *Z;
-enum
-{
-       D_HI = TYPE_NONE,
-       D_LO = TYPE_NONE,
-       CLOAD = 5,
-       CREF = 5,
-       CINF = 1000,
-       LOOP = 3,
-};
-
-uint32 BLOAD(Reg*);
-uint32 BSTORE(Reg*);
-uint64 LOAD(Reg*);
-uint64 STORE(Reg*);
-*/
-
-// A Reg is a wrapper around a single Prog (one instruction) that holds
-// register optimization information while the optimizer runs.
-// r->prog is the instruction.
-// r->prog->opt points back to r.
-struct Reg
-{
-       Flow    f;
-
-       Bits    set;            // regopt variables written by this instruction.
-       Bits    use1;           // regopt variables read by prog->from.
-       Bits    use2;           // regopt variables read by prog->to.
-
-       // refahead/refbehind are the regopt variables whose current
-       // value may be used in the following/preceding instructions
-       // up to a CALL (or the value is clobbered).
-       Bits    refbehind;
-       Bits    refahead;
-       // calahead/calbehind are similar, but for variables in
-       // instructions that are reachable after hitting at least one
-       // CALL.
-       Bits    calbehind;
-       Bits    calahead;
-       Bits    regdiff;
-       Bits    act;
-
-       int32   regu;           // register used bitmap
-};
-#define        R       ((Reg*)0)
-/*c2go extern Reg *R; */
-
-#define        NRGN    600
-/*c2go enum { NRGN = 600 }; */
-
-// A Rgn represents a single regopt variable over a region of code
-// where a register could potentially be dedicated to that variable.
-// The code encompassed by a Rgn is defined by the flow graph,
-// starting at enter, flood-filling forward while varno is refahead
-// and backward while varno is refbehind, and following branches.  A
-// single variable may be represented by multiple disjoint Rgns and
-// each Rgn may choose a different register for that variable.
-// Registers are allocated to regions greedily in order of descending
-// cost.
-struct Rgn
-{
-       Reg*    enter;
-       short   cost;
-       short   varno;
-       short   regno;
-};
-
-EXTERN Reg     zreg;
-EXTERN Reg*    freer;
-EXTERN Reg**   rpo2r;
-EXTERN Rgn     region[NRGN];
-EXTERN Rgn*    rgp;
-EXTERN int     nregion;
-EXTERN int     nvar;
-EXTERN int32   regbits;
-EXTERN Bits    externs;
-EXTERN Bits    params;
-EXTERN Bits    consts;
-EXTERN Bits    addrs;
-EXTERN Bits    ivar;
-EXTERN Bits    ovar;
-EXTERN int     change;
-EXTERN int32   maxnr;
-EXTERN int32*  idom;
-
-EXTERN struct
-{
-       int32   ncvtreg;
-       int32   nspill;
-       int32   nreload;
-       int32   ndelmov;
-       int32   nvar;
-       int32   naddr;
-} ostats;
-
-/*
- * reg.c
- */
-Reg*   rega(void);
-int    rcmp(const void*, const void*);
-void   regopt(Prog*);
-void   addmove(Reg*, int, int, int);
-Bits   mkvar(Reg *r, Adr *a);
-void   prop(Reg*, Bits, Bits);
-void   synch(Reg*, Bits);
-uint32 allreg(uint32, Rgn*);
-void   paint1(Reg*, int);
-uint32 paint2(Reg*, int, int);
-void   paint3(Reg*, int, uint32, int);
-void   addreg(Adr*, int);
-void   dumpit(char *str, Flow *r0, int);
-
-/*
- * peep.c
- */
-void   peep(Prog*);
-void   excise(Flow*);
-int    copyu(Prog*, Adr*, Adr*);
-
-uint32 RtoB(int);
-uint32 FtoB(int);
-int    BtoR(uint32);
-int    BtoF(uint32);
-
-/*
- * prog.c
- */
-void proginfo(ProgInfo*, Prog*);
index 1a4df8d622490dc739b9f706601e9865eb85d9be..c9910d1134fb7c4e69a9f9c49d11e81d13710b14 100644 (file)
@@ -32,7 +32,7 @@
 #include <u.h>
 #include <libc.h>
 #include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
 
 static int     xtramodes(Graph*, Flow*, Adr*);
 static int     shortprop(Flow *r);
@@ -47,6 +47,7 @@ static Flow*  findpre(Flow *r, Adr *v);
 static int     copyau1(Prog *p, Adr *v);
 static int     isdconst(Addr *a);
 static int     isfloatreg(Addr*);
+static int     copyu(Prog *p, Adr *v, Adr *s);
 
 static uint32  gactive;
 
@@ -941,7 +942,7 @@ xtramodes(Graph *g, Flow *r, Adr *a)
  * 4 if set and used
  * 0 otherwise (not touched)
  */
-int
+static int
 copyu(Prog *p, Adr *v, Adr *s)
 {
        switch(p->as) {
@@ -1572,3 +1573,12 @@ smallindir(Addr *a, Addr *reg)
                a->reg == reg->reg &&
                0 <= a->offset && a->offset < 4096;
 }
+
+void
+excise(Flow *r)
+{
+       Prog *p;
+
+       p = r->prog;
+       nopout(p);
+}
index a77f2336e9697c1af2978c385486c7ada412bcc8..9d5adefe698f472f6d2e8930bc5177977af3bd69 100644 (file)
@@ -5,7 +5,7 @@
 #include <u.h>
 #include <libc.h>
 #include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
 
 enum
 {
@@ -148,4 +148,13 @@ proginfo(ProgInfo *info, Prog *p)
        
        if(((p->scond & C_SCOND) != C_SCOND_NONE) && (info->flags & RightWrite))
                info->flags |= RightRead;
+       
+       switch(p->as) {
+       case ADIV:
+       case ADIVU:
+       case AMOD:
+       case AMODU:
+               info->regset |= RtoB(REG_R12);
+               break;
+       }
 }
index ec21c6abf7a8ccf5f31bf309c517f1929116f5a0..1216e01bd5c14febd978f1d08e3e6a16662924d4 100644 (file)
 #include <u.h>
 #include <libc.h>
 #include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
 
-#define        NREGVAR 32
-#define        REGBITS ((uint64)0xffffffffull)
-/*c2go enum {
+enum {
        NREGVAR = 32,
-       REGBITS = 0xffffffff,
 };
-*/
-
-       void    addsplits(void);
-static Reg*    firstr;
-static int     first   = 1;
-
-int
-rcmp(const void *a1, const void *a2)
-{
-       Rgn *p1, *p2;
-       int c1, c2;
-
-       p1 = (Rgn*)a1;
-       p2 = (Rgn*)a2;
-       c1 = p2->cost;
-       c2 = p1->cost;
-       if(c1 -= c2)
-               return c1;
-       return p2->varno - p1->varno;
-}
-
-void
-excise(Flow *r)
-{
-       Prog *p;
-
-       p = r->prog;
-       nopout(p);
-}
-
-static void
-setaddrs(Bits bit)
-{
-       int i, n;
-       Var *v;
-       Node *node;
-
-       while(bany(&bit)) {
-               // convert each bit to a variable
-               i = bnum(bit);
-               node = var[i].node;
-               n = var[i].name;
-               biclr(&bit, i);
-
-               // disable all pieces of that variable
-               for(i=0; i<nvar; i++) {
-                       v = var+i;
-                       if(v->node == node && v->name == n)
-                               v->addr = 2;
-               }
-       }
-}
 
 static char* regname[] = {
        ".R0",
@@ -128,1188 +73,26 @@ static char* regname[] = {
        ".F15",
 };
 
-static Node* regnodes[NREGVAR];
-
-static void walkvardef(Node *n, Reg *r, int active);
-
-void
-regopt(Prog *firstp)
-{
-       Reg *r, *r1;
-       Prog *p;
-       Graph *g;
-       int i, z, active;
-       uint32 vreg;
-       Bits bit;
-       ProgInfo info;
-
-       if(first) {
-               fmtinstall('Q', Qconv);
-               first = 0;
-       }
-
-       mergetemp(firstp);
-
-       /*
-        * control flow is more complicated in generated go code
-        * than in generated c code.  define pseudo-variables for
-        * registers, so we have complete register usage information.
-        */
-       nvar = NREGVAR;
-       memset(var, 0, NREGVAR*sizeof var[0]);
-       for(i=0; i<NREGVAR; i++) {
-               if(regnodes[i] == N)
-                       regnodes[i] = newname(lookup(regname[i]));
-               var[i].node = regnodes[i];
-       }
-
-       regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC);
-       for(z=0; z<BITS; z++) {
-               externs.b[z] = 0;
-               params.b[z] = 0;
-               consts.b[z] = 0;
-               addrs.b[z] = 0;
-               ivar.b[z] = 0;
-               ovar.b[z] = 0;
-       }
-
-       /*
-        * pass 1
-        * build aux data structure
-        * allocate pcs
-        * find use and set of variables
-        */
-       g = flowstart(firstp, sizeof(Reg));
-       if(g == nil) {
-               for(i=0; i<nvar; i++)
-                       var[i].node->opt = nil;
-               return;
-       }
-
-       firstr = (Reg*)g->start;
-
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               p = r->f.prog;
-               if(p->as == AVARDEF || p->as == AVARKILL)
-                       continue;
-               proginfo(&info, p);
-
-               // Avoid making variables for direct-called functions.
-               if(p->as == ABL && p->to.name == NAME_EXTERN)
-                       continue;
-
-               bit = mkvar(r, &p->from);
-               if(info.flags & LeftRead)
-                       for(z=0; z<BITS; z++)
-                               r->use1.b[z] |= bit.b[z];
-               if(info.flags & LeftAddr)
-                       setaddrs(bit);
-
-               if(info.flags & RegRead)        
-                       r->use1.b[0] |= RtoB(p->reg);
-
-               if(info.flags & (RightAddr | RightRead | RightWrite)) {
-                       bit = mkvar(r, &p->to);
-                       if(info.flags & RightAddr)
-                               setaddrs(bit);
-                       if(info.flags & RightRead)
-                               for(z=0; z<BITS; z++)
-                                       r->use2.b[z] |= bit.b[z];
-                       if(info.flags & RightWrite)
-                               for(z=0; z<BITS; z++)
-                                       r->set.b[z] |= bit.b[z];
-               }
-
-               /* the mod/div runtime routines smash R12 */
-               if(p->as == ADIV || p->as == ADIVU || p->as == AMOD || p->as == AMODU)
-                       r->set.b[0] |= RtoB(REG_R12);
-       }
-       if(firstr == R)
-               return;
-
-       for(i=0; i<nvar; i++) {
-               Var *v = var+i;
-               if(v->addr) {
-                       bit = blsh(i);
-                       for(z=0; z<BITS; z++)
-                               addrs.b[z] |= bit.b[z];
-               }
-
-               if(debug['R'] && debug['v'])
-                       print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
-                               i, v->addr, v->etype, v->width, v->node, v->offset);
-       }
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass1", &firstr->f, 1);
-
-       /*
-        * pass 2
-        * find looping structure
-        */
-       flowrpo(g);
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass2", &firstr->f, 1);
-
-       /*
-        * pass 2.5
-        * iterate propagating fat vardef covering forward
-        * r->act records vars with a VARDEF since the last CALL.
-        * (r->act will be reused in pass 5 for something else,
-        * but we'll be done with it by then.)
-        */
-       active = 0;
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               r->f.active = 0;
-               r->act = zbits;
-       }
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               p = r->f.prog;
-               if(p->as == AVARDEF && isfat(((Node*)(p->to.node))->type) && ((Node*)(p->to.node))->opt != nil) {
-                       active++;
-                       walkvardef(p->to.node, r, active);
-               }
-       }
-
-       /*
-        * pass 3
-        * iterate propagating usage
-        *      back until flow graph is complete
-        */
-loop1:
-       change = 0;
-       for(r = firstr; r != R; r = (Reg*)r->f.link)
-               r->f.active = 0;
-       for(r = firstr; r != R; r = (Reg*)r->f.link)
-               if(r->f.prog->as == ARET)
-                       prop(r, zbits, zbits);
-loop11:
-       /* pick up unreachable code */
-       i = 0;
-       for(r = firstr; r != R; r = r1) {
-               r1 = (Reg*)r->f.link;
-               if(r1 && r1->f.active && !r->f.active) {
-                       prop(r, zbits, zbits);
-                       i = 1;
-               }
-       }
-       if(i)
-               goto loop11;
-       if(change)
-               goto loop1;
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass3", &firstr->f, 1);
-
-
-       /*
-        * pass 4
-        * iterate propagating register/variable synchrony
-        *      forward until graph is complete
-        */
-loop2:
-       change = 0;
-       for(r = firstr; r != R; r = (Reg*)r->f.link)
-               r->f.active = 0;
-       synch(firstr, zbits);
-       if(change)
-               goto loop2;
-
-       addsplits();
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass4", &firstr->f, 1);
-
-       if(debug['R'] > 1) {
-               print("\nprop structure:\n");
-               for(r = firstr; r != R; r = (Reg*)r->f.link) {
-                       print("%d:%P", r->f.loop, r->f.prog);
-                       for(z=0; z<BITS; z++) {
-                               bit.b[z] = r->set.b[z] |
-                                       r->refahead.b[z] | r->calahead.b[z] |
-                                       r->refbehind.b[z] | r->calbehind.b[z] |
-                                       r->use1.b[z] | r->use2.b[z];
-                               bit.b[z] &= ~addrs.b[z];
-                       }
-
-                       if(bany(&bit)) {
-                               print("\t");
-                               if(bany(&r->use1))
-                                       print(" u1=%Q", r->use1);
-                               if(bany(&r->use2))
-                                       print(" u2=%Q", r->use2);
-                               if(bany(&r->set))
-                                       print(" st=%Q", r->set);
-                               if(bany(&r->refahead))
-                                       print(" ra=%Q", r->refahead);
-                               if(bany(&r->calahead))
-                                       print(" ca=%Q", r->calahead);
-                               if(bany(&r->refbehind))
-                                       print(" rb=%Q", r->refbehind);
-                               if(bany(&r->calbehind))
-                                       print(" cb=%Q", r->calbehind);
-                       }
-                       print("\n");
-               }
-       }
-
-       /*
-        * pass 4.5
-        * move register pseudo-variables into regu.
-        */
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
-
-               r->set.b[0] &= ~REGBITS;
-               r->use1.b[0] &= ~REGBITS;
-               r->use2.b[0] &= ~REGBITS;
-               r->refbehind.b[0] &= ~REGBITS;
-               r->refahead.b[0] &= ~REGBITS;
-               r->calbehind.b[0] &= ~REGBITS;
-               r->calahead.b[0] &= ~REGBITS;
-               r->regdiff.b[0] &= ~REGBITS;
-               r->act.b[0] &= ~REGBITS;
-       }
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass4.5", &firstr->f, 1);
-
-       /*
-        * pass 5
-        * isolate regions
-        * calculate costs (paint1)
-        */
-       r = firstr;
-       if(r) {
-               for(z=0; z<BITS; z++)
-                       bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
-                         ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
-               if(bany(&bit) && !r->f.refset) {
-                       // should never happen - all variables are preset
-                       if(debug['w'])
-                               print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
-                       r->f.refset = 1;
-               }
-       }
-
-       for(r = firstr; r != R; r = (Reg*)r->f.link)
-               r->act = zbits;
-       rgp = region;
-       nregion = 0;
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               for(z=0; z<BITS; z++)
-                       bit.b[z] = r->set.b[z] &
-                         ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
-               if(bany(&bit) && !r->f.refset) {
-                       if(debug['w'])
-                               print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
-                       r->f.refset = 1;
-                       excise(&r->f);
-               }
-               for(z=0; z<BITS; z++)
-                       bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
-               while(bany(&bit)) {
-                       i = bnum(bit);
-                       rgp->enter = r;
-                       rgp->varno = i;
-                       change = 0;
-                       if(debug['R'] > 1)
-                               print("\n");
-                       paint1(r, i);
-                       biclr(&bit, i);
-                       if(change <= 0) {
-                               if(debug['R'])
-                                       print("%L $%d: %Q\n",
-                                               r->f.prog->lineno, change, blsh(i));
-                               continue;
-                       }
-                       rgp->cost = change;
-                       nregion++;
-                       if(nregion >= NRGN) {
-                               if(debug['R'] > 1)
-                                       print("too many regions\n");
-                               goto brk;
-                       }
-                       rgp++;
-               }
-       }
-brk:
-       qsort(region, nregion, sizeof(region[0]), rcmp);
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass5", &firstr->f, 1);
-
-       /*
-        * pass 6
-        * determine used registers (paint2)
-        * replace code (paint3)
-        */
-       rgp = region;
-       if(debug['R'] && debug['v'])
-               print("\nregisterizing\n");
-       for(i=0; i<nregion; i++) {
-               if(debug['R'] && debug['v'])
-                       print("region %d: cost %d varno %d enter %lld\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc);
-               bit = blsh(rgp->varno);
-               vreg = paint2(rgp->enter, rgp->varno, 0);
-               vreg = allreg(vreg, rgp);
-               if(debug['R']) {
-                       print("%L $%d %R: %Q\n",
-                               rgp->enter->f.prog->lineno,
-                               rgp->cost,
-                               rgp->regno,
-                               bit);
-               }
-               if(rgp->regno != 0)
-                       paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
-               rgp++;
-       }
-
-       /*
-        * free aux structures. peep allocates new ones.
-        */
-       for(i=0; i<nvar; i++)
-               var[i].node->opt = nil;
-       flowend(g);
-       firstr = R;
-
-       if(debug['R'] && debug['v']) {
-               // Rebuild flow graph, since we inserted instructions
-               g = flowstart(firstp, sizeof(Reg));
-               firstr = (Reg*)g->start;
-               dumpit("pass6", &firstr->f, 1);
-               flowend(g);
-               firstr = R;
-       }
-
-       /*
-        * pass 7
-        * peep-hole on basic block
-        */
-       if(!debug['R'] || debug['P']) {
-               peep(firstp);
-       }
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass7", &firstr->f, 1);
-
-       /*
-        * last pass
-        * eliminate nops
-        * free aux structures
-        * adjust the stack pointer
-        *      MOVW.W  R1,-12(R13)                     <<- start
-        *      MOVW    R0,R1
-        *      MOVW    R1,8(R13)
-        *      MOVW    $0,R1
-        *      MOVW    R1,4(R13)
-        *      BL      ,runtime.newproc+0(SB)
-        *      MOVW    &ft+-32(SP),R7                  <<- adjust
-        *      MOVW    &j+-40(SP),R6                   <<- adjust
-        *      MOVW    autotmp_0003+-24(SP),R5         <<- adjust
-        *      MOVW    $12(R13),R13                    <<- finish
-        */
-       vreg = 0;
-       for(p = firstp; p != P; p = p->link) {
-               while(p->link != P && p->link->as == ANOP)
-                       p->link = p->link->link;
-               if(p->to.type == TYPE_BRANCH)
-                       while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
-                               p->to.u.branch = p->to.u.branch->link;
-               if(p->as == AMOVW && p->to.reg == 13) {
-                       if(p->scond & C_WBIT) {
-                               vreg = -p->to.offset;           // in adjust region
-//                             print("%P adjusting %d\n", p, vreg);
-                               continue;
-                       }
-                       if(p->from.type == TYPE_CONST && p->to.type == TYPE_REG) {
-                               if(p->from.offset != vreg)
-                                       print("in and out different\n");
-//                             print("%P finish %d\n", p, vreg);
-                               vreg = 0;       // done adjust region
-                               continue;
-                       }
-
-//                     print("%P %d %d from type\n", p, p->from.type, TYPE_CONST);
-//                     print("%P %d %d to type\n\n", p, p->to.type, TYPE_REG);
-               }
-
-               if(p->as == AMOVW && vreg != 0) {
-                       if(p->from.sym != nil)
-                       if(p->from.name == NAME_AUTO || p->from.name == NAME_PARAM) {
-                               p->from.offset += vreg;
-//                             print("%P adjusting from %d %d\n", p, vreg, p->from.type);
-                       }
-                       if(p->to.sym != nil)
-                       if(p->to.name == NAME_AUTO || p->to.name == NAME_PARAM) {
-                               p->to.offset += vreg;
-//                             print("%P adjusting to %d %d\n", p, vreg, p->from.type);
-                       }
-               }
-       }
-}
-
-static void
-walkvardef(Node *n, Reg *r, int active)
-{
-       Reg *r1, *r2;
-       int bn;
-       Var *v;
-       
-       for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) {
-               if(r1->f.active == active)
-                       break;
-               r1->f.active = active;
-               if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n)
-                       break;
-               for(v=n->opt; v!=nil; v=v->nextinnode) {
-                       bn = v - var;
-                       biset(&r1->act, bn);
-               }
-               if(r1->f.prog->as == ABL)
-                       break;
-       }
-
-       for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1)
-               if(r2->f.s2 != nil)
-                       walkvardef(n, (Reg*)r2->f.s2, active);
-}
-
-void
-addsplits(void)
-{
-       Reg *r, *r1;
-       int z, i;
-       Bits bit;
-
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               if(r->f.loop > 1)
-                       continue;
-               if(r->f.prog->as == ABL)
-                       continue;
-               if(r->f.prog->as == ADUFFZERO)
-                       continue;
-               if(r->f.prog->as == ADUFFCOPY)
-                       continue;
-               for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) {
-                       if(r1->f.loop <= 1)
-                               continue;
-                       for(z=0; z<BITS; z++)
-                               bit.b[z] = r1->calbehind.b[z] &
-                                       (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
-                                       ~(r->calahead.b[z] & addrs.b[z]);
-                       while(bany(&bit)) {
-                               i = bnum(bit);
-                               biclr(&bit, i);
-                       }
-               }
-       }
-}
-
-/*
- * add mov b,rn
- * just after r
- */
-void
-addmove(Reg *r, int bn, int rn, int f)
-{
-       Prog *p, *p1, *p2;
-       Adr *a;
-       Var *v;
-
-       p1 = mal(sizeof(*p1));
-       *p1 = zprog;
-       p = r->f.prog;
-       
-       // If there's a stack fixup coming (after BL newproc or BL deferproc),
-       // delay the load until after the fixup.
-       p2 = p->link;
-       if(p2 && p2->as == AMOVW && p2->from.type == TYPE_ADDR && p2->from.reg == REGSP && p2->to.reg == REGSP && p2->to.type == TYPE_REG)
-               p = p2;
-
-       p1->link = p->link;
-       p->link = p1;
-       p1->lineno = p->lineno;
-
-       v = var + bn;
-
-       a = &p1->to;
-       a->name = v->name;
-       a->node = v->node;
-       a->sym = linksym(v->node->sym);
-       a->offset = v->offset;
-       a->etype = v->etype;
-       a->type = TYPE_MEM;
-       if(a->etype == TARRAY)
-               a->type = TYPE_ADDR;
-       else if(a->sym == nil)
-               a->type = TYPE_CONST;
-
-       if(v->addr)
-               fatal("addmove: shouldn't be doing this %A\n", a);
-
-       switch(v->etype) {
-       default:
-               print("What is this %E\n", v->etype);
-
-       case TINT8:
-               p1->as = AMOVBS;
-               break;
-       case TBOOL:
-       case TUINT8:
-//print("movbu %E %d %S\n", v->etype, bn, v->sym);
-               p1->as = AMOVBU;
-               break;
-       case TINT16:
-               p1->as = AMOVHS;
-               break;
-       case TUINT16:
-               p1->as = AMOVHU;
-               break;
-       case TINT32:
-       case TUINT32:
-       case TPTR32:
-               p1->as = AMOVW;
-               break;
-       case TFLOAT32:
-               p1->as = AMOVF;
-               break;
-       case TFLOAT64:
-               p1->as = AMOVD;
-               break;
-       }
-
-       p1->from.type = TYPE_REG;
-       p1->from.reg = rn;
-       if(!f) {
-               p1->from = *a;
-               *a = zprog.from;
-               a->type = TYPE_REG;
-               a->reg = rn;
-               if(v->etype == TUINT8 || v->etype == TBOOL)
-                       p1->as = AMOVBU;
-               if(v->etype == TUINT16)
-                       p1->as = AMOVHU;
-       }
-       if(debug['R'])
-               print("%P\t.a%P\n", p, p1);
-}
-
-static int
-overlap(int32 o1, int w1, int32 o2, int w2)
-{
-       int32 t1, t2;
-
-       t1 = o1+w1;
-       t2 = o2+w2;
-
-       if(!(t1 > o2 && t2 > o1))
-               return 0;
-
-       return 1;
-}
-
-Bits
-mkvar(Reg *r, Adr *a)
-{
-       Var *v;
-       int i, t, n, et, z, w, flag;
-       int32 o;
-       Bits bit;
-       Node *node;
-
-       // mark registers used
-       t = a->type;
-
-       flag = 0;
-       switch(t) {
-       default:
-               print("mkvar: type %d %d %D\n", t, a->name, a);
-               goto none;
-
-       case TYPE_NONE:
-       case TYPE_FCONST:
-       case TYPE_BRANCH:
-               break;
-
-
-       case TYPE_REGREG:
-       case TYPE_REGREG2:
-               bit = zbits;
-               if(a->offset != 0)
-                       bit.b[0] |= RtoB(a->offset);
-               if(a->reg != 0)
-                       bit.b[0] |= RtoB(a->reg);
-               return bit;
-       
-       case TYPE_CONST:
-               if(a->reg != 0)
-                       fatal("found CONST instead of ADDR: %D", a);
-               break;
-
-       case TYPE_ADDR:
-       case TYPE_REG:
-       case TYPE_SHIFT:
-               if(a->reg != 0) {
-                       bit = zbits;
-                       bit.b[0] = RtoB(a->reg);
-                       return bit;
-               }
-               break;
-
-       case TYPE_MEM:
-               if(a->reg != 0) {
-                       if(a == &r->f.prog->from)
-                               r->use1.b[0] |= RtoB(a->reg);
-                       else
-                               r->use2.b[0] |= RtoB(a->reg);
-                       if(r->f.prog->scond & (C_PBIT|C_WBIT))
-                               r->set.b[0] |= RtoB(a->reg);
-               }
-               break;
-       }
-
-       switch(a->name) {
-       default:
-               goto none;
-
-       case NAME_EXTERN:
-       case NAME_STATIC:
-       case NAME_AUTO:
-       case NAME_PARAM:
-               n = a->name;
-               break;
-       }
-
-       node = a->node;
-       if(node == N || node->op != ONAME || node->orig == N)
-               goto none;
-       node = node->orig;
-       if(node->orig != node)
-               fatal("%D: bad node", a);
-       if(node->sym == S || node->sym->name[0] == '.')
-               goto none;
-       et = a->etype;
-       o = a->offset;
-       w = a->width;
-       if(w < 0)
-               fatal("bad width %d for %D", w, a);
-
-       for(i=0; i<nvar; i++) {
-               v = var+i;
-               if(v->node == node && v->name == n) {
-                       if(v->offset == o)
-                       if(v->etype == et)
-                       if(v->width == w)
-                               if(!flag)
-                                       return blsh(i);
-
-                       // if they overlap, disable both
-                       if(overlap(v->offset, v->width, o, w)) {
-                               v->addr = 1;
-                               flag = 1;
-                       }
-               }
-       }
-
-       switch(et) {
-       case 0:
-       case TFUNC:
-               goto none;
-       }
-
-       if(nvar >= NVAR) {
-               if(debug['w'] > 1 && node)
-                       fatal("variable not optimized: %D", a);
-               
-               // If we're not tracking a word in a variable, mark the rest as
-               // having its address taken, so that we keep the whole thing
-               // live at all calls. otherwise we might optimize away part of
-               // a variable but not all of it.
-               for(i=0; i<nvar; i++) {
-                       v = var+i;
-                       if(v->node == node)
-                               v->addr = 1;
-               }
-               goto none;
-       }
-
-       i = nvar;
-       nvar++;
-//print("var %d %E %D %S\n", i, et, a, s);
-       v = var+i;
-       v->offset = o;
-       v->name = n;
-       v->etype = et;
-       v->width = w;
-       v->addr = flag;         // funny punning
-       v->node = node;
-       
-       // node->opt is the head of a linked list
-       // of Vars within the given Node, so that
-       // we can start at a Var and find all the other
-       // Vars in the same Go variable.
-       v->nextinnode = node->opt;
-       node->opt = v;
-       
-       bit = blsh(i);
-       if(n == NAME_EXTERN || n == NAME_STATIC)
-               for(z=0; z<BITS; z++)
-                       externs.b[z] |= bit.b[z];
-       if(n == NAME_PARAM)
-               for(z=0; z<BITS; z++)
-                       params.b[z] |= bit.b[z];
-
-       if(node->class == PPARAM)
-               for(z=0; z<BITS; z++)
-                       ivar.b[z] |= bit.b[z];
-       if(node->class == PPARAMOUT)
-               for(z=0; z<BITS; z++)
-                       ovar.b[z] |= bit.b[z];
-
-       // Treat values with their address taken as live at calls,
-       // because the garbage collector's liveness analysis in ../gc/plive.c does.
-       // These must be consistent or else we will elide stores and the garbage
-       // collector will see uninitialized data.
-       // The typical case where our own analysis is out of sync is when the
-       // node appears to have its address taken but that code doesn't actually
-       // get generated and therefore doesn't show up as an address being
-       // taken when we analyze the instruction stream.
-       // One instance of this case is when a closure uses the same name as
-       // an outer variable for one of its own variables declared with :=.
-       // The parser flags the outer variable as possibly shared, and therefore
-       // sets addrtaken, even though it ends up not being actually shared.
-       // If we were better about _ elision, _ = &x would suffice too.
-       // The broader := in a closure problem is mentioned in a comment in
-       // closure.c:/^typecheckclosure and dcl.c:/^oldname.
-       if(node->addrtaken)
-               v->addr = 1;
-
-       // Disable registerization for globals, because:
-       // (1) we might panic at any time and we want the recovery code
-       // to see the latest values (issue 1304).
-       // (2) we don't know what pointers might point at them and we want
-       // loads via those pointers to see updated values and vice versa (issue 7995).
-       //
-       // Disable registerization for results if using defer, because the deferred func
-       // might recover and return, causing the current values to be used.
-       if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
-               v->addr = 1;
-
-       if(debug['R'])
-               print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
-
-       return bit;
-
-none:
-       return zbits;
-}
-
-void
-prop(Reg *r, Bits ref, Bits cal)
+char**
+regnames(int *n)
 {
-       Reg *r1, *r2;
-       int z, i, j;
-       Var *v, *v1;
-
-       for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
-               for(z=0; z<BITS; z++) {
-                       ref.b[z] |= r1->refahead.b[z];
-                       if(ref.b[z] != r1->refahead.b[z]) {
-                               r1->refahead.b[z] = ref.b[z];
-                               change++;
-                       }
-                       cal.b[z] |= r1->calahead.b[z];
-                       if(cal.b[z] != r1->calahead.b[z]) {
-                               r1->calahead.b[z] = cal.b[z];
-                               change++;
-                       }
-               }
-               switch(r1->f.prog->as) {
-               case ABL:
-                       if(noreturn(r1->f.prog))
-                               break;
-
-                       // Mark all input variables (ivar) as used, because that's what the
-                       // liveness bitmaps say. The liveness bitmaps say that so that a
-                       // panic will not show stale values in the parameter dump.
-                       // Mark variables with a recent VARDEF (r1->act) as used,
-                       // so that the optimizer flushes initializations to memory,
-                       // so that if a garbage collection happens during this CALL,
-                       // the collector will see initialized memory. Again this is to
-                       // match what the liveness bitmaps say.
-                       for(z=0; z<BITS; z++) {
-                               cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
-                               ref.b[z] = 0;
-                       }
-                       
-                       // cal.b is the current approximation of what's live across the call.
-                       // Every bit in cal.b is a single stack word. For each such word,
-                       // find all the other tracked stack words in the same Go variable
-                       // (struct/slice/string/interface) and mark them live too.
-                       // This is necessary because the liveness analysis for the garbage
-                       // collector works at variable granularity, not at word granularity.
-                       // It is fundamental for slice/string/interface: the garbage collector
-                       // needs the whole value, not just some of the words, in order to
-                       // interpret the other bits correctly. Specifically, slice needs a consistent
-                       // ptr and cap, string needs a consistent ptr and len, and interface
-                       // needs a consistent type word and data word.
-                       for(z=0; z<BITS; z++) {
-                               if(cal.b[z] == 0)
-                                       continue;
-                               for(i=0; i<64; i++) {
-                                       if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
-                                               continue;
-                                       v = var+z*64+i;
-                                       if(v->node->opt == nil) // v represents fixed register, not Go variable
-                                               continue;
-
-                                       // v->node->opt is the head of a linked list of Vars
-                                       // corresponding to tracked words from the Go variable v->node.
-                                       // Walk the list and set all the bits.
-                                       // For a large struct this could end up being quadratic:
-                                       // after the first setting, the outer loop (for z, i) would see a 1 bit
-                                       // for all of the remaining words in the struct, and for each such
-                                       // word would go through and turn on all the bits again.
-                                       // To avoid the quadratic behavior, we only turn on the bits if
-                                       // v is the head of the list or if the head's bit is not yet turned on.
-                                       // This will set the bits at most twice, keeping the overall loop linear.
-                                       v1 = v->node->opt;
-                                       j = v1 - var;
-                                       if(v == v1 || !btest(&cal, j)) {
-                                               for(; v1 != nil; v1 = v1->nextinnode) {
-                                                       j = v1 - var;
-                                                       biset(&cal, j);
-                                               }
-                                       }
-                               }
-                       }
-                       break;
-
-               case ATEXT:
-                       for(z=0; z<BITS; z++) {
-                               cal.b[z] = 0;
-                               ref.b[z] = 0;
-                       }
-                       break;
-
-               case ARET:
-                       for(z=0; z<BITS; z++) {
-                               cal.b[z] = externs.b[z] | ovar.b[z];
-                               ref.b[z] = 0;
-                       }
-                       break;
-               }
-               for(z=0; z<BITS; z++) {
-                       ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
-                               r1->use1.b[z] | r1->use2.b[z];
-                       cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
-                       r1->refbehind.b[z] = ref.b[z];
-                       r1->calbehind.b[z] = cal.b[z];
-               }
-               if(r1->f.active)
-                       break;
-               r1->f.active = 1;
-       }
-       for(; r != r1; r = (Reg*)r->f.p1)
-               for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
-                       prop(r2, r->refbehind, r->calbehind);
+       *n = NREGVAR;
+       return regname;
 }
 
-void
-synch(Reg *r, Bits dif)
+uint64
+excludedregs(void)
 {
-       Reg *r1;
-       int z;
-
-       for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
-               for(z=0; z<BITS; z++) {
-                       dif.b[z] = (dif.b[z] &
-                               ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
-                                       r1->set.b[z] | r1->regdiff.b[z];
-                       if(dif.b[z] != r1->regdiff.b[z]) {
-                               r1->regdiff.b[z] = dif.b[z];
-                               change++;
-                       }
-               }
-               if(r1->f.active)
-                       break;
-               r1->f.active = 1;
-               for(z=0; z<BITS; z++)
-                       dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
-               if(r1->f.s2 != nil)
-                       synch((Reg*)r1->f.s2, dif);
-       }
+       return RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC);
 }
 
-uint32
-allreg(uint32 b, Rgn *r)
+uint64
+doregbits(int r)
 {
-       Var *v;
-       int i;
-
-       v = var + r->varno;
-       r->regno = 0;
-       switch(v->etype) {
-
-       default:
-               fatal("unknown etype %d/%E", bitno(b), v->etype);
-               break;
-
-       case TINT8:
-       case TUINT8:
-       case TINT16:
-       case TUINT16:
-       case TINT32:
-       case TUINT32:
-       case TINT:
-       case TUINT:
-       case TUINTPTR:
-       case TBOOL:
-       case TPTR32:
-               i = BtoR(~b);
-               if(i && r->cost >= 0) {
-                       r->regno = i;
-                       return RtoB(i);
-               }
-               break;
-
-       case TFLOAT32:
-       case TFLOAT64:
-               i = BtoF(~b);
-               if(i && r->cost >= 0) {
-                       r->regno = i;
-                       return RtoB(i);
-               }
-               break;
-
-       case TINT64:
-       case TUINT64:
-       case TPTR64:
-       case TINTER:
-       case TSTRUCT:
-       case TARRAY:
-               break;
-       }
+       USED(r);
        return 0;
 }
 
-void
-paint1(Reg *r, int bn)
-{
-       Reg *r1;
-       Prog *p;
-       int z;
-       uint64 bb;
-
-       z = bn/64;
-       bb = 1LL<<(bn%64);
-       if(r->act.b[z] & bb)
-               return;
-       for(;;) {
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.p1;
-               if(r1 == R)
-                       break;
-               if(!(r1->refahead.b[z] & bb))
-                       break;
-               if(r1->act.b[z] & bb)
-                       break;
-               r = r1;
-       }
-
-       if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
-               change -= CLOAD * r->f.loop;
-               if(debug['R'] > 1)
-                       print("%d%P\td %Q $%d\n", r->f.loop,
-                               r->f.prog, blsh(bn), change);
-       }
-       for(;;) {
-               r->act.b[z] |= bb;
-               p = r->f.prog;
-
-
-               if(r->f.prog->as != ANOP) { // don't give credit for NOPs
-                       if(r->use1.b[z] & bb) {
-                               change += CREF * r->f.loop;
-                               if(debug['R'] > 1)
-                                       print("%d%P\tu1 %Q $%d\n", r->f.loop,
-                                               p, blsh(bn), change);
-                       }
-                       if((r->use2.b[z]|r->set.b[z]) & bb) {
-                               change += CREF * r->f.loop;
-                               if(debug['R'] > 1)
-                                       print("%d%P\tu2 %Q $%d\n", r->f.loop,
-                                               p, blsh(bn), change);
-                       }
-               }
-
-               if(STORE(r) & r->regdiff.b[z] & bb) {
-                       change -= CLOAD * r->f.loop;
-                       if(debug['R'] > 1)
-                               print("%d%P\tst %Q $%d\n", r->f.loop,
-                                       p, blsh(bn), change);
-               }
-
-               if(r->refbehind.b[z] & bb)
-                       for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
-                               if(r1->refahead.b[z] & bb)
-                                       paint1(r1, bn);
-
-               if(!(r->refahead.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.s2;
-               if(r1 != R)
-                       if(r1->refbehind.b[z] & bb)
-                               paint1(r1, bn);
-               r = (Reg*)r->f.s1;
-               if(r == R)
-                       break;
-               if(r->act.b[z] & bb)
-                       break;
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-       }
-}
-
-uint32
-paint2(Reg *r, int bn, int depth)
-{
-       Reg *r1;
-       int z;
-       uint64 bb, vreg;
-
-       z = bn/64;
-       bb = 1LL << (bn%64);
-       vreg = regbits;
-       if(!(r->act.b[z] & bb))
-               return vreg;
-       for(;;) {
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.p1;
-               if(r1 == R)
-                       break;
-               if(!(r1->refahead.b[z] & bb))
-                       break;
-               if(!(r1->act.b[z] & bb))
-                       break;
-               r = r1;
-       }
-       for(;;) {
-               if(debug['R'] && debug['v'])
-                       print("  paint2 %d %P\n", depth, r->f.prog);
-
-               r->act.b[z] &= ~bb;
-
-               vreg |= r->regu;
-
-               if(r->refbehind.b[z] & bb)
-                       for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
-                               if(r1->refahead.b[z] & bb)
-                                       vreg |= paint2(r1, bn, depth+1);
-
-               if(!(r->refahead.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.s2;
-               if(r1 != R)
-                       if(r1->refbehind.b[z] & bb)
-                               vreg |= paint2(r1, bn, depth+1);
-               r = (Reg*)r->f.s1;
-               if(r == R)
-                       break;
-               if(!(r->act.b[z] & bb))
-                       break;
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-       }
-       return vreg;
-}
-
-void
-paint3(Reg *r, int bn, uint32 rb, int rn)
-{
-       Reg *r1;
-       Prog *p;
-       int z;
-       uint64 bb;
-
-       z = bn/64;
-       bb = 1LL << (bn%64);
-       if(r->act.b[z] & bb)
-               return;
-       for(;;) {
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.p1;
-               if(r1 == R)
-                       break;
-               if(!(r1->refahead.b[z] & bb))
-                       break;
-               if(r1->act.b[z] & bb)
-                       break;
-               r = r1;
-       }
-
-       if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
-               addmove(r, bn, rn, 0);
-
-       for(;;) {
-               r->act.b[z] |= bb;
-               p = r->f.prog;
-
-               if(r->use1.b[z] & bb) {
-                       if(debug['R'])
-                               print("%P", p);
-                       addreg(&p->from, rn);
-                       if(debug['R'])
-                               print("\t.c%P\n", p);
-               }
-               if((r->use2.b[z]|r->set.b[z]) & bb) {
-                       if(debug['R'])
-                               print("%P", p);
-                       addreg(&p->to, rn);
-                       if(debug['R'])
-                               print("\t.c%P\n", p);
-               }
-
-               if(STORE(r) & r->regdiff.b[z] & bb)
-                       addmove(r, bn, rn, 1);
-               r->regu |= rb;
-
-               if(r->refbehind.b[z] & bb)
-                       for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
-                               if(r1->refahead.b[z] & bb)
-                                       paint3(r1, bn, rb, rn);
-
-               if(!(r->refahead.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.s2;
-               if(r1 != R)
-                       if(r1->refbehind.b[z] & bb)
-                               paint3(r1, bn, rb, rn);
-               r = (Reg*)r->f.s1;
-               if(r == R)
-                       break;
-               if(r->act.b[z] & bb)
-                       break;
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-       }
-}
-
-void
-addreg(Adr *a, int rn)
-{
-       a->sym = nil;
-       a->node = nil;
-       a->name = NAME_NONE;
-       a->type = TYPE_REG;
-       a->reg = rn;
-}
-
 /*
  *     bit     reg
  *     0       R0
@@ -1324,26 +107,26 @@ addreg(Adr *a, int rn)
  *     ...     ...
  *     31      F15
  */
-uint32
+uint64
 RtoB(int r)
 {
        if(REG_R0 <= r && r <= REG_R15) {
                if(r >= REGTMP-2 && r != REG_R12)       // excluded R9 and R10 for m and g, but not R12
                        return 0;
-               return 1L << (r - REG_R0);
+               return 1ULL << (r - REG_R0);
        }
        
        if(REG_F0 <= r && r <= REG_F15) {
                if(r < REG_F2 || r > REG_F0+NFREG-1)
                        return 0;
-               return 1L << ((r - REG_F0) + 16);
+               return 1ULL << ((r - REG_F0) + 16);
        }
        
        return 0;
 }
 
 int
-BtoR(uint32 b)
+BtoR(uint64 b)
 {
        // TODO Allow R0 and R1, but be careful with a 0 return
        // TODO Allow R9. Only R10 is reserved now (just g, not m).
@@ -1354,88 +137,10 @@ BtoR(uint32 b)
 }
 
 int
-BtoF(uint32 b)
+BtoF(uint64 b)
 {
        b &= 0xfffc0000L;
        if(b == 0)
                return 0;
        return bitno(b) - 16 + REG_F0;
 }
-
-void
-dumpone(Flow *f, int isreg)
-{
-       int z;
-       Bits bit;
-       Reg *r;
-
-       print("%d:%P", f->loop, f->prog);
-       if(isreg) {
-               r = (Reg*)f;
-               for(z=0; z<BITS; z++)
-                       bit.b[z] =
-                               r->set.b[z] |
-                               r->use1.b[z] |
-                               r->use2.b[z] |
-                               r->refbehind.b[z] |
-                               r->refahead.b[z] |
-                               r->calbehind.b[z] |
-                               r->calahead.b[z] |
-                               r->regdiff.b[z] |
-                               r->act.b[z] |
-                                       0;
-               if(bany(&bit)) {
-                       print("\t");
-                       if(bany(&r->set))
-                               print(" s:%Q", r->set);
-                       if(bany(&r->use1))
-                               print(" u1:%Q", r->use1);
-                       if(bany(&r->use2))
-                               print(" u2:%Q", r->use2);
-                       if(bany(&r->refbehind))
-                               print(" rb:%Q ", r->refbehind);
-                       if(bany(&r->refahead))
-                               print(" ra:%Q ", r->refahead);
-                       if(bany(&r->calbehind))
-                               print(" cb:%Q ", r->calbehind);
-                       if(bany(&r->calahead))
-                               print(" ca:%Q ", r->calahead);
-                       if(bany(&r->regdiff))
-                               print(" d:%Q ", r->regdiff);
-                       if(bany(&r->act))
-                               print(" a:%Q ", r->act);
-               }
-       }
-       print("\n");
-}
-
-void
-dumpit(char *str, Flow *r0, int isreg)
-{
-       Flow *r, *r1;
-
-       print("\n%s\n", str);
-       for(r = r0; r != nil; r = r->link) {
-               dumpone(r, isreg);
-               r1 = r->p2;
-               if(r1 != nil) {
-                       print(" pred:");
-                       for(; r1 != nil; r1 = r1->p2link)
-                               print(" %.4ud", (int)r1->prog->pc);
-                       if(r->p1 != nil)
-                               print(" (and %.4ud)", (int)r->p1->prog->pc);
-                       else
-                               print(" (only)");
-                       print("\n");
-               }
-               // Print successors if it's not just the next one
-               if(r->s1 != r->link || r->s2 != nil) {
-                       print(" succ:");
-                       if(r->s1 != nil)
-                               print(" %.4ud", (int)r->s1->prog->pc);
-                       if(r->s2 != nil)
-                               print(" %.4ud", (int)r->s2->prog->pc);
-                       print("\n");
-               }
-       }
-}
index 71ad402cd5b446e7732d25680c9d7d79d45a7d5e..ad66366c78fd12157a2cbd2c7db2d9aa554371be 100644 (file)
@@ -89,14 +89,22 @@ main(int argc, char **argv)
        arch.ginscall = ginscall;
        arch.igen = igen;
        arch.linkarchinit = linkarchinit;
+       arch.peep = peep;
        arch.proginfo = proginfo;
        arch.regalloc = regalloc;
        arch.regfree = regfree;
-       arch.regopt = regopt;
        arch.regtyp = regtyp;
        arch.sameaddr = sameaddr;
        arch.smallindir = smallindir;
        arch.stackaddr = stackaddr;
+       arch.excludedregs = excludedregs;
+       arch.RtoB = RtoB;
+       arch.FtoB = FtoB;
+       arch.BtoR = BtoR;
+       arch.BtoF = BtoF;
+       arch.optoas = optoas;
+       arch.doregbits = doregbits;
+       arch.regnames = regnames;
        
        gcmain(argc, argv);
 }
index 789a9870eaf5f706d58a1febea631ac43c979522..a6dfad9c8e1525d8fd664aa208d4e796a61846ed 100644 (file)
@@ -159,3 +159,18 @@ int smallindir(Addr*, Addr*);
 int stackaddr(Addr*);
 Prog* unpatch(Prog*);
 
+/*
+ * reg.c
+ */
+uint64 excludedregs(void);
+uint64 RtoB(int);
+uint64 FtoB(int);
+int BtoR(uint64);
+int BtoF(uint64);
+uint64 doregbits(int);
+char** regnames(int*);
+
+/*
+ * peep.c
+ */
+void peep(Prog*);
index dd61812bcdeb5dffbfa9cfb2479bd0db01194265..72104589a342fddf413443a06e330e156d266d19 100644 (file)
@@ -7,7 +7,7 @@
 #include <u.h>
 #include <libc.h>
 #include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
 
 static Prog *appendpp(Prog*, int, int, int, vlong, int, int, vlong);
 static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax);
index e05a06087f0d6e9e4f122717ed9dda09de440b4c..279b60d4e4124205e007ef4701c14f10d6feb892 100644 (file)
@@ -31,7 +31,7 @@
 #include <u.h>
 #include <libc.h>
 #include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
 
 static void    conprop(Flow *r);
 static void    elimshortmov(Graph *g);
@@ -44,9 +44,15 @@ static int   copy1(Adr*, Adr*, Flow*, int);
 static int     copyas(Adr*, Adr*);
 static int     copyau(Adr*, Adr*);
 static int     copysub(Adr*, Adr*, Adr*, int);
+static int     copyu(Prog*, Adr*, Adr*);
 
 static uint32  gactive;
 
+enum
+{
+       exregoffset = REG_R15,
+};
+
 // do we need the carry bit
 static int
 needc(Prog *p)
@@ -737,7 +743,7 @@ copy1(Adr *v1, Adr *v2, Flow *r, int f)
  * 4 if set and used
  * 0 otherwise (not touched)
  */
-int
+static int
 copyu(Prog *p, Adr *v, Adr *s)
 {
        ProgInfo info;
index 32d5256f8c83f86b5789a14b1586059424e5d9a4..79b7911e5b8a6b5f7404e0231ae44df31bdfaddc 100644 (file)
@@ -5,7 +5,7 @@
 #include <u.h>
 #include <libc.h>
 #include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
 
 // Matches real RtoB but can be used in global initializer.
 #define RtoB(r) (1<<((r)-REG_AX))
index 7db44245f1d6804ec8135d5ba8f6d43ccc91258d..e01f265a13ac562b274591a55766770229d562df 100644 (file)
 #include <u.h>
 #include <libc.h>
 #include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
 
-#define        NREGVAR 32      /* 16 general + 16 floating */
-#define        REGBITS ((uint64)0xffffffffull)
-/*c2go enum {
+enum {
        NREGVAR = 32,
-       REGBITS = 0xffffffff,
 };
-*/
-
-static Reg*    firstr;
-static int     first   = 1;
-
-int
-rcmp(const void *a1, const void *a2)
-{
-       Rgn *p1, *p2;
-       int c1, c2;
-
-       p1 = (Rgn*)a1;
-       p2 = (Rgn*)a2;
-       c1 = p2->cost;
-       c2 = p1->cost;
-       if(c1 -= c2)
-               return c1;
-       return p2->varno - p1->varno;
-}
-
-static void
-setaddrs(Bits bit)
-{
-       int i, n;
-       Var *v;
-       Node *node;
-
-       while(bany(&bit)) {
-               // convert each bit to a variable
-               i = bnum(bit);
-               node = var[i].node;
-               n = var[i].name;
-               biclr(&bit, i);
-
-               // disable all pieces of that variable
-               for(i=0; i<nvar; i++) {
-                       v = var+i;
-                       if(v->node == node && v->name == n)
-                               v->addr = 2;
-               }
-       }
-}
 
 static char* regname[] = {
        ".AX",
@@ -117,471 +72,23 @@ static char* regname[] = {
        ".X15",
 };
 
-static Node* regnodes[NREGVAR];
-
-static void walkvardef(Node *n, Reg *r, int active);
-
-void
-regopt(Prog *firstp)
-{
-       Reg *r, *r1;
-       Prog *p;
-       Graph *g;
-       ProgInfo info;
-       int i, z, active;
-       uint32 vreg;
-       Bits bit;
-
-       if(first) {
-               fmtinstall('Q', Qconv);
-               exregoffset = REG_R15;
-               first = 0;
-       }
-
-       mergetemp(firstp);
-
-       /*
-        * control flow is more complicated in generated go code
-        * than in generated c code.  define pseudo-variables for
-        * registers, so we have complete register usage information.
-        */
-       nvar = NREGVAR;
-       memset(var, 0, NREGVAR*sizeof var[0]);
-       for(i=0; i<NREGVAR; i++) {
-               if(regnodes[i] == N)
-                       regnodes[i] = newname(lookup(regname[i]));
-               var[i].node = regnodes[i];
-       }
-
-       regbits = RtoB(REG_SP);
-       for(z=0; z<BITS; z++) {
-               externs.b[z] = 0;
-               params.b[z] = 0;
-               consts.b[z] = 0;
-               addrs.b[z] = 0;
-               ivar.b[z] = 0;
-               ovar.b[z] = 0;
-       }
-
-       /*
-        * pass 1
-        * build aux data structure
-        * allocate pcs
-        * find use and set of variables
-        */
-       g = flowstart(firstp, sizeof(Reg));
-       if(g == nil) {
-               for(i=0; i<nvar; i++)
-                       var[i].node->opt = nil;
-               return;
-       }
-
-       firstr = (Reg*)g->start;
-
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               p = r->f.prog;
-               if(p->as == AVARDEF || p->as == AVARKILL)
-                       continue;
-               proginfo(&info, p);
-
-               // Avoid making variables for direct-called functions.
-               if(p->as == ACALL && p->to.type == TYPE_MEM && p->to.name == NAME_EXTERN)
-                       continue;
-
-               r->use1.b[0] |= info.reguse | info.regindex;
-               r->set.b[0] |= info.regset;
-
-               bit = mkvar(r, &p->from);
-               if(bany(&bit)) {
-                       if(info.flags & LeftAddr)
-                               setaddrs(bit);
-                       if(info.flags & LeftRead)
-                               for(z=0; z<BITS; z++)
-                                       r->use1.b[z] |= bit.b[z];
-                       if(info.flags & LeftWrite)
-                               for(z=0; z<BITS; z++)
-                                       r->set.b[z] |= bit.b[z];
-               }
-
-               bit = mkvar(r, &p->to);
-               if(bany(&bit)) {        
-                       if(info.flags & RightAddr)
-                               setaddrs(bit);
-                       if(info.flags & RightRead)
-                               for(z=0; z<BITS; z++)
-                                       r->use2.b[z] |= bit.b[z];
-                       if(info.flags & RightWrite)
-                               for(z=0; z<BITS; z++)
-                                       r->set.b[z] |= bit.b[z];
-               }
-       }
-
-       for(i=0; i<nvar; i++) {
-               Var *v = var+i;
-               if(v->addr) {
-                       bit = blsh(i);
-                       for(z=0; z<BITS; z++)
-                               addrs.b[z] |= bit.b[z];
-               }
-
-               if(debug['R'] && debug['v'])
-                       print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
-                               i, v->addr, v->etype, v->width, v->node, v->offset);
-       }
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass1", &firstr->f, 1);
-
-       /*
-        * pass 2
-        * find looping structure
-        */
-       flowrpo(g);
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass2", &firstr->f, 1);
-
-       /*
-        * pass 2.5
-        * iterate propagating fat vardef covering forward
-        * r->act records vars with a VARDEF since the last CALL.
-        * (r->act will be reused in pass 5 for something else,
-        * but we'll be done with it by then.)
-        */
-       active = 0;
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               r->f.active = 0;
-               r->act = zbits;
-       }
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               p = r->f.prog;
-               if(p->as == AVARDEF && isfat(((Node*)(p->to.node))->type) && ((Node*)(p->to.node))->opt != nil) {
-                       active++;
-                       walkvardef(p->to.node, r, active);
-               }
-       }
-
-       /*
-        * pass 3
-        * iterate propagating usage
-        *      back until flow graph is complete
-        */
-loop1:
-       change = 0;
-       for(r = firstr; r != R; r = (Reg*)r->f.link)
-               r->f.active = 0;
-       for(r = firstr; r != R; r = (Reg*)r->f.link)
-               if(r->f.prog->as == ARET)
-                       prop(r, zbits, zbits);
-loop11:
-       /* pick up unreachable code */
-       i = 0;
-       for(r = firstr; r != R; r = r1) {
-               r1 = (Reg*)r->f.link;
-               if(r1 && r1->f.active && !r->f.active) {
-                       prop(r, zbits, zbits);
-                       i = 1;
-               }
-       }
-       if(i)
-               goto loop11;
-       if(change)
-               goto loop1;
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass3", &firstr->f, 1);
-
-       /*
-        * pass 4
-        * iterate propagating register/variable synchrony
-        *      forward until graph is complete
-        */
-loop2:
-       change = 0;
-       for(r = firstr; r != R; r = (Reg*)r->f.link)
-               r->f.active = 0;
-       synch(firstr, zbits);
-       if(change)
-               goto loop2;
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass4", &firstr->f, 1);
-
-       /*
-        * pass 4.5
-        * move register pseudo-variables into regu.
-        */
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
-
-               r->set.b[0] &= ~REGBITS;
-               r->use1.b[0] &= ~REGBITS;
-               r->use2.b[0] &= ~REGBITS;
-               r->refbehind.b[0] &= ~REGBITS;
-               r->refahead.b[0] &= ~REGBITS;
-               r->calbehind.b[0] &= ~REGBITS;
-               r->calahead.b[0] &= ~REGBITS;
-               r->regdiff.b[0] &= ~REGBITS;
-               r->act.b[0] &= ~REGBITS;
-       }
-
-       /*
-        * pass 5
-        * isolate regions
-        * calculate costs (paint1)
-        */
-       r = firstr;
-       if(r) {
-               for(z=0; z<BITS; z++)
-                       bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
-                         ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
-               if(bany(&bit) && !r->f.refset) {
-                       // should never happen - all variables are preset
-                       if(debug['w'])
-                               print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
-                       r->f.refset = 1;
-               }
-       }
-       for(r = firstr; r != R; r = (Reg*)r->f.link)
-               r->act = zbits;
-       rgp = region;
-       nregion = 0;
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               for(z=0; z<BITS; z++)
-                       bit.b[z] = r->set.b[z] &
-                         ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
-               if(bany(&bit) && !r->f.refset) {
-                       if(debug['w'])
-                               print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
-                       r->f.refset = 1;
-                       excise(&r->f);
-               }
-               for(z=0; z<BITS; z++)
-                       bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
-               while(bany(&bit)) {
-                       i = bnum(bit);
-                       rgp->enter = r;
-                       rgp->varno = i;
-                       change = 0;
-                       paint1(r, i);
-                       biclr(&bit, i);
-                       if(change <= 0)
-                               continue;
-                       rgp->cost = change;
-                       nregion++;
-                       if(nregion >= NRGN) {
-                               if(debug['R'] && debug['v'])
-                                       print("too many regions\n");
-                               goto brk;
-                       }
-                       rgp++;
-               }
-       }
-brk:
-       qsort(region, nregion, sizeof(region[0]), rcmp);
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass5", &firstr->f, 1);
-
-       /*
-        * pass 6
-        * determine used registers (paint2)
-        * replace code (paint3)
-        */
-       rgp = region;
-       if(debug['R'] && debug['v'])
-               print("\nregisterizing\n");
-       for(i=0; i<nregion; i++) {
-               if(debug['R'] && debug['v'])
-                       print("region %d: cost %d varno %d enter %lld\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc);
-               bit = blsh(rgp->varno);
-               vreg = paint2(rgp->enter, rgp->varno, 0);
-               vreg = allreg(vreg, rgp);
-               if(rgp->regno != 0) {
-                       if(debug['R'] && debug['v']) {
-                               Var *v;
-
-                               v = var + rgp->varno;
-                               print("registerize %N+%lld (bit=%2d et=%2E) in %R\n",
-                                               v->node, v->offset, rgp->varno, v->etype, rgp->regno);
-                       }
-                       paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
-               }
-               rgp++;
-       }
-
-       /*
-        * free aux structures. peep allocates new ones.
-        */
-       for(i=0; i<nvar; i++)
-               var[i].node->opt = nil;
-       flowend(g);
-       firstr = R;
-
-       if(debug['R'] && debug['v']) {
-               // Rebuild flow graph, since we inserted instructions
-               g = flowstart(firstp, sizeof(Reg));
-               firstr = (Reg*)g->start;
-               dumpit("pass6", &firstr->f, 1);
-               flowend(g);
-               firstr = R;
-       }
-
-       /*
-        * pass 7
-        * peep-hole on basic block
-        */
-       if(!debug['R'] || debug['P'])
-               peep(firstp);
-
-       /*
-        * eliminate nops
-        */
-       for(p=firstp; p!=P; p=p->link) {
-               while(p->link != P && p->link->as == ANOP)
-                       p->link = p->link->link;
-               if(p->to.type == TYPE_BRANCH)
-                       while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
-                               p->to.u.branch = p->to.u.branch->link;
-       }
-
-       if(debug['R']) {
-               if(ostats.ncvtreg ||
-                  ostats.nspill ||
-                  ostats.nreload ||
-                  ostats.ndelmov ||
-                  ostats.nvar ||
-                  ostats.naddr ||
-                  0)
-                       print("\nstats\n");
-
-               if(ostats.ncvtreg)
-                       print(" %4d cvtreg\n", ostats.ncvtreg);
-               if(ostats.nspill)
-                       print(" %4d spill\n", ostats.nspill);
-               if(ostats.nreload)
-                       print(" %4d reload\n", ostats.nreload);
-               if(ostats.ndelmov)
-                       print(" %4d delmov\n", ostats.ndelmov);
-               if(ostats.nvar)
-                       print(" %4d var\n", ostats.nvar);
-               if(ostats.naddr)
-                       print(" %4d addr\n", ostats.naddr);
-
-               memset(&ostats, 0, sizeof(ostats));
-       }
-}
-
-static void
-walkvardef(Node *n, Reg *r, int active)
+char**
+regnames(int *n)
 {
-       Reg *r1, *r2;
-       int bn;
-       Var *v;
-       
-       for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) {
-               if(r1->f.active == active)
-                       break;
-               r1->f.active = active;
-               if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n)
-                       break;
-               for(v=n->opt; v!=nil; v=v->nextinnode) {
-                       bn = v - var;
-                       biset(&r1->act, bn);
-               }
-               if(r1->f.prog->as == ACALL)
-                       break;
-       }
-
-       for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1)
-               if(r2->f.s2 != nil)
-                       walkvardef(n, (Reg*)r2->f.s2, active);
+       *n = NREGVAR;
+       return regname;
 }
 
-/*
- * add mov b,rn
- * just after r
- */
-void
-addmove(Reg *r, int bn, int rn, int f)
+uint64
+excludedregs(void)
 {
-       Prog *p, *p1;
-       Adr *a;
-       Var *v;
-
-       p1 = mal(sizeof(*p1));
-       clearp(p1);
-       p1->pc = 9999;
-
-       p = r->f.prog;
-       p1->link = p->link;
-       p->link = p1;
-       p1->lineno = p->lineno;
-
-       v = var + bn;
-
-       a = &p1->to;
-       a->offset = v->offset;
-       a->etype = v->etype;
-       a->type = TYPE_MEM;
-       a->name = v->name;
-       a->node = v->node;
-       a->sym = linksym(v->node->sym);
-
-       // need to clean this up with wptr and
-       // some of the defaults
-       p1->as = AMOVL;
-       switch(simtype[(uchar)v->etype]) {
-       default:
-               fatal("unknown type %E", v->etype);
-       case TINT8:
-       case TUINT8:
-       case TBOOL:
-               p1->as = AMOVB;
-               break;
-       case TINT16:
-       case TUINT16:
-               p1->as = AMOVW;
-               break;
-       case TINT64:
-       case TUINT64:
-       case TPTR64:
-               p1->as = AMOVQ;
-               break;
-       case TFLOAT32:
-               p1->as = AMOVSS;
-               break;
-       case TFLOAT64:
-               p1->as = AMOVSD;
-               break;
-       case TINT32:
-       case TUINT32:
-       case TPTR32:
-               break;
-       }
-
-       p1->from.type = TYPE_REG;
-       p1->from.reg = rn;
-       p1->from.name = NAME_NONE;
-       if(!f) {
-               p1->from = *a;
-               *a = zprog.from;
-               a->type = TYPE_REG;
-               a->reg = rn;
-               if(v->etype == TUINT8)
-                       p1->as = AMOVB;
-               if(v->etype == TUINT16)
-                       p1->as = AMOVW;
-       }
-       if(debug['R'] && debug['v'])
-               print("%P ===add=== %P\n", p, p1);
-       ostats.nspill++;
+       return RtoB(REG_SP);
 }
 
-uint32
+uint64
 doregbits(int r)
 {
-       uint32 b;
+       uint64 b;
 
        b = 0;
        if(r >= REG_AX && r <= REG_R15)
@@ -598,592 +105,19 @@ doregbits(int r)
        return b;
 }
 
-static int
-overlap(int64 o1, int w1, int64 o2, int w2)
-{
-       int64 t1, t2;
-
-       t1 = o1+w1;
-       t2 = o2+w2;
-
-       if(!(t1 > o2 && t2 > o1))
-               return 0;
-
-       return 1;
-}
-
-Bits
-mkvar(Reg *r, Adr *a)
-{
-       Var *v;
-       int i, n, et, z, flag;
-       int64 w;
-       uint32 regu;
-       int64 o;
-       Bits bit;
-       Node *node;
-
-       /*
-        * mark registers used
-        */
-       if(a->type == TYPE_NONE)
-               goto none;
-
-       if(r != R)
-               r->use1.b[0] |= doregbits(a->index);
-
-       switch(a->type) {
-       default:
-               regu = doregbits(a->reg);
-               if(regu == 0)
-                       goto none;
-               bit = zbits;
-               bit.b[0] = regu;
-               return bit;
-
-       case TYPE_ADDR:
-               a->type = TYPE_MEM;
-               bit = mkvar(r, a);
-               setaddrs(bit);
-               a->type = TYPE_ADDR;
-               ostats.naddr++;
-               goto none;
-
-       case TYPE_MEM:
-               switch(a->name) {
-               default:
-                       goto none;
-               case NAME_EXTERN:
-               case NAME_STATIC:
-               case NAME_PARAM:
-               case NAME_AUTO:
-                       n = a->name;
-                       break;
-               }
-       }
-
-       node = a->node;
-       if(node == N || node->op != ONAME || node->orig == N)
-               goto none;
-       node = node->orig;
-       if(node->orig != node)
-               fatal("%D: bad node", a);
-       if(node->sym == S || node->sym->name[0] == '.')
-               goto none;
-       et = a->etype;
-       o = a->offset;
-       w = a->width;
-       if(w < 0)
-               fatal("bad width %lld for %D", w, a);
-
-       flag = 0;
-       for(i=0; i<nvar; i++) {
-               v = var+i;
-               if(v->node == node && v->name == n) {
-                       if(v->offset == o)
-                       if(v->etype == et)
-                       if(v->width == w)
-                               return blsh(i);
-
-                       // if they overlaps, disable both
-                       if(overlap(v->offset, v->width, o, w)) {
-//                             print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et);
-                               v->addr = 1;
-                               flag = 1;
-                       }
-               }
-       }
-       switch(et) {
-       case 0:
-       case TFUNC:
-               goto none;
-       }
-
-       if(nvar >= NVAR) {
-               if(debug['w'] > 1 && node != N)
-                       fatal("variable not optimized: %#N", node);
-               
-               // If we're not tracking a word in a variable, mark the rest as
-               // having its address taken, so that we keep the whole thing
-               // live at all calls. otherwise we might optimize away part of
-               // a variable but not all of it.
-               for(i=0; i<nvar; i++) {
-                       v = var+i;
-                       if(v->node == node)
-                               v->addr = 1;
-               }
-               goto none;
-       }
-
-       i = nvar;
-       nvar++;
-       v = var+i;
-       v->offset = o;
-       v->name = n;
-       v->etype = et;
-       v->width = w;
-       v->addr = flag;         // funny punning
-       v->node = node;
-       
-       // node->opt is the head of a linked list
-       // of Vars within the given Node, so that
-       // we can start at a Var and find all the other
-       // Vars in the same Go variable.
-       v->nextinnode = node->opt;
-       node->opt = v;
-
-       bit = blsh(i);
-       if(n == NAME_EXTERN || n == NAME_STATIC)
-               for(z=0; z<BITS; z++)
-                       externs.b[z] |= bit.b[z];
-       if(n == NAME_PARAM)
-               for(z=0; z<BITS; z++)
-                       params.b[z] |= bit.b[z];
-
-       if(node->class == PPARAM)
-               for(z=0; z<BITS; z++)
-                       ivar.b[z] |= bit.b[z];
-       if(node->class == PPARAMOUT)
-               for(z=0; z<BITS; z++)
-                       ovar.b[z] |= bit.b[z];
-
-       // Treat values with their address taken as live at calls,
-       // because the garbage collector's liveness analysis in ../gc/plive.c does.
-       // These must be consistent or else we will elide stores and the garbage
-       // collector will see uninitialized data.
-       // The typical case where our own analysis is out of sync is when the
-       // node appears to have its address taken but that code doesn't actually
-       // get generated and therefore doesn't show up as an address being
-       // taken when we analyze the instruction stream.
-       // One instance of this case is when a closure uses the same name as
-       // an outer variable for one of its own variables declared with :=.
-       // The parser flags the outer variable as possibly shared, and therefore
-       // sets addrtaken, even though it ends up not being actually shared.
-       // If we were better about _ elision, _ = &x would suffice too.
-       // The broader := in a closure problem is mentioned in a comment in
-       // closure.c:/^typecheckclosure and dcl.c:/^oldname.
-       if(node->addrtaken)
-               v->addr = 1;
-
-       // Disable registerization for globals, because:
-       // (1) we might panic at any time and we want the recovery code
-       // to see the latest values (issue 1304).
-       // (2) we don't know what pointers might point at them and we want
-       // loads via those pointers to see updated values and vice versa (issue 7995).
-       //
-       // Disable registerization for results if using defer, because the deferred func
-       // might recover and return, causing the current values to be used.
-       if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
-               v->addr = 1;
-
-       if(debug['R'])
-               print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
-       ostats.nvar++;
-
-       return bit;
-
-none:
-       return zbits;
-}
-
-void
-prop(Reg *r, Bits ref, Bits cal)
-{
-       Reg *r1, *r2;
-       int z, i, j;
-       Var *v, *v1;
-
-       for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
-               for(z=0; z<BITS; z++) {
-                       ref.b[z] |= r1->refahead.b[z];
-                       if(ref.b[z] != r1->refahead.b[z]) {
-                               r1->refahead.b[z] = ref.b[z];
-                               change++;
-                       }
-                       cal.b[z] |= r1->calahead.b[z];
-                       if(cal.b[z] != r1->calahead.b[z]) {
-                               r1->calahead.b[z] = cal.b[z];
-                               change++;
-                       }
-               }
-               switch(r1->f.prog->as) {
-               case ACALL:
-                       if(noreturn(r1->f.prog))
-                               break;
-
-                       // Mark all input variables (ivar) as used, because that's what the
-                       // liveness bitmaps say. The liveness bitmaps say that so that a
-                       // panic will not show stale values in the parameter dump.
-                       // Mark variables with a recent VARDEF (r1->act) as used,
-                       // so that the optimizer flushes initializations to memory,
-                       // so that if a garbage collection happens during this CALL,
-                       // the collector will see initialized memory. Again this is to
-                       // match what the liveness bitmaps say.
-                       for(z=0; z<BITS; z++) {
-                               cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
-                               ref.b[z] = 0;
-                       }
-                       
-                       // cal.b is the current approximation of what's live across the call.
-                       // Every bit in cal.b is a single stack word. For each such word,
-                       // find all the other tracked stack words in the same Go variable
-                       // (struct/slice/string/interface) and mark them live too.
-                       // This is necessary because the liveness analysis for the garbage
-                       // collector works at variable granularity, not at word granularity.
-                       // It is fundamental for slice/string/interface: the garbage collector
-                       // needs the whole value, not just some of the words, in order to
-                       // interpret the other bits correctly. Specifically, slice needs a consistent
-                       // ptr and cap, string needs a consistent ptr and len, and interface
-                       // needs a consistent type word and data word.
-                       for(z=0; z<BITS; z++) {
-                               if(cal.b[z] == 0)
-                                       continue;
-                               for(i=0; i<64; i++) {
-                                       if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
-                                               continue;
-                                       v = var+z*64+i;
-                                       if(v->node->opt == nil) // v represents fixed register, not Go variable
-                                               continue;
-
-                                       // v->node->opt is the head of a linked list of Vars
-                                       // corresponding to tracked words from the Go variable v->node.
-                                       // Walk the list and set all the bits.
-                                       // For a large struct this could end up being quadratic:
-                                       // after the first setting, the outer loop (for z, i) would see a 1 bit
-                                       // for all of the remaining words in the struct, and for each such
-                                       // word would go through and turn on all the bits again.
-                                       // To avoid the quadratic behavior, we only turn on the bits if
-                                       // v is the head of the list or if the head's bit is not yet turned on.
-                                       // This will set the bits at most twice, keeping the overall loop linear.
-                                       v1 = v->node->opt;
-                                       j = v1 - var;
-                                       if(v == v1 || !btest(&cal, j)) {
-                                               for(; v1 != nil; v1 = v1->nextinnode) {
-                                                       j = v1 - var;
-                                                       biset(&cal, j);
-                                               }
-                                       }
-                               }
-                       }
-                       break;
-
-               case ATEXT:
-                       for(z=0; z<BITS; z++) {
-                               cal.b[z] = 0;
-                               ref.b[z] = 0;
-                       }
-                       break;
-
-               case ARET:
-                       for(z=0; z<BITS; z++) {
-                               cal.b[z] = externs.b[z] | ovar.b[z];
-                               ref.b[z] = 0;
-                       }
-                       break;
-               }
-               for(z=0; z<BITS; z++) {
-                       ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
-                               r1->use1.b[z] | r1->use2.b[z];
-                       cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
-                       r1->refbehind.b[z] = ref.b[z];
-                       r1->calbehind.b[z] = cal.b[z];
-               }
-               if(r1->f.active)
-                       break;
-               r1->f.active = 1;
-       }
-       for(; r != r1; r = (Reg*)r->f.p1)
-               for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
-                       prop(r2, r->refbehind, r->calbehind);
-}
-
-void
-synch(Reg *r, Bits dif)
-{
-       Reg *r1;
-       int z;
-
-       for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
-               for(z=0; z<BITS; z++) {
-                       dif.b[z] = (dif.b[z] &
-                               ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
-                                       r1->set.b[z] | r1->regdiff.b[z];
-                       if(dif.b[z] != r1->regdiff.b[z]) {
-                               r1->regdiff.b[z] = dif.b[z];
-                               change++;
-                       }
-               }
-               if(r1->f.active)
-                       break;
-               r1->f.active = 1;
-               for(z=0; z<BITS; z++)
-                       dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
-               if(r1->f.s2 != nil)
-                       synch((Reg*)r1->f.s2, dif);
-       }
-}
-
-uint32
-allreg(uint32 b, Rgn *r)
-{
-       Var *v;
-       int i;
-
-       v = var + r->varno;
-       r->regno = 0;
-       switch(v->etype) {
-
-       default:
-               fatal("unknown etype %d/%E", bitno(b), v->etype);
-               break;
-
-       case TINT8:
-       case TUINT8:
-       case TINT16:
-       case TUINT16:
-       case TINT32:
-       case TUINT32:
-       case TINT64:
-       case TUINT64:
-       case TINT:
-       case TUINT:
-       case TUINTPTR:
-       case TBOOL:
-       case TPTR32:
-       case TPTR64:
-               i = BtoR(~b);
-               if(i && r->cost > 0) {
-                       r->regno = i;
-                       return RtoB(i);
-               }
-               break;
-
-       case TFLOAT32:
-       case TFLOAT64:
-               i = BtoF(~b);
-               if(i && r->cost > 0) {
-                       r->regno = i;
-                       return FtoB(i);
-               }
-               break;
-       }
-       return 0;
-}
-
-void
-paint1(Reg *r, int bn)
-{
-       Reg *r1;
-       int z;
-       uint64 bb;
-
-       z = bn/64;
-       bb = 1LL<<(bn%64);
-       if(r->act.b[z] & bb)
-               return;
-       for(;;) {
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.p1;
-               if(r1 == R)
-                       break;
-               if(!(r1->refahead.b[z] & bb))
-                       break;
-               if(r1->act.b[z] & bb)
-                       break;
-               r = r1;
-       }
-
-       if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
-               change -= CLOAD * r->f.loop;
-       }
-       for(;;) {
-               r->act.b[z] |= bb;
-
-               if(r->f.prog->as != ANOP) { // don't give credit for NOPs
-                       if(r->use1.b[z] & bb)
-                               change += CREF * r->f.loop;
-                       if((r->use2.b[z]|r->set.b[z]) & bb)
-                               change += CREF * r->f.loop;
-               }
-
-               if(STORE(r) & r->regdiff.b[z] & bb) {
-                       change -= CLOAD * r->f.loop;
-               }
-
-               if(r->refbehind.b[z] & bb)
-                       for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
-                               if(r1->refahead.b[z] & bb)
-                                       paint1(r1, bn);
-
-               if(!(r->refahead.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.s2;
-               if(r1 != R)
-                       if(r1->refbehind.b[z] & bb)
-                               paint1(r1, bn);
-               r = (Reg*)r->f.s1;
-               if(r == R)
-                       break;
-               if(r->act.b[z] & bb)
-                       break;
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-       }
-}
-
-uint32
-paint2(Reg *r, int bn, int depth)
-{
-       Reg *r1;
-       int z;
-       uint64 bb, vreg;
-
-       z = bn/64;
-       bb = 1LL << (bn%64);
-       vreg = regbits;
-       if(!(r->act.b[z] & bb))
-               return vreg;
-       for(;;) {
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.p1;
-               if(r1 == R)
-                       break;
-               if(!(r1->refahead.b[z] & bb))
-                       break;
-               if(!(r1->act.b[z] & bb))
-                       break;
-               r = r1;
-       }
-       for(;;) {
-               if(debug['R'] && debug['v'])
-                       print("  paint2 %d %P\n", depth, r->f.prog);
-
-               r->act.b[z] &= ~bb;
-
-               vreg |= r->regu;
-
-               if(r->refbehind.b[z] & bb)
-                       for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
-                               if(r1->refahead.b[z] & bb)
-                                       vreg |= paint2(r1, bn, depth+1);
-
-               if(!(r->refahead.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.s2;
-               if(r1 != R)
-                       if(r1->refbehind.b[z] & bb)
-                               vreg |= paint2(r1, bn, depth+1);
-               r = (Reg*)r->f.s1;
-               if(r == R)
-                       break;
-               if(!(r->act.b[z] & bb))
-                       break;
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-       }
-
-       return vreg;
-}
-
-void
-paint3(Reg *r, int bn, uint32 rb, int rn)
-{
-       Reg *r1;
-       Prog *p;
-       int z;
-       uint64 bb;
-
-       z = bn/64;
-       bb = 1LL << (bn%64);
-       if(r->act.b[z] & bb)
-               return;
-       for(;;) {
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.p1;
-               if(r1 == R)
-                       break;
-               if(!(r1->refahead.b[z] & bb))
-                       break;
-               if(r1->act.b[z] & bb)
-                       break;
-               r = r1;
-       }
-
-       if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
-               addmove(r, bn, rn, 0);
-       for(;;) {
-               r->act.b[z] |= bb;
-               p = r->f.prog;
-
-               if(r->use1.b[z] & bb) {
-                       if(debug['R'] && debug['v'])
-                               print("%P", p);
-                       addreg(&p->from, rn);
-                       if(debug['R'] && debug['v'])
-                               print(" ===change== %P\n", p);
-               }
-               if((r->use2.b[z]|r->set.b[z]) & bb) {
-                       if(debug['R'] && debug['v'])
-                               print("%P", p);
-                       addreg(&p->to, rn);
-                       if(debug['R'] && debug['v'])
-                               print(" ===change== %P\n", p);
-               }
-
-               if(STORE(r) & r->regdiff.b[z] & bb)
-                       addmove(r, bn, rn, 1);
-               r->regu |= rb;
-
-               if(r->refbehind.b[z] & bb)
-                       for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
-                               if(r1->refahead.b[z] & bb)
-                                       paint3(r1, bn, rb, rn);
-
-               if(!(r->refahead.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.s2;
-               if(r1 != R)
-                       if(r1->refbehind.b[z] & bb)
-                               paint3(r1, bn, rb, rn);
-               r = (Reg*)r->f.s1;
-               if(r == R)
-                       break;
-               if(r->act.b[z] & bb)
-                       break;
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-       }
-}
-
-void
-addreg(Adr *a, int rn)
-{
-       a->sym = nil;
-       a->node = nil;
-       a->offset = 0;
-       a->type = TYPE_REG;
-       a->reg = rn;
-       a->name = 0;
-
-       ostats.ncvtreg++;
-}
-
-uint32
+uint64
 RtoB(int r)
 {
 
        if(r < REG_AX || r > REG_R15)
                return 0;
-       return 1L << (r-REG_AX);
+       return 1ULL << (r-REG_AX);
 }
 
 int
-BtoR(uint32 b)
+BtoR(uint64 b)
 {
-       b &= 0xffffL;
+       b &= 0xffffULL;
        if(nacl)
                b &= ~((1<<(REG_BP-REG_AX)) | (1<<(REG_R15-REG_AX)));
        else if(framepointer_enabled)
@@ -1200,16 +134,16 @@ BtoR(uint32 b)
  *     ...
  *     31      X15
  */
-uint32
+uint64
 FtoB(int f)
 {
        if(f < REG_X0 || f > REG_X15)
                return 0;
-       return 1L << (f - REG_X0 + 16);
+       return 1ULL << (f - REG_X0 + 16);
 }
 
 int
-BtoF(uint32 b)
+BtoF(uint64 b)
 {
 
        b &= 0xFFFF0000L;
@@ -1217,77 +151,3 @@ BtoF(uint32 b)
                return 0;
        return bitno(b) - 16 + REG_X0;
 }
-
-void
-dumpone(Flow *f, int isreg)
-{
-       int z;
-       Bits bit;
-       Reg *r;
-
-       print("%d:%P", f->loop, f->prog);
-       if(isreg) {     
-               r = (Reg*)f;
-               for(z=0; z<BITS; z++)
-                       bit.b[z] =
-                               r->set.b[z] |
-                               r->use1.b[z] |
-                               r->use2.b[z] |
-                               r->refbehind.b[z] |
-                               r->refahead.b[z] |
-                               r->calbehind.b[z] |
-                               r->calahead.b[z] |
-                               r->regdiff.b[z] |
-                               r->act.b[z] |
-                                       0;
-               if(bany(&bit)) {
-                       print("\t");
-                       if(bany(&r->set))
-                               print(" s:%Q", r->set);
-                       if(bany(&r->use1))
-                               print(" u1:%Q", r->use1);
-                       if(bany(&r->use2))
-                               print(" u2:%Q", r->use2);
-                       if(bany(&r->refbehind))
-                               print(" rb:%Q ", r->refbehind);
-                       if(bany(&r->refahead))
-                               print(" ra:%Q ", r->refahead);
-                       if(bany(&r->calbehind))
-                               print(" cb:%Q ", r->calbehind);
-                       if(bany(&r->calahead))
-                               print(" ca:%Q ", r->calahead);
-                       if(bany(&r->regdiff))
-                               print(" d:%Q ", r->regdiff);
-                       if(bany(&r->act))
-                               print(" a:%Q ", r->act);
-               }
-       }
-       print("\n");
-}
-
-void
-dumpit(char *str, Flow *r0, int isreg)
-{
-       Flow *r, *r1;
-
-       print("\n%s\n", str);
-       for(r = r0; r != nil; r = r->link) {
-               dumpone(r, isreg);
-               r1 = r->p2;
-               if(r1 != nil) {
-                       print(" pred:");
-                       for(; r1 != nil; r1 = r1->p2link)
-                               print(" %.4ud", (int)r1->prog->pc);
-                       print("\n");
-               }
-               // Print successors if it's not just the next one
-               if(r->s1 != r->link || r->s2 != nil) {
-                       print(" succ:");
-                       if(r->s1 != nil)
-                               print(" %.4ud", (int)r->s1->prog->pc);
-                       if(r->s2 != nil)
-                               print(" %.4ud", (int)r->s2->prog->pc);
-                       print("\n");
-               }
-       }
-}
index 3fb48ad5e4cd1c27ed6057b648ac64281d473892..3ee3dc2f3a7e6fcbede9b793b9e2f945f99aa44b 100644 (file)
@@ -66,14 +66,22 @@ main(int argc, char **argv)
        arch.ginscall = ginscall;
        arch.igen = igen;
        arch.linkarchinit = linkarchinit;
+       arch.peep = peep;
        arch.proginfo = proginfo;
        arch.regalloc = regalloc;
        arch.regfree = regfree;
-       arch.regopt = regopt;
        arch.regtyp = regtyp;
        arch.sameaddr = sameaddr;
        arch.smallindir = smallindir;
        arch.stackaddr = stackaddr;
+       arch.excludedregs = excludedregs;
+       arch.RtoB = RtoB;
+       arch.FtoB = FtoB;
+       arch.BtoR = BtoR;
+       arch.BtoF = BtoF;
+       arch.optoas = optoas;
+       arch.doregbits = doregbits;
+       arch.regnames = regnames;
        
        gcmain(argc, argv);
 }
index 46a61b4d1cef2b576b6e2c247f9e31a58b339dbf..872d9465926abbc8644902deabf6b378c98ac489 100644 (file)
@@ -171,3 +171,19 @@ int sameaddr(Addr*, Addr*);
 int smallindir(Addr*, Addr*);
 int stackaddr(Addr*);
 Prog* unpatch(Prog*);
+
+/*
+ * reg.c
+ */
+uint64 excludedregs(void);
+uint64 RtoB(int);
+uint64 FtoB(int);
+int BtoR(uint64);
+int BtoF(uint64);
+uint64 doregbits(int);
+char** regnames(int*);
+
+/*
+ * peep.c
+ */
+void peep(Prog*);
index 4cd159edcd6c1dc41737f95ff771e6817eb481a6..8188348282bd003101ab0608b452e4e573c32fcb 100644 (file)
@@ -7,7 +7,7 @@
 #include <u.h>
 #include <libc.h>
 #include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
 
 static Prog *appendpp(Prog*, int, int, int, vlong, int, int, vlong);
 static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax);
index 76c8e2d14fdabf908a99ef45907bd97c80258a24..959ef20592c42f97ae0a3bee540ddd75f102d8e4 100644 (file)
@@ -187,6 +187,14 @@ optoas(int op, Type *t)
        case CASE(OAS, TPTR32):
                a = AMOVL;
                break;
+       
+       case CASE(OAS, TFLOAT32):
+               a = AMOVSS;
+               break;
+       
+       case CASE(OAS, TFLOAT64):
+               a = AMOVSD;
+               break;
 
        case CASE(OADD, TINT8):
        case CASE(OADD, TUINT8):
diff --git a/src/cmd/8g/opt.h b/src/cmd/8g/opt.h
deleted file mode 100644 (file)
index 8378d5d..0000000
+++ /dev/null
@@ -1,192 +0,0 @@
-// Derived from Inferno utils/6c/gc.h
-// http://code.google.com/p/inferno-os/source/browse/utils/6c/gc.h
-//
-//     Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
-//     Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
-//     Portions Copyright © 1997-1999 Vita Nuova Limited
-//     Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
-//     Portions Copyright © 2004,2006 Bruce Ellis
-//     Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
-//     Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
-//     Portions Copyright © 2009 The Go Authors.  All rights reserved.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-
-#define        Z       N
-#define        Adr     Addr
-
-#define        BLOAD(r)        band(bnot(r->refbehind), r->refahead)
-#define        BSTORE(r)       band(bnot(r->calbehind), r->calahead)
-#define        LOAD(r)         (~r->refbehind.b[z] & r->refahead.b[z])
-#define        STORE(r)        (~r->calbehind.b[z] & r->calahead.b[z])
-
-#define        CLOAD   5
-#define        CREF    5
-#define        CINF    1000
-#define        LOOP    3
-
-typedef        struct  Reg     Reg;
-typedef        struct  Rgn     Rgn;
-
-/*c2go
-extern Node *Z;
-enum
-{
-       CLOAD = 5,
-       CREF = 5,
-       CINF = 1000,
-       LOOP = 3,
-};
-
-uint32 BLOAD(Reg*);
-uint32 BSTORE(Reg*);
-uint64 LOAD(Reg*);
-uint64 STORE(Reg*);
-*/
-
-// A Reg is a wrapper around a single Prog (one instruction) that holds
-// register optimization information while the optimizer runs.
-// r->prog is the instruction.
-// r->prog->opt points back to r.
-struct Reg
-{
-       Flow    f;
-
-       Bits    set;            // regopt variables written by this instruction.
-       Bits    use1;           // regopt variables read by prog->from.
-       Bits    use2;           // regopt variables read by prog->to.
-
-       // refahead/refbehind are the regopt variables whose current
-       // value may be used in the following/preceding instructions
-       // up to a CALL (or the value is clobbered).
-       Bits    refbehind;
-       Bits    refahead;
-       // calahead/calbehind are similar, but for variables in
-       // instructions that are reachable after hitting at least one
-       // CALL.
-       Bits    calbehind;
-       Bits    calahead;
-       Bits    regdiff;
-       Bits    act;
-
-       int32   regu;           // register used bitmap
-       int32   rpo;            // reverse post ordering
-       int32   active;
-
-       uint16  loop;           // x5 for every loop
-       uchar   refset;         // diagnostic generated
-
-       Reg*    p1;             // predecessors of this instruction: p1,
-       Reg*    p2;             // and then p2 linked though p2link.
-       Reg*    p2link;
-       Reg*    s1;             // successors of this instruction (at most two: s1 and s2).
-       Reg*    s2;
-       Reg*    link;           // next instruction in function code
-       Prog*   prog;           // actual instruction
-};
-#define        R       ((Reg*)0)
-/*c2go extern Reg *R; */
-
-#define        NRGN    600
-/*c2go enum { NRGN = 600 }; */
-
-// A Rgn represents a single regopt variable over a region of code
-// where a register could potentially be dedicated to that variable.
-// The code encompassed by a Rgn is defined by the flow graph,
-// starting at enter, flood-filling forward while varno is refahead
-// and backward while varno is refbehind, and following branches.  A
-// single variable may be represented by multiple disjoint Rgns and
-// each Rgn may choose a different register for that variable.
-// Registers are allocated to regions greedily in order of descending
-// cost.
-struct Rgn
-{
-       Reg*    enter;
-       short   cost;
-       short   varno;
-       short   regno;
-};
-
-EXTERN int32   exregoffset;            // not set
-EXTERN int32   exfregoffset;           // not set
-EXTERN Reg     zreg;
-EXTERN Reg*    freer;
-EXTERN Reg**   rpo2r;
-EXTERN Rgn     region[NRGN];
-EXTERN Rgn*    rgp;
-EXTERN int     nregion;
-EXTERN int     nvar;
-EXTERN int32   regbits;
-EXTERN int32   exregbits;
-EXTERN Bits    externs;
-EXTERN Bits    params;
-EXTERN Bits    consts;
-EXTERN Bits    addrs;
-EXTERN Bits    ivar;
-EXTERN Bits    ovar;
-EXTERN int     change;
-EXTERN int32   maxnr;
-EXTERN int32*  idom;
-
-EXTERN struct
-{
-       int32   ncvtreg;
-       int32   nspill;
-       int32   nreload;
-       int32   ndelmov;
-       int32   nvar;
-       int32   naddr;
-} ostats;
-
-/*
- * reg.c
- */
-Reg*   rega(void);
-int    rcmp(const void*, const void*);
-void   regopt(Prog*);
-void   addmove(Reg*, int, int, int);
-Bits   mkvar(Reg*, Adr*);
-void   prop(Reg*, Bits, Bits);
-void   loopit(Reg*, int32);
-void   synch(Reg*, Bits);
-uint32 allreg(uint32, Rgn*);
-void   paint1(Reg*, int);
-uint32 paint2(Reg*, int, int);
-void   paint3(Reg*, int, uint32, int);
-void   addreg(Adr*, int);
-void   dumpone(Flow*, int);
-void   dumpit(char*, Flow*, int);
-
-/*
- * peep.c
- */
-void   peep(Prog*);
-void   excise(Flow*);
-int    copyu(Prog*, Adr*, Adr*);
-
-uint32 RtoB(int);
-uint32 FtoB(int);
-int    BtoR(uint32);
-int    BtoF(uint32);
-
-/*
- * prog.c
- */
-void proginfo(ProgInfo*, Prog*);
index 6c0865a7e83e83d237506ba9bec1f03b59a5960a..9b514a8964b90464cfa981987096e4fe17596076 100644 (file)
 #include <u.h>
 #include <libc.h>
 #include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
 
 enum {
        REGEXT = 0,
+       exregoffset = REG_DI,
 };
 
 static void    conprop(Flow *r);
@@ -45,6 +46,7 @@ static int    copy1(Adr*, Adr*, Flow*, int);
 static int     copyas(Adr*, Adr*);
 static int     copyau(Adr*, Adr*);
 static int     copysub(Adr*, Adr*, Adr*, int);
+static int     copyu(Prog*, Adr*, Adr*);
 
 static uint32  gactive;
 
@@ -535,7 +537,7 @@ copy1(Adr *v1, Adr *v2, Flow *r, int f)
  * 4 if set and used
  * 0 otherwise (not touched)
  */
-int
+static int
 copyu(Prog *p, Adr *v, Adr *s)
 {
        ProgInfo info;
index 8a7371b5c409e9ac01d80705a60bfd4455d9ca0c..e77a026a93ca611cd3f6b9385311f5e9a776b073 100644 (file)
@@ -5,7 +5,7 @@
 #include <u.h>
 #include <libc.h>
 #include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
 
 // Matches real RtoB but can be used in global initializer.
 #define RtoB(r) (1<<((r)-REG_AX))
index 7d2de5354937166046a87221777fbca27af5c21a..0470bdf7b5d3b67020105884a2ffd11b1446e68b 100644 (file)
 #include <u.h>
 #include <libc.h>
 #include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
 
-#define        NREGVAR 16      /* 8 integer + 8 floating */
-#define        REGBITS ((uint64)0xffffull)
-/*c2go enum {
-       NREGVAR = 16,
-       REGBITS = (1<<NREGVAR) - 1,
+enum {
+       NREGVAR = 16,   /* 8 integer + 8 floating */
 };
-*/
-
-static Reg*    firstr;
-static int     first   = 1;
-
-int
-rcmp(const void *a1, const void *a2)
-{
-       Rgn *p1, *p2;
-       int c1, c2;
-
-       p1 = (Rgn*)a1;
-       p2 = (Rgn*)a2;
-       c1 = p2->cost;
-       c2 = p1->cost;
-       if(c1 -= c2)
-               return c1;
-       return p2->varno - p1->varno;
-}
-
-static void
-setaddrs(Bits bit)
-{
-       int i, n;
-       Var *v;
-       Node *node;
-
-       while(bany(&bit)) {
-               // convert each bit to a variable
-               i = bnum(bit);
-               node = var[i].node;
-               n = var[i].name;
-               biclr(&bit, i);
-
-               // disable all pieces of that variable
-               for(i=0; i<nvar; i++) {
-                       v = var+i;
-                       if(v->node == node && v->name == n)
-                               v->addr = 2;
-               }
-       }
-}
 
 static char* regname[] = {
        ".ax", ".cx", ".dx", ".bx", ".sp", ".bp", ".si", ".di",
        ".x0", ".x1", ".x2", ".x3", ".x4", ".x5", ".x6", ".x7",
 };
 
-static Node* regnodes[NREGVAR];
-
-static void walkvardef(Node *n, Reg *r, int active);
-
-void
-regopt(Prog *firstp)
+char**
+regnames(int *n)
 {
-       Reg *r, *r1;
-       Prog *p;
-       Graph *g;
-       ProgInfo info;
-       int i, z, active;
-       uint32 vreg;
-       Bits bit;
-
-       if(first) {
-               fmtinstall('Q', Qconv);
-               exregoffset = REG_DI;   // no externals
-               first = 0;
-       }
-
-       mergetemp(firstp);
-
-       /*
-        * control flow is more complicated in generated go code
-        * than in generated c code.  define pseudo-variables for
-        * registers, so we have complete register usage information.
-        */
-       nvar = NREGVAR;
-       memset(var, 0, NREGVAR*sizeof var[0]);
-       for(i=0; i<NREGVAR; i++) {
-               if(regnodes[i] == N)
-                       regnodes[i] = newname(lookup(regname[i]));
-               var[i].node = regnodes[i];
-       }
-
-       regbits = RtoB(REG_SP);
-       for(z=0; z<BITS; z++) {
-               externs.b[z] = 0;
-               params.b[z] = 0;
-               consts.b[z] = 0;
-               addrs.b[z] = 0;
-               ivar.b[z] = 0;
-               ovar.b[z] = 0;
-       }
-
-       /*
-        * pass 1
-        * build aux data structure
-        * allocate pcs
-        * find use and set of variables
-        */
-       g = flowstart(firstp, sizeof(Reg));
-       if(g == nil) {
-               for(i=0; i<nvar; i++)
-                       var[i].node->opt = nil;
-               return;
-       }
-
-       firstr = (Reg*)g->start;
-
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               p = r->f.prog;
-               if(p->as == AVARDEF || p->as == AVARKILL)
-                       continue;
-               proginfo(&info, p);
-
-               // Avoid making variables for direct-called functions.
-               if(p->as == ACALL && p->to.type == TYPE_MEM && p->to.name == NAME_EXTERN)
-                       continue;
-
-               r->use1.b[0] |= info.reguse | info.regindex;
-               r->set.b[0] |= info.regset;
-
-               bit = mkvar(r, &p->from);
-               if(bany(&bit)) {
-                       if(info.flags & LeftAddr)
-                               setaddrs(bit);
-                       if(info.flags & LeftRead)
-                               for(z=0; z<BITS; z++)
-                                       r->use1.b[z] |= bit.b[z];
-                       if(info.flags & LeftWrite)
-                               for(z=0; z<BITS; z++)
-                                       r->set.b[z] |= bit.b[z];
-               }
-
-               bit = mkvar(r, &p->to);
-               if(bany(&bit)) {        
-                       if(info.flags & RightAddr)
-                               setaddrs(bit);
-                       if(info.flags & RightRead)
-                               for(z=0; z<BITS; z++)
-                                       r->use2.b[z] |= bit.b[z];
-                       if(info.flags & RightWrite)
-                               for(z=0; z<BITS; z++)
-                                       r->set.b[z] |= bit.b[z];
-               }
-       }
-       if(firstr == R)
-               return;
-
-       for(i=0; i<nvar; i++) {
-               Var *v = var+i;
-               if(v->addr) {
-                       bit = blsh(i);
-                       for(z=0; z<BITS; z++)
-                               addrs.b[z] |= bit.b[z];
-               }
-
-               if(debug['R'] && debug['v'])
-                       print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
-                               i, v->addr, v->etype, v->width, v->node, v->offset);
-       }
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass1", &firstr->f, 1);
-
-       /*
-        * pass 2
-        * find looping structure
-        */
-       flowrpo(g);
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass2", &firstr->f, 1);
-
-       /*
-        * pass 2.5
-        * iterate propagating fat vardef covering forward
-        * r->act records vars with a VARDEF since the last CALL.
-        * (r->act will be reused in pass 5 for something else,
-        * but we'll be done with it by then.)
-        */
-       active = 0;
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               r->f.active = 0;
-               r->act = zbits;
-       }
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               p = r->f.prog;
-               if(p->as == AVARDEF && isfat(((Node*)(p->to.node))->type) && ((Node*)(p->to.node))->opt != nil) {
-                       active++;
-                       walkvardef(p->to.node, r, active);
-               }
-       }
-
-       /*
-        * pass 3
-        * iterate propagating usage
-        *      back until flow graph is complete
-        */
-loop1:
-       change = 0;
-       for(r = firstr; r != R; r = (Reg*)r->f.link)
-               r->f.active = 0;
-       for(r = firstr; r != R; r = (Reg*)r->f.link)
-               if(r->f.prog->as == ARET)
-                       prop(r, zbits, zbits);
-loop11:
-       /* pick up unreachable code */
-       i = 0;
-       for(r = firstr; r != R; r = r1) {
-               r1 = (Reg*)r->f.link;
-               if(r1 && r1->f.active && !r->f.active) {
-                       prop(r, zbits, zbits);
-                       i = 1;
-               }
-       }
-       if(i)
-               goto loop11;
-       if(change)
-               goto loop1;
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass3", &firstr->f, 1);
-
-       /*
-        * pass 4
-        * iterate propagating register/variable synchrony
-        *      forward until graph is complete
-        */
-loop2:
-       change = 0;
-       for(r = firstr; r != R; r = (Reg*)r->f.link)
-               r->f.active = 0;
-       synch(firstr, zbits);
-       if(change)
-               goto loop2;
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass4", &firstr->f, 1);
-
-       /*
-        * pass 4.5
-        * move register pseudo-variables into regu.
-        */
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
-
-               r->set.b[0] &= ~REGBITS;
-               r->use1.b[0] &= ~REGBITS;
-               r->use2.b[0] &= ~REGBITS;
-               r->refbehind.b[0] &= ~REGBITS;
-               r->refahead.b[0] &= ~REGBITS;
-               r->calbehind.b[0] &= ~REGBITS;
-               r->calahead.b[0] &= ~REGBITS;
-               r->regdiff.b[0] &= ~REGBITS;
-               r->act.b[0] &= ~REGBITS;
-       }
-
-       /*
-        * pass 5
-        * isolate regions
-        * calculate costs (paint1)
-        */
-       r = firstr;
-       if(r) {
-               for(z=0; z<BITS; z++)
-                       bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
-                         ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
-               if(bany(&bit) && !r->f.refset) {
-                       // should never happen - all variables are preset
-                       if(debug['w'])
-                               print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
-                       r->f.refset = 1;
-               }
-       }
-       for(r = firstr; r != R; r = (Reg*)r->f.link)
-               r->act = zbits;
-       rgp = region;
-       nregion = 0;
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               for(z=0; z<BITS; z++)
-                       bit.b[z] = r->set.b[z] &
-                         ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
-               if(bany(&bit) && !r->f.refset) {
-                       if(debug['w'])
-                               print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
-                       r->f.refset = 1;
-                       excise(&r->f);
-               }
-               for(z=0; z<BITS; z++)
-                       bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
-               while(bany(&bit)) {
-                       i = bnum(bit);
-                       rgp->enter = r;
-                       rgp->varno = i;
-                       change = 0;
-                       paint1(r, i);
-                       biclr(&bit, i);
-                       if(change <= 0)
-                               continue;
-                       rgp->cost = change;
-                       nregion++;
-                       if(nregion >= NRGN) {
-                               if(debug['R'] && debug['v'])
-                                       print("too many regions\n");
-                               goto brk;
-                       }
-                       rgp++;
-               }
-       }
-brk:
-       qsort(region, nregion, sizeof(region[0]), rcmp);
-
-       /*
-        * pass 6
-        * determine used registers (paint2)
-        * replace code (paint3)
-        */
-       rgp = region;
-       if(debug['R'] && debug['v'])
-               print("\nregisterizing\n");
-       for(i=0; i<nregion; i++) {
-               if(debug['R'] && debug['v'])
-                       print("region %d: cost %d varno %d enter %lld\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc);
-               bit = blsh(rgp->varno);
-               vreg = paint2(rgp->enter, rgp->varno, 0);
-               vreg = allreg(vreg, rgp);
-               if(rgp->regno != 0)
-                       paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
-               rgp++;
-       }
-
-       /*
-        * free aux structures. peep allocates new ones.
-        */
-       for(i=0; i<nvar; i++)
-               var[i].node->opt = nil;
-       flowend(g);
-       firstr = R;
-
-       if(debug['R'] && debug['v']) {
-               // Rebuild flow graph, since we inserted instructions
-               g = flowstart(firstp, sizeof(Reg));
-               firstr = (Reg*)g->start;
-               dumpit("pass6", &firstr->f, 1);
-               flowend(g);
-               firstr = R;
-       }
-
-       /*
-        * pass 7
-        * peep-hole on basic block
-        */
-       if(!debug['R'] || debug['P'])
-               peep(firstp);
-
-       /*
-        * eliminate nops
-        */
-       for(p=firstp; p!=P; p=p->link) {
-               while(p->link != P && p->link->as == ANOP)
-                       p->link = p->link->link;
-               if(p->to.type == TYPE_BRANCH)
-                       while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
-                               p->to.u.branch = p->to.u.branch->link;
-       }
-
-       if(!use_sse)
-       for(p=firstp; p!=P; p=p->link) {
-               if(p->from.reg >= REG_X0 && p->from.reg <= REG_X7)
-                       fatal("invalid use of %R with GO386=387: %P", p->from.reg, p);
-               if(p->to.reg >= REG_X0 && p->to.reg <= REG_X7)
-                       fatal("invalid use of %R with GO386=387: %P", p->to.reg, p);
-       }
-
-       if(debug['R']) {
-               if(ostats.ncvtreg ||
-                  ostats.nspill ||
-                  ostats.nreload ||
-                  ostats.ndelmov ||
-                  ostats.nvar ||
-                  ostats.naddr ||
-                  0)
-                       print("\nstats\n");
-
-               if(ostats.ncvtreg)
-                       print(" %4d cvtreg\n", ostats.ncvtreg);
-               if(ostats.nspill)
-                       print(" %4d spill\n", ostats.nspill);
-               if(ostats.nreload)
-                       print(" %4d reload\n", ostats.nreload);
-               if(ostats.ndelmov)
-                       print(" %4d delmov\n", ostats.ndelmov);
-               if(ostats.nvar)
-                       print(" %4d var\n", ostats.nvar);
-               if(ostats.naddr)
-                       print(" %4d addr\n", ostats.naddr);
-
-               memset(&ostats, 0, sizeof(ostats));
-       }
+       *n = NREGVAR;
+       return regname;
 }
 
-static void
-walkvardef(Node *n, Reg *r, int active)
+uint64
+excludedregs(void)
 {
-       Reg *r1, *r2;
-       int bn;
-       Var *v;
-       
-       for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) {
-               if(r1->f.active == active)
-                       break;
-               r1->f.active = active;
-               if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n)
-                       break;
-               for(v=n->opt; v!=nil; v=v->nextinnode) {
-                       bn = v - var;
-                       biset(&r1->act, bn);
-               }
-               if(r1->f.prog->as == ACALL)
-                       break;
-       }
-
-       for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1)
-               if(r2->f.s2 != nil)
-                       walkvardef(n, (Reg*)r2->f.s2, active);
+       return RtoB(REG_SP);
 }
 
-/*
- * add mov b,rn
- * just after r
- */
-void
-addmove(Reg *r, int bn, int rn, int f)
-{
-       Prog *p, *p1;
-       Adr *a;
-       Var *v;
-
-       p1 = mal(sizeof(*p1));
-       clearp(p1);
-       p1->pc = 9999;
-
-       p = r->f.prog;
-       p1->link = p->link;
-       p->link = p1;
-       p1->lineno = p->lineno;
-
-       v = var + bn;
-
-       a = &p1->to;
-       a->offset = v->offset;
-       a->etype = v->etype;
-       a->type = TYPE_MEM;
-       a->name = v->name;
-       a->node = v->node;
-       a->sym = linksym(v->node->sym);
-
-       // need to clean this up with wptr and
-       // some of the defaults
-       p1->as = AMOVL;
-       switch(v->etype) {
-       default:
-               fatal("unknown type %E", v->etype);
-       case TINT8:
-       case TUINT8:
-       case TBOOL:
-               p1->as = AMOVB;
-               break;
-       case TINT16:
-       case TUINT16:
-               p1->as = AMOVW;
-               break;
-       case TFLOAT32:
-               p1->as = AMOVSS;
-               break;
-       case TFLOAT64:
-               p1->as = AMOVSD;
-               break;
-       case TINT:
-       case TUINT:
-       case TINT32:
-       case TUINT32:
-       case TPTR32:
-               break;
-       }
-
-       p1->from.type = TYPE_REG;
-       p1->from.reg = rn;
-       p1->from.name = 0;
-       if(!f) {
-               p1->from = *a;
-               *a = zprog.from;
-               a->type = TYPE_REG;
-               a->reg = rn;
-               if(v->etype == TUINT8)
-                       p1->as = AMOVB;
-               if(v->etype == TUINT16)
-                       p1->as = AMOVW;
-       }
-       if(debug['R'] && debug['v'])
-               print("%P ===add=== %P\n", p, p1);
-       ostats.nspill++;
-}
-
-uint32
+uint64
 doregbits(int r)
 {
-       uint32 b;
+       uint64 b;
 
        b = 0;
        if(r >= REG_AX && r <= REG_DI)
@@ -564,605 +75,17 @@ doregbits(int r)
        return b;
 }
 
-static int
-overlap(int32 o1, int w1, int32 o2, int w2)
-{
-       int32 t1, t2;
-
-       t1 = o1+w1;
-       t2 = o2+w2;
-
-       if(!(t1 > o2 && t2 > o1))
-               return 0;
-
-       return 1;
-}
-
-Bits
-mkvar(Reg *r, Adr *a)
-{
-       Var *v;
-       int i, n, et, z, w, flag, regu;
-       int32 o;
-       Bits bit;
-       Node *node;
-
-       /*
-        * mark registers used
-        */
-       if(a->type == TYPE_NONE)
-               goto none;
-
-       if(r != R)
-               r->use1.b[0] |= doregbits(a->index);
-
-       switch(a->type) {
-       default:
-               regu = doregbits(a->reg);
-               if(regu == 0)
-                       goto none;
-               bit = zbits;
-               bit.b[0] = regu;
-               return bit;
-
-       case TYPE_ADDR:
-               a->type = TYPE_MEM;
-               bit = mkvar(r, a);
-               setaddrs(bit);
-               a->type = TYPE_ADDR;
-               ostats.naddr++;
-               goto none;
-
-       case TYPE_MEM:
-               switch(a->name) {
-               default:
-                       goto none;
-               case NAME_EXTERN:
-               case NAME_STATIC:
-               case NAME_PARAM:
-               case NAME_AUTO:
-                       n = a->name;
-                       break;
-               }
-       }
-
-       node = a->node;
-       if(node == N || node->op != ONAME || node->orig == N)
-               goto none;
-       node = node->orig;
-       if(node->orig != node)
-               fatal("%D: bad node", a);
-       if(node->sym == S || node->sym->name[0] == '.')
-               goto none;
-       et = a->etype;
-       o = a->offset;
-       w = a->width;
-       if(w < 0)
-               fatal("bad width %d for %D", w, a);
-
-       flag = 0;
-       for(i=0; i<nvar; i++) {
-               v = var+i;
-               if(v->node == node && v->name == n) {
-                       if(v->offset == o)
-                       if(v->etype == et)
-                       if(v->width == w)
-                               return blsh(i);
-
-                       // if they overlap, disable both
-                       if(overlap(v->offset, v->width, o, w)) {
-                               if(debug['R'])
-                                       print("disable %s\n", node->sym->name);
-                               v->addr = 1;
-                               flag = 1;
-                       }
-               }
-       }
-
-       switch(et) {
-       case 0:
-       case TFUNC:
-               goto none;
-       }
-
-       if(nvar >= NVAR) {
-               if(debug['w'] > 1 && node != N)
-                       fatal("variable not optimized: %D", a);
-               
-               // If we're not tracking a word in a variable, mark the rest as
-               // having its address taken, so that we keep the whole thing
-               // live at all calls. otherwise we might optimize away part of
-               // a variable but not all of it.
-               for(i=0; i<nvar; i++) {
-                       v = var+i;
-                       if(v->node == node)
-                               v->addr = 1;
-               }
-               goto none;
-       }
-
-       i = nvar;
-       nvar++;
-       v = var+i;
-       v->offset = o;
-       v->name = n;
-       v->etype = et;
-       v->width = w;
-       v->addr = flag;         // funny punning
-       v->node = node;
-       
-       // node->opt is the head of a linked list
-       // of Vars within the given Node, so that
-       // we can start at a Var and find all the other
-       // Vars in the same Go variable.
-       v->nextinnode = node->opt;
-       node->opt = v;
-
-       bit = blsh(i);
-       if(n == NAME_EXTERN || n == NAME_STATIC)
-               for(z=0; z<BITS; z++)
-                       externs.b[z] |= bit.b[z];
-       if(n == NAME_PARAM)
-               for(z=0; z<BITS; z++)
-                       params.b[z] |= bit.b[z];
-               
-       if(node->class == PPARAM)
-               for(z=0; z<BITS; z++)
-                       ivar.b[z] |= bit.b[z];
-       if(node->class == PPARAMOUT)
-               for(z=0; z<BITS; z++)
-                       ovar.b[z] |= bit.b[z];
-
-       // Treat values with their address taken as live at calls,
-       // because the garbage collector's liveness analysis in ../gc/plive.c does.
-       // These must be consistent or else we will elide stores and the garbage
-       // collector will see uninitialized data.
-       // The typical case where our own analysis is out of sync is when the
-       // node appears to have its address taken but that code doesn't actually
-       // get generated and therefore doesn't show up as an address being
-       // taken when we analyze the instruction stream.
-       // One instance of this case is when a closure uses the same name as
-       // an outer variable for one of its own variables declared with :=.
-       // The parser flags the outer variable as possibly shared, and therefore
-       // sets addrtaken, even though it ends up not being actually shared.
-       // If we were better about _ elision, _ = &x would suffice too.
-       // The broader := in a closure problem is mentioned in a comment in
-       // closure.c:/^typecheckclosure and dcl.c:/^oldname.
-       if(node->addrtaken)
-               v->addr = 1;
-
-       // Disable registerization for globals, because:
-       // (1) we might panic at any time and we want the recovery code
-       // to see the latest values (issue 1304).
-       // (2) we don't know what pointers might point at them and we want
-       // loads via those pointers to see updated values and vice versa (issue 7995).
-       //
-       // Disable registerization for results if using defer, because the deferred func
-       // might recover and return, causing the current values to be used.
-       if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
-               v->addr = 1;
-
-       if(debug['R'])
-               print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
-       ostats.nvar++;
-
-       return bit;
-
-none:
-       return zbits;
-}
-
-void
-prop(Reg *r, Bits ref, Bits cal)
-{
-       Reg *r1, *r2;
-       int z, i, j;
-       Var *v, *v1;
-
-       for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
-               for(z=0; z<BITS; z++) {
-                       ref.b[z] |= r1->refahead.b[z];
-                       if(ref.b[z] != r1->refahead.b[z]) {
-                               r1->refahead.b[z] = ref.b[z];
-                               change++;
-                       }
-                       cal.b[z] |= r1->calahead.b[z];
-                       if(cal.b[z] != r1->calahead.b[z]) {
-                               r1->calahead.b[z] = cal.b[z];
-                               change++;
-                       }
-               }
-               switch(r1->f.prog->as) {
-               case ACALL:
-                       if(noreturn(r1->f.prog))
-                               break;
-
-                       // Mark all input variables (ivar) as used, because that's what the
-                       // liveness bitmaps say. The liveness bitmaps say that so that a
-                       // panic will not show stale values in the parameter dump.
-                       // Mark variables with a recent VARDEF (r1->act) as used,
-                       // so that the optimizer flushes initializations to memory,
-                       // so that if a garbage collection happens during this CALL,
-                       // the collector will see initialized memory. Again this is to
-                       // match what the liveness bitmaps say.
-                       for(z=0; z<BITS; z++) {
-                               cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
-                               ref.b[z] = 0;
-                       }
-                       
-                       // cal.b is the current approximation of what's live across the call.
-                       // Every bit in cal.b is a single stack word. For each such word,
-                       // find all the other tracked stack words in the same Go variable
-                       // (struct/slice/string/interface) and mark them live too.
-                       // This is necessary because the liveness analysis for the garbage
-                       // collector works at variable granularity, not at word granularity.
-                       // It is fundamental for slice/string/interface: the garbage collector
-                       // needs the whole value, not just some of the words, in order to
-                       // interpret the other bits correctly. Specifically, slice needs a consistent
-                       // ptr and cap, string needs a consistent ptr and len, and interface
-                       // needs a consistent type word and data word.
-                       for(z=0; z<BITS; z++) {
-                               if(cal.b[z] == 0)
-                                       continue;
-                               for(i=0; i<64; i++) {
-                                       if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
-                                               continue;
-                                       v = var+z*64+i;
-                                       if(v->node->opt == nil) // v represents fixed register, not Go variable
-                                               continue;
-
-                                       // v->node->opt is the head of a linked list of Vars
-                                       // corresponding to tracked words from the Go variable v->node.
-                                       // Walk the list and set all the bits.
-                                       // For a large struct this could end up being quadratic:
-                                       // after the first setting, the outer loop (for z, i) would see a 1 bit
-                                       // for all of the remaining words in the struct, and for each such
-                                       // word would go through and turn on all the bits again.
-                                       // To avoid the quadratic behavior, we only turn on the bits if
-                                       // v is the head of the list or if the head's bit is not yet turned on.
-                                       // This will set the bits at most twice, keeping the overall loop linear.
-                                       v1 = v->node->opt;
-                                       j = v1 - var;
-                                       if(v == v1 || !btest(&cal, j)) {
-                                               for(; v1 != nil; v1 = v1->nextinnode) {
-                                                       j = v1 - var;
-                                                       biset(&cal, j);
-                                               }
-                                       }
-                               }
-                       }
-                       break;
-
-               case ATEXT:
-                       for(z=0; z<BITS; z++) {
-                               cal.b[z] = 0;
-                               ref.b[z] = 0;
-                       }
-                       break;
-
-               case ARET:
-                       for(z=0; z<BITS; z++) {
-                               cal.b[z] = externs.b[z] | ovar.b[z];
-                               ref.b[z] = 0;
-                       }
-                       break;
-               }
-               for(z=0; z<BITS; z++) {
-                       ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
-                               r1->use1.b[z] | r1->use2.b[z];
-                       cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
-                       r1->refbehind.b[z] = ref.b[z];
-                       r1->calbehind.b[z] = cal.b[z];
-               }
-               if(r1->f.active)
-                       break;
-               r1->f.active = 1;
-       }
-       for(; r != r1; r = (Reg*)r->f.p1)
-               for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
-                       prop(r2, r->refbehind, r->calbehind);
-}
-
-void
-synch(Reg *r, Bits dif)
-{
-       Reg *r1;
-       int z;
-
-       for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
-               for(z=0; z<BITS; z++) {
-                       dif.b[z] = (dif.b[z] &
-                               ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
-                                       r1->set.b[z] | r1->regdiff.b[z];
-                       if(dif.b[z] != r1->regdiff.b[z]) {
-                               r1->regdiff.b[z] = dif.b[z];
-                               change++;
-                       }
-               }
-               if(r1->f.active)
-                       break;
-               r1->f.active = 1;
-               for(z=0; z<BITS; z++)
-                       dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
-               if((Reg*)r1->f.s2 != R)
-                       synch((Reg*)r1->f.s2, dif);
-       }
-}
-
-uint32
-allreg(uint32 b, Rgn *r)
-{
-       Var *v;
-       int i;
-
-       v = var + r->varno;
-       r->regno = 0;
-       switch(v->etype) {
-
-       default:
-               fatal("unknown etype %d/%E", bitno(b), v->etype);
-               break;
-
-       case TINT8:
-       case TUINT8:
-       case TINT16:
-       case TUINT16:
-       case TINT32:
-       case TUINT32:
-       case TINT64:
-       case TINT:
-       case TUINT:
-       case TUINTPTR:
-       case TBOOL:
-       case TPTR32:
-               i = BtoR(~b);
-               if(i && r->cost > 0) {
-                       r->regno = i;
-                       return RtoB(i);
-               }
-               break;
-
-       case TFLOAT32:
-       case TFLOAT64:
-               if(!use_sse)
-                       break;
-               i = BtoF(~b);
-               if(i && r->cost > 0) {
-                       r->regno = i;
-                       return FtoB(i);
-               }
-               break;
-       }
-       return 0;
-}
-
-void
-paint1(Reg *r, int bn)
-{
-       Reg *r1;
-       Prog *p;
-       int z;
-       uint64 bb, rbz;
-
-       z = bn/64;
-       bb = 1LL<<(bn%64);
-       if(r->act.b[z] & bb)
-               return;
-       for(;;) {
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.p1;
-               if(r1 == R)
-                       break;
-               if(!(r1->refahead.b[z] & bb))
-                       break;
-               if(r1->act.b[z] & bb)
-                       break;
-               r = r1;
-       }
-
-       rbz = ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z]));
-       if(LOAD(r) & rbz & bb) {
-               change -= CLOAD * r->f.loop;
-       }
-       for(;;) {
-               r->act.b[z] |= bb;
-               p = r->f.prog;
-
-               if(r->f.prog->as != ANOP) { // don't give credit for NOPs
-                       if(r->use1.b[z] & bb) {
-                               change += CREF * r->f.loop;
-                               if(p->as == AFMOVL || p->as == AFMOVW)
-                                       if(BtoR(bb) != REG_F0)
-                                               change = -CINF;
-                       }
-                       if((r->use2.b[z]|r->set.b[z]) & bb) {
-                               change += CREF * r->f.loop;
-                               if(p->as == AFMOVL || p->as == AFMOVW)
-                                       if(BtoR(bb) != REG_F0)
-                                               change = -CINF;
-                       }
-               }
-
-               if(STORE(r) & r->regdiff.b[z] & bb) {
-                       change -= CLOAD * r->f.loop;
-                       if(p->as == AFMOVL || p->as == AFMOVW)
-                               if(BtoR(bb) != REG_F0)
-                                       change = -CINF;
-               }
-
-               if(r->refbehind.b[z] & bb)
-                       for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
-                               if(r1->refahead.b[z] & bb)
-                                       paint1(r1, bn);
-
-               if(!(r->refahead.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.s2;
-               if(r1 != R)
-                       if(r1->refbehind.b[z] & bb)
-                               paint1(r1, bn);
-               r = (Reg*)r->f.s1;
-               if(r == R)
-                       break;
-               if(r->act.b[z] & bb)
-                       break;
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-       }
-}
-
-uint32
-paint2(Reg *r, int bn, int depth)
-{
-       Reg *r1;
-       int z;
-       uint64 bb, vreg;
-
-       z = bn/64;
-       bb = 1LL << (bn%64);
-       vreg = regbits;
-       if(!(r->act.b[z] & bb))
-               return vreg;
-       for(;;) {
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.p1;
-               if(r1 == R)
-                       break;
-               if(!(r1->refahead.b[z] & bb))
-                       break;
-               if(!(r1->act.b[z] & bb))
-                       break;
-               r = r1;
-       }
-       for(;;) {
-               if(debug['R'] && debug['v'])
-                       print("  paint2 %d %P\n", depth, r->f.prog);
-
-               r->act.b[z] &= ~bb;
-
-               vreg |= r->regu;
-
-               if(r->refbehind.b[z] & bb)
-                       for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
-                               if(r1->refahead.b[z] & bb)
-                                       vreg |= paint2(r1, bn, depth+1);
-
-               if(!(r->refahead.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.s2;
-               if(r1 != R)
-                       if(r1->refbehind.b[z] & bb)
-                               vreg |= paint2(r1, bn, depth+1);
-               r = (Reg*)r->f.s1;
-               if(r == R)
-                       break;
-               if(!(r->act.b[z] & bb))
-                       break;
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-       }
-
-       return vreg;
-}
-
-void
-paint3(Reg *r, int bn, uint32 rb, int rn)
-{
-       Reg *r1;
-       Prog *p;
-       int z;
-       uint64 bb, rbz;
-
-       z = bn/64;
-       bb = 1LL << (bn%64);
-       if(r->act.b[z] & bb)
-               return;
-       for(;;) {
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.p1;
-               if(r1 == R)
-                       break;
-               if(!(r1->refahead.b[z] & bb))
-                       break;
-               if(r1->act.b[z] & bb)
-                       break;
-               r = r1;
-       }
-
-       rbz = ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z]));
-       if(LOAD(r) & rbz & bb)
-               addmove(r, bn, rn, 0);
-       for(;;) {
-               r->act.b[z] |= bb;
-               p = r->f.prog;
-
-               if(r->use1.b[z] & bb) {
-                       if(debug['R'] && debug['v'])
-                               print("%P", p);
-                       addreg(&p->from, rn);
-                       if(debug['R'] && debug['v'])
-                               print(" ===change== %P\n", p);
-               }
-               if((r->use2.b[z]|r->set.b[z]) & bb) {
-                       if(debug['R'] && debug['v'])
-                               print("%P", p);
-                       addreg(&p->to, rn);
-                       if(debug['R'] && debug['v'])
-                               print(" ===change== %P\n", p);
-               }
-
-               if(STORE(r) & r->regdiff.b[z] & bb)
-                       addmove(r, bn, rn, 1);
-               r->regu |= rb;
-
-               if(r->refbehind.b[z] & bb)
-                       for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
-                               if(r1->refahead.b[z] & bb)
-                                       paint3(r1, bn, rb, rn);
-
-               if(!(r->refahead.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.s2;
-               if(r1 != R)
-                       if(r1->refbehind.b[z] & bb)
-                               paint3(r1, bn, rb, rn);
-               r = (Reg*)r->f.s1;
-               if(r == R)
-                       break;
-               if(r->act.b[z] & bb)
-                       break;
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-       }
-}
-
-void
-addreg(Adr *a, int rn)
-{
-       a->sym = nil;
-       a->node = nil;
-       a->offset = 0;
-       a->type = TYPE_REG;
-       a->reg = rn;
-       a->name = 0;
-
-       ostats.ncvtreg++;
-}
-
-uint32
+uint64
 RtoB(int r)
 {
 
        if(r < REG_AX || r > REG_DI)
                return 0;
-       return 1L << (r-REG_AX);
+       return 1ULL << (r-REG_AX);
 }
 
 int
-BtoR(uint32 b)
+BtoR(uint64 b)
 {
 
        b &= 0xffL;
@@ -1171,93 +94,19 @@ BtoR(uint32 b)
        return bitno(b) + REG_AX;
 }
 
-uint32
+uint64
 FtoB(int f)
 {
        if(f < REG_X0 || f > REG_X7)
                return 0;
-       return 1L << (f - REG_X0 + 8);
+       return 1ULL << (f - REG_X0 + 8);
 }
 
 int
-BtoF(uint32 b)
+BtoF(uint64 b)
 {
        b &= 0xFF00L;
        if(b == 0)
                return 0;
        return bitno(b) - 8 + REG_X0;
 }
-
-void
-dumpone(Flow *f, int isreg)
-{
-       int z;
-       Bits bit;
-       Reg *r;
-
-       print("%d:%P", f->loop, f->prog);
-       if(isreg) {
-               r = (Reg*)f;
-               for(z=0; z<BITS; z++)
-                       bit.b[z] =
-                               r->set.b[z] |
-                               r->use1.b[z] |
-                               r->use2.b[z] |
-                               r->refbehind.b[z] |
-                               r->refahead.b[z] |
-                               r->calbehind.b[z] |
-                               r->calahead.b[z] |
-                               r->regdiff.b[z] |
-                               r->act.b[z] |
-                                       0;
-               if(bany(&bit)) {
-                       print("\t");
-                       if(bany(&r->set))
-                               print(" s:%Q", r->set);
-                       if(bany(&r->use1))
-                               print(" u1:%Q", r->use1);
-                       if(bany(&r->use2))
-                               print(" u2:%Q", r->use2);
-                       if(bany(&r->refbehind))
-                               print(" rb:%Q ", r->refbehind);
-                       if(bany(&r->refahead))
-                               print(" ra:%Q ", r->refahead);
-                       if(bany(&r->calbehind))
-                               print(" cb:%Q ", r->calbehind);
-                       if(bany(&r->calahead))
-                               print(" ca:%Q ", r->calahead);
-                       if(bany(&r->regdiff))
-                               print(" d:%Q ", r->regdiff);
-                       if(bany(&r->act))
-                               print(" a:%Q ", r->act);
-               }
-       }
-       print("\n");
-}
-
-void
-dumpit(char *str, Flow *r0, int isreg)
-{
-       Flow *r, *r1;
-
-       print("\n%s\n", str);
-       for(r = r0; r != nil; r = r->link) {
-               dumpone(r, isreg);
-               r1 = r->p2;
-               if(r1 != nil) {
-                       print(" pred:");
-                       for(; r1 != nil; r1 = r1->p2link)
-                               print(" %.4ud", (int)r1->prog->pc);
-                       print("\n");
-               }
-               // Print successors if it's not just the next one
-               if(r->s1 != r->link || r->s2 != nil) {
-                       print(" succ:");
-                       if(r->s1 != nil)
-                               print(" %.4ud", (int)r->s1->prog->pc);
-                       if(r->s2 != nil)
-                               print(" %.4ud", (int)r->s2->prog->pc);
-                       print("\n");
-               }
-       }
-}
index 39db87d48aa1f5af9aa8e111a402f7b3dfddc9b6..5ee535de0505a190a48112a909473f6ddf715ba6 100644 (file)
@@ -73,14 +73,22 @@ main(int argc, char **argv)
        arch.ginscall = ginscall;
        arch.igen = igen;
        arch.linkarchinit = linkarchinit;
+       arch.peep = peep;
        arch.proginfo = proginfo;
        arch.regalloc = regalloc;
        arch.regfree = regfree;
-       arch.regopt = regopt;
        arch.regtyp = regtyp;
        arch.sameaddr = sameaddr;
        arch.smallindir = smallindir;
        arch.stackaddr = stackaddr;
+       arch.excludedregs = excludedregs;
+       arch.RtoB = RtoB;
+       arch.FtoB = RtoB;
+       arch.BtoR = BtoR;
+       arch.BtoF = BtoF;
+       arch.optoas = optoas;
+       arch.doregbits = doregbits;
+       arch.regnames = regnames;
        
        gcmain(argc, argv);
 }
index 235b8b9731fa32cdbd55cebad7012061c502c569..cc44f3586c16ab3fed97f07b54401c59037a503c 100644 (file)
@@ -154,3 +154,19 @@ int smallindir(Addr*, Addr*);
 int stackaddr(Addr*);
 Prog* unpatch(Prog*);
 
+
+/*
+ * reg.c
+ */
+uint64 excludedregs(void);
+uint64 RtoB(int);
+uint64 FtoB(int);
+int BtoR(uint64);
+int BtoF(uint64);
+uint64 doregbits(int);
+char** regnames(int*);
+
+/*
+ * peep.c
+ */
+void peep(Prog*);
index 7e8efb504810e288760e6ea503743893512a6812..7b342826852bbaf238fdf8af91c9a15f267f47ff 100644 (file)
@@ -7,7 +7,7 @@
 #include <u.h>
 #include <libc.h>
 #include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
 
 static Prog *appendpp(Prog *p, int as, int ftype, int freg, vlong foffset, int ttype, int treg, vlong toffset);
 static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi);
index 25b670327988d5d5b912b89222218b2ec5388e42..79a34fb1f0b86e56ee02c943bf95add12e736b3e 100644 (file)
@@ -1,175 +1,6 @@
-// Derived from Inferno utils/6c/gc.h
-// http://code.google.com/p/inferno-os/source/browse/utils/6c/gc.h
-//
-//     Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
-//     Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
-//     Portions Copyright © 1997-1999 Vita Nuova Limited
-//     Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
-//     Portions Copyright © 2004,2006 Bruce Ellis
-//     Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
-//     Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
-//     Portions Copyright © 2009 The Go Authors.  All rights reserved.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-
-#define        Z       N
-#define        Adr     Addr
-
-#define        BLOAD(r)        band(bnot(r->refbehind), r->refahead)
-#define        BSTORE(r)       band(bnot(r->calbehind), r->calahead)
-#define        LOAD(r)         (~r->refbehind.b[z] & r->refahead.b[z])
-#define        STORE(r)        (~r->calbehind.b[z] & r->calahead.b[z])
-
-#define        CLOAD   5
-#define        CREF    5
-#define        CINF    1000
-#define        LOOP    3
-
-typedef        struct  Reg     Reg;
-typedef        struct  Rgn     Rgn;
-
-/*c2go
-extern Node *Z;
-enum
-{
-       CLOAD = 5,
-       CREF = 5,
-       CINF = 1000,
-       LOOP = 3,
-};
-
-uint32 BLOAD(Reg*);
-uint32 BSTORE(Reg*);
-uint32 LOAD(Reg*);
-uint32 STORE(Reg*);
-*/
-
-// A Reg is a wrapper around a single Prog (one instruction) that holds
-// register optimization information while the optimizer runs.
-// r->prog is the instruction.
-// r->prog->opt points back to r.
-struct Reg
-{
-       Flow    f;
-
-       Bits    set;            // regopt variables written by this instruction.
-       Bits    use1;           // regopt variables read by prog->from.
-       Bits    use2;           // regopt variables read by prog->to.
-
-       // refahead/refbehind are the regopt variables whose current
-       // value may be used in the following/preceding instructions
-       // up to a CALL (or the value is clobbered).
-       Bits    refbehind;
-       Bits    refahead;
-       // calahead/calbehind are similar, but for variables in
-       // instructions that are reachable after hitting at least one
-       // CALL.
-       Bits    calbehind;
-       Bits    calahead;
-       Bits    regdiff;
-       Bits    act;
-
-       uint64  regu;           // register used bitmap
-};
-#define        R       ((Reg*)0)
-/*c2go extern Reg *R; */
-
-#define        NRGN    600
-/*c2go enum { NRGN = 600 }; */
-
-// A Rgn represents a single regopt variable over a region of code
-// where a register could potentially be dedicated to that variable.
-// The code encompassed by a Rgn is defined by the flow graph,
-// starting at enter, flood-filling forward while varno is refahead
-// and backward while varno is refbehind, and following branches.  A
-// single variable may be represented by multiple disjoint Rgns and
-// each Rgn may choose a different register for that variable.
-// Registers are allocated to regions greedily in order of descending
-// cost.
-struct Rgn
-{
-       Reg*    enter;
-       short   cost;
-       short   varno;
-       short   regno;
-};
-
-EXTERN int32   exregoffset;            // not set
-EXTERN int32   exfregoffset;           // not set
-EXTERN Reg     zreg;
-EXTERN Rgn     region[NRGN];
-EXTERN Rgn*    rgp;
-EXTERN int     nregion;
-EXTERN int     nvar;
-EXTERN int32   regbits;
-EXTERN int32   exregbits;              // TODO(austin) not used; remove
-EXTERN Bits    externs;
-EXTERN Bits    params;
-EXTERN Bits    consts;
-EXTERN Bits    addrs;
-EXTERN Bits    ivar;
-EXTERN Bits    ovar;
-EXTERN int     change;
-EXTERN int32   maxnr;
-
-EXTERN struct
-{
-       int32   ncvtreg;
-       int32   nspill;
-       int32   ndelmov;
-       int32   nvar;
-} ostats;
-
-/*
- * reg.c
- */
-int    rcmp(const void*, const void*);
-void   regopt(Prog*);
-void   addmove(Reg*, int, int, int);
-Bits   mkvar(Reg*, Adr*);
-void   prop(Reg*, Bits, Bits);
-void   synch(Reg*, Bits);
-uint64 allreg(uint64, Rgn*);
-void   paint1(Reg*, int);
-uint64 paint2(Reg*, int, int);
-void   paint3(Reg*, int, uint64, int);
-void   addreg(Adr*, int);
-void   dumpone(Flow*, int);
-void   dumpit(char*, Flow*, int);
-
-/*
- * peep.c
- */
-void   peep(Prog*);
-void   excise(Flow*);
-int    copyu(Prog*, Adr*, Adr*);
-
-uint64 RtoB(int);
-uint64 FtoB(int);
-int    BtoR(uint64);
-int    BtoF(uint64);
-
-/*
- * prog.c
- */
-void proginfo(ProgInfo*, Prog*);
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
 
 // Many Power ISA arithmetic and logical instructions come in four
 // standard variants.  These bits let us map between variants.
index 95ff0b4d5842a3eaf5374607504fa521d32a9469..1ca28dde0520485e56913bc2732566a15fcbd834 100644 (file)
@@ -31,6 +31,7 @@
 #include <u.h>
 #include <libc.h>
 #include "gg.h"
+#include "../gc/popt.h"
 #include "opt.h"
 
 static int     regzer(Addr *a);
@@ -42,6 +43,7 @@ static int    copyau(Addr*, Addr*);
 static int     copysub(Addr*, Addr*, Addr*, int);
 static int     copysub1(Prog*, Addr*, Addr*, int);
 static int     copyau1(Prog *p, Addr *v);
+static int     copyu(Prog *p, Addr *v, Addr *s);
 
 static uint32  gactive;
 
@@ -568,7 +570,7 @@ copy1(Addr *v1, Addr *v2, Flow *r, int f)
 //     4 if v is set in one address and used in another (so addresses
 //       can be rewritten independently)
 //     0 otherwise (not touched)
-int
+static int
 copyu(Prog *p, Addr *v, Addr *s)
 {
        if(p->from3.type != TYPE_NONE)
index 1775993a97da7a82d2e58ce7b218d64b67e4568b..561249c358319022175b0e015f73df2fe522fcc6 100644 (file)
@@ -5,6 +5,7 @@
 #include <u.h>
 #include <libc.h>
 #include "gg.h"
+#include "../gc/popt.h"
 #include "opt.h"
 
 enum {
index a7ee07e5473e95a3695b101ba2a38e780b641828..84e1747e8d04d7da5a47213311f94ae335838ee2 100644 (file)
 #include <u.h>
 #include <libc.h>
 #include "gg.h"
-#include "opt.h"
+#include "../gc/popt.h"
 
-#define        NREGVAR 64      /* 32 general + 32 floating */
-#define        REGBITS ((uint64)0xffffffffffffffffull)
-/*c2go enum {
-       NREGVAR = 64,
-       REGBITS = 0xffffffffffffffff,
+enum {
+       NREGVAR = 64,   /* 32 general + 32 floating */
 };
-*/
 
-static Reg*    firstr;
-static int     first   = 1;
-
-int
-rcmp(const void *a1, const void *a2)
-{
-       Rgn *p1, *p2;
-       int c1, c2;
-
-       p1 = (Rgn*)a1;
-       p2 = (Rgn*)a2;
-       c1 = p2->cost;
-       c2 = p1->cost;
-       if(c1 -= c2)
-               return c1;
-       return p2->varno - p1->varno;
-}
-
-static void
-setaddrs(Bits bit)
-{
-       int i, n;
-       Var *v;
-       Node *node;
-
-       while(bany(&bit)) {
-               // convert each bit to a variable
-               i = bnum(bit);
-               node = var[i].node;
-               n = var[i].name;
-               biclr(&bit, i);
-
-               // disable all pieces of that variable
-               for(i=0; i<nvar; i++) {
-                       v = var+i;
-                       if(v->node == node && v->name == n)
-                               v->addr = 2;
-               }
-       }
-}
 
 static char* regname[] = {
        ".R0",
@@ -149,1059 +105,32 @@ static char* regname[] = {
        ".F31",
 };
 
-static Node* regnodes[NREGVAR];
-
-static void walkvardef(Node *n, Reg *r, int active);
-
-void
-regopt(Prog *firstp)
+char**
+regnames(int *n)
 {
-       Reg *r, *r1;
-       Prog *p;
-       Graph *g;
-       ProgInfo info;
-       int i, z, active;
-       uint64 vreg, usedreg;
-       Bits bit;
-
-       if(first) {
-               fmtinstall('Q', Qconv);
-               first = 0;
-       }
-
-       mergetemp(firstp);
+       *n = NREGVAR;
+       return regname;
+}
 
-       /*
-        * control flow is more complicated in generated go code
-        * than in generated c code.  define pseudo-variables for
-        * registers, so we have complete register usage information.
-        */
-       nvar = NREGVAR;
-       memset(var, 0, NREGVAR*sizeof var[0]);
-       for(i=0; i<NREGVAR; i++) {
-               if(regnodes[i] == N)
-                       regnodes[i] = newname(lookup(regname[i]));
-               var[i].node = regnodes[i];
-       }
+uint64
+excludedregs(void)
+{
+       uint64 regbits;
 
        // Exclude registers with fixed functions
        regbits = (1<<0)|RtoB(REGSP)|RtoB(REGG)|RtoB(REGTLS);
        // Also exclude floating point registers with fixed constants
        regbits |= RtoB(REG_F27)|RtoB(REG_F28)|RtoB(REG_F29)|RtoB(REG_F30)|RtoB(REG_F31);
-       externs = zbits;
-       params = zbits;
-       consts = zbits;
-       addrs = zbits;
-       ivar = zbits;
-       ovar = zbits;
-
-       /*
-        * pass 1
-        * build aux data structure
-        * allocate pcs
-        * find use and set of variables
-        */
-       g = flowstart(firstp, sizeof(Reg));
-       if(g == nil) {
-               for(i=0; i<nvar; i++)
-                       var[i].node->opt = nil;
-               return;
-       }
-
-       firstr = (Reg*)g->start;
-
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               p = r->f.prog;
-               if(p->as == AVARDEF || p->as == AVARKILL)
-                       continue;
-               proginfo(&info, p);
-
-               // Avoid making variables for direct-called functions.
-               if(p->as == ABL && p->to.name == NAME_EXTERN)
-                       continue;
-
-               // from vs to doesn't matter for registers
-               r->use1.b[0] |= info.reguse | info.regindex;
-               r->set.b[0] |= info.regset;
-
-               // Compute used register for from
-               bit = mkvar(r, &p->from);
-               if(info.flags & LeftAddr)
-                       setaddrs(bit);
-               if(info.flags & LeftRead)
-                       for(z=0; z<BITS; z++)
-                               r->use1.b[z] |= bit.b[z];
-
-               // Compute used register for reg
-               if(info.flags & RegRead)
-                       r->use1.b[0] |= RtoB(p->reg);
-
-               // Currently we never generate three register forms.
-               // If we do, this will need to change.
-               if(p->from3.type != TYPE_NONE)
-                       fatal("regopt not implemented for from3");
-
-               // Compute used register for to
-               bit = mkvar(r, &p->to);
-               if(info.flags & RightAddr)
-                       setaddrs(bit);
-               if(info.flags & RightRead)
-                       for(z=0; z<BITS; z++)
-                               r->use2.b[z] |= bit.b[z];
-               if(info.flags & RightWrite)
-                       for(z=0; z<BITS; z++)
-                               r->set.b[z] |= bit.b[z];
-       }
-
-       for(i=0; i<nvar; i++) {
-               Var *v = var+i;
-               if(v->addr) {
-                       bit = blsh(i);
-                       for(z=0; z<BITS; z++)
-                               addrs.b[z] |= bit.b[z];
-               }
-
-               if(debug['R'] && debug['v'])
-                       print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
-                               i, v->addr, v->etype, v->width, v->node, v->offset);
-       }
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass1", &firstr->f, 1);
-
-       /*
-        * pass 2
-        * find looping structure
-        */
-       flowrpo(g);
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass2", &firstr->f, 1);
-
-       /*
-        * pass 2.5
-        * iterate propagating fat vardef covering forward
-        * r->act records vars with a VARDEF since the last CALL.
-        * (r->act will be reused in pass 5 for something else,
-        * but we'll be done with it by then.)
-        */
-       active = 0;
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               r->f.active = 0;
-               r->act = zbits;
-       }
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               p = r->f.prog;
-               if(p->as == AVARDEF && isfat(((Node*)(p->to.node))->type) && ((Node*)(p->to.node))->opt != nil) {
-                       active++;
-                       walkvardef(p->to.node, r, active);
-               }
-       }
-
-       /*
-        * pass 3
-        * iterate propagating usage
-        *      back until flow graph is complete
-        */
-loop1:
-       change = 0;
-       for(r = firstr; r != R; r = (Reg*)r->f.link)
-               r->f.active = 0;
-       for(r = firstr; r != R; r = (Reg*)r->f.link)
-               if(r->f.prog->as == ARET)
-                       prop(r, zbits, zbits);
-loop11:
-       /* pick up unreachable code */
-       i = 0;
-       for(r = firstr; r != R; r = r1) {
-               r1 = (Reg*)r->f.link;
-               if(r1 && r1->f.active && !r->f.active) {
-                       prop(r, zbits, zbits);
-                       i = 1;
-               }
-       }
-       if(i)
-               goto loop11;
-       if(change)
-               goto loop1;
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass3", &firstr->f, 1);
-
-       /*
-        * pass 4
-        * iterate propagating register/variable synchrony
-        *      forward until graph is complete
-        */
-loop2:
-       change = 0;
-       for(r = firstr; r != R; r = (Reg*)r->f.link)
-               r->f.active = 0;
-       synch(firstr, zbits);
-       if(change)
-               goto loop2;
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass4", &firstr->f, 1);
-
-       /*
-        * pass 4.5
-        * move register pseudo-variables into regu.
-        */
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
-
-               r->set.b[0] &= ~REGBITS;
-               r->use1.b[0] &= ~REGBITS;
-               r->use2.b[0] &= ~REGBITS;
-               r->refbehind.b[0] &= ~REGBITS;
-               r->refahead.b[0] &= ~REGBITS;
-               r->calbehind.b[0] &= ~REGBITS;
-               r->calahead.b[0] &= ~REGBITS;
-               r->regdiff.b[0] &= ~REGBITS;
-               r->act.b[0] &= ~REGBITS;
-       }
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass4.5", &firstr->f, 1);
-
-       /*
-        * pass 5
-        * isolate regions
-        * calculate costs (paint1)
-        */
-       r = firstr;
-       if(r) {
-               for(z=0; z<BITS; z++)
-                       bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
-                         ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
-               if(bany(&bit) && !r->f.refset) {
-                       // should never happen - all variables are preset
-                       if(debug['w'])
-                               print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
-                       r->f.refset = 1;
-               }
-       }
-       for(r = firstr; r != R; r = (Reg*)r->f.link)
-               r->act = zbits;
-       rgp = region;
-       nregion = 0;
-       for(r = firstr; r != R; r = (Reg*)r->f.link) {
-               for(z=0; z<BITS; z++)
-                       bit.b[z] = r->set.b[z] &
-                         ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
-               if(bany(&bit) && !r->f.refset) {
-                       if(debug['w'])
-                               print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
-                       r->f.refset = 1;
-                       excise(&r->f);
-               }
-               for(z=0; z<BITS; z++)
-                       bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
-               while(bany(&bit)) {
-                       i = bnum(bit);
-                       rgp->enter = r;
-                       rgp->varno = i;
-                       change = 0;
-                       paint1(r, i);
-                       biclr(&bit, i);
-                       if(change <= 0)
-                               continue;
-                       rgp->cost = change;
-                       nregion++;
-                       if(nregion >= NRGN) {
-                               if(debug['R'] && debug['v'])
-                                       print("too many regions\n");
-                               goto brk;
-                       }
-                       rgp++;
-               }
-       }
-brk:
-       qsort(region, nregion, sizeof(region[0]), rcmp);
-
-       if(debug['R'] && debug['v'])
-               dumpit("pass5", &firstr->f, 1);
-
-       /*
-        * pass 6
-        * determine used registers (paint2)
-        * replace code (paint3)
-        */
-       rgp = region;
-       if(debug['R'] && debug['v'])
-               print("\nregisterizing\n");
-       for(i=0; i<nregion; i++) {
-               if(debug['R'] && debug['v'])
-                       print("region %d: cost %d varno %d enter %lld\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc);
-               bit = blsh(rgp->varno);
-               usedreg = paint2(rgp->enter, rgp->varno, 0);
-               vreg = allreg(usedreg, rgp);
-               if(rgp->regno != 0) {
-                       if(debug['R'] && debug['v']) {
-                               Var *v;
-
-                               v = var + rgp->varno;
-                               print("registerize %N+%lld (bit=%2d et=%2E) in %R usedreg=%llx vreg=%llx\n",
-                                               v->node, v->offset, rgp->varno, v->etype, rgp->regno, usedreg, vreg);
-                       }
-                       paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
-               }
-               rgp++;
-       }
-
-       /*
-        * free aux structures. peep allocates new ones.
-        */
-       for(i=0; i<nvar; i++)
-               var[i].node->opt = nil;
-       flowend(g);
-       firstr = R;
-
-       if(debug['R'] && debug['v']) {
-               // Rebuild flow graph, since we inserted instructions
-               g = flowstart(firstp, sizeof(Reg));
-               firstr = (Reg*)g->start;
-               dumpit("pass6", &firstr->f, 1);
-               flowend(g);
-               firstr = R;
-       }
-
-       /*
-        * pass 7
-        * peep-hole on basic block
-        */
-       if(!debug['R'] || debug['P'])
-               peep(firstp);
-
-       /*
-        * eliminate nops
-        */
-       for(p=firstp; p!=P; p=p->link) {
-               while(p->link != P && p->link->as == ANOP)
-                       p->link = p->link->link;
-               if(p->to.type == TYPE_BRANCH)
-                       while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
-                               p->to.u.branch = p->to.u.branch->link;
-       }
-
-       if(debug['R']) {
-               if(ostats.ncvtreg ||
-                  ostats.nspill ||
-                  ostats.ndelmov ||
-                  ostats.nvar ||
-                  0)
-                       print("\nstats\n");
-
-               if(ostats.ncvtreg)
-                       print(" %4d cvtreg\n", ostats.ncvtreg);
-               if(ostats.nspill)
-                       print(" %4d spill\n", ostats.nspill);
-               if(ostats.ndelmov)
-                       print(" %4d delmov\n", ostats.ndelmov);
-               if(ostats.nvar)
-                       print(" %4d var\n", ostats.nvar);
-
-               memset(&ostats, 0, sizeof(ostats));
-       }
-
-       return;
-}
-
-static void
-walkvardef(Node *n, Reg *r, int active)
-{
-       Reg *r1, *r2;
-       int bn;
-       Var *v;
-       
-       for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) {
-               if(r1->f.active == active)
-                       break;
-               r1->f.active = active;
-               if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n)
-                       break;
-               for(v=n->opt; v!=nil; v=v->nextinnode) {
-                       bn = v - var;
-                       biset(&r1->act, bn);
-               }
-               if(r1->f.prog->as == ABL)
-                       break;
-       }
-
-       for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1)
-               if(r2->f.s2 != nil)
-                       walkvardef(n, (Reg*)r2->f.s2, active);
-}
-
-/*
- * add mov b,rn
- * just after r
- */
-void
-addmove(Reg *r, int bn, int rn, int f)
-{
-       Prog *p, *p1, *p2;
-       Adr *a;
-       Var *v;
-
-       p1 = mal(sizeof(*p1));
-       *p1 = zprog;
-       p = r->f.prog;
-       
-       // If there's a stack fixup coming (ADD $n,R1 after BL newproc or BL deferproc),
-       // delay the load until after the fixup.
-       p2 = p->link;
-       if(p2 && p2->as == AADD && p2->to.reg == REGSP && p2->to.type == TYPE_REG)
-               p = p2;
-
-       p1->link = p->link;
-       p->link = p1;
-       p1->lineno = p->lineno;
-
-       v = var + bn;
-
-       a = &p1->to;
-       a->name = v->name;
-       a->node = v->node;
-       a->sym = linksym(v->node->sym);
-       a->offset = v->offset;
-       a->etype = v->etype;
-       a->type = TYPE_MEM;
-       if(a->etype == TARRAY)
-               a->type = TYPE_ADDR;
-       else if(a->sym == nil)
-               a->type = TYPE_CONST;
-
-       if(v->addr)
-               fatal("addmove: shouldn't be doing this %A\n", a);
-
-       switch(v->etype) {
-       default:
-               print("What is this %E\n", v->etype);
-
-       case TINT8:
-               p1->as = AMOVB;
-               break;
-       case TBOOL:
-       case TUINT8:
-//print("movbu %E %d %S\n", v->etype, bn, v->sym);
-               p1->as = AMOVBZ;
-               break;
-       case TINT16:
-               p1->as = AMOVH;
-               break;
-       case TUINT16:
-               p1->as = AMOVHZ;
-               break;
-       case TINT32:
-               p1->as = AMOVW;
-               break;
-       case TUINT32:
-       case TPTR32:
-               p1->as = AMOVWZ;
-               break;
-       case TINT64:
-       case TUINT64:
-       case TPTR64:
-               p1->as = AMOVD;
-               break;
-       case TFLOAT32:
-               p1->as = AFMOVS;
-               break;
-       case TFLOAT64:
-               p1->as = AFMOVD;
-               break;
-       }
-
-       p1->from.type = TYPE_REG;
-       p1->from.reg = rn;
-       if(!f) {
-               p1->from = *a;
-               *a = zprog.from;
-               a->type = TYPE_REG;
-               a->reg = rn;
-               if(v->etype == TUINT8 || v->etype == TBOOL)
-                       p1->as = AMOVBZ;
-               if(v->etype == TUINT16)
-                       p1->as = AMOVHZ;
-       }
-       if(debug['R'])
-               print("%P\t.a%P\n", p, p1);
-       ostats.nspill++;
-}
-
-static int
-overlap(int64 o1, int w1, int64 o2, int w2)
-{
-       int64 t1, t2;
-
-       t1 = o1+w1;
-       t2 = o2+w2;
-
-       if(!(t1 > o2 && t2 > o1))
-               return 0;
-
-       return 1;
-}
-
-Bits
-mkvar(Reg *r, Adr *a)
-{
-       USED(r);
-       Var *v;
-       int i, t, n, et, z, flag;
-       int64 w;
-       int64 o;
-       Bits bit;
-       Node *node;
-
-       // mark registers used
-       t = a->type;
-       switch(t) {
-       default:
-               print("type %d %d %D\n", t, a->name, a);
-               goto none;
-
-       case TYPE_NONE:
-               goto none;
-
-       case TYPE_BRANCH:
-       case TYPE_CONST:
-       case TYPE_FCONST:
-       case TYPE_SCONST:
-       case TYPE_MEM:
-       case TYPE_ADDR:
-               break;
-
-       case TYPE_REG:
-               if(a->reg != 0) {
-                       bit = zbits;
-                       bit.b[0] = RtoB(a->reg);
-                       return bit;
-               }
-               break;
-       }
-
-       switch(a->name) {
-       default:
-               goto none;
-
-       case NAME_EXTERN:
-       case NAME_STATIC:
-       case NAME_AUTO:
-       case NAME_PARAM:
-               n = a->name;
-               break;
-       }
-
-       node = a->node;
-       if(node == N || node->op != ONAME || node->orig == N)
-               goto none;
-       node = node->orig;
-       if(node->orig != node)
-               fatal("%D: bad node", a);
-       if(node->sym == S || node->sym->name[0] == '.')
-               goto none;
-       et = a->etype;
-       o = a->offset;
-       w = a->width;
-       if(w < 0)
-               fatal("bad width %lld for %D", w, a);
-
-       flag = 0;
-       for(i=0; i<nvar; i++) {
-               v = var+i;
-               if(v->node == node && v->name == n) {
-                       if(v->offset == o)
-                       if(v->etype == et)
-                       if(v->width == w)
-                               return blsh(i);
-
-                       // if they overlap, disable both
-                       if(overlap(v->offset, v->width, o, w)) {
-                               v->addr = 1;
-                               flag = 1;
-                       }
-               }
-       }
-
-       switch(et) {
-       case 0:
-       case TFUNC:
-               goto none;
-       }
-
-       if(nvar >= NVAR) {
-               if(debug['w'] > 1 && node != N)
-                       fatal("variable not optimized: %#N", node);
-               
-               // If we're not tracking a word in a variable, mark the rest as
-               // having its address taken, so that we keep the whole thing
-               // live at all calls. otherwise we might optimize away part of
-               // a variable but not all of it.
-               for(i=0; i<nvar; i++) {
-                       v = var+i;
-                       if(v->node == node)
-                               v->addr = 1;
-               }
-               goto none;
-       }
-
-       i = nvar;
-       nvar++;
-       v = var+i;
-       v->offset = o;
-       v->name = n;
-       v->etype = et;
-       v->width = w;
-       v->addr = flag;         // funny punning
-       v->node = node;
-       
-       // node->opt is the head of a linked list
-       // of Vars within the given Node, so that
-       // we can start at a Var and find all the other
-       // Vars in the same Go variable.
-       v->nextinnode = node->opt;
-       node->opt = v;
-
-       bit = blsh(i);
-       if(n == NAME_EXTERN || n == NAME_STATIC)
-               for(z=0; z<BITS; z++)
-                       externs.b[z] |= bit.b[z];
-       if(n == NAME_PARAM)
-               for(z=0; z<BITS; z++)
-                       params.b[z] |= bit.b[z];
-
-       if(node->class == PPARAM)
-               for(z=0; z<BITS; z++)
-                       ivar.b[z] |= bit.b[z];
-       if(node->class == PPARAMOUT)
-               for(z=0; z<BITS; z++)
-                       ovar.b[z] |= bit.b[z];
-
-       // Treat values with their address taken as live at calls,
-       // because the garbage collector's liveness analysis in ../gc/plive.c does.
-       // These must be consistent or else we will elide stores and the garbage
-       // collector will see uninitialized data.
-       // The typical case where our own analysis is out of sync is when the
-       // node appears to have its address taken but that code doesn't actually
-       // get generated and therefore doesn't show up as an address being
-       // taken when we analyze the instruction stream.
-       // One instance of this case is when a closure uses the same name as
-       // an outer variable for one of its own variables declared with :=.
-       // The parser flags the outer variable as possibly shared, and therefore
-       // sets addrtaken, even though it ends up not being actually shared.
-       // If we were better about _ elision, _ = &x would suffice too.
-       // The broader := in a closure problem is mentioned in a comment in
-       // closure.c:/^typecheckclosure and dcl.c:/^oldname.
-       if(node->addrtaken)
-               v->addr = 1;
-
-       // Disable registerization for globals, because:
-       // (1) we might panic at any time and we want the recovery code
-       // to see the latest values (issue 1304).
-       // (2) we don't know what pointers might point at them and we want
-       // loads via those pointers to see updated values and vice versa (issue 7995).
-       //
-       // Disable registerization for results if using defer, because the deferred func
-       // might recover and return, causing the current values to be used.
-       if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
-               v->addr = 1;
-
-       if(debug['R'])
-               print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
-       ostats.nvar++;
-
-       return bit;
-
-none:
-       return zbits;
-}
-
-void
-prop(Reg *r, Bits ref, Bits cal)
-{
-       Reg *r1, *r2;
-       int z, i, j;
-       Var *v, *v1;
-
-       for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
-               for(z=0; z<BITS; z++) {
-                       ref.b[z] |= r1->refahead.b[z];
-                       if(ref.b[z] != r1->refahead.b[z]) {
-                               r1->refahead.b[z] = ref.b[z];
-                               change++;
-                       }
-                       cal.b[z] |= r1->calahead.b[z];
-                       if(cal.b[z] != r1->calahead.b[z]) {
-                               r1->calahead.b[z] = cal.b[z];
-                               change++;
-                       }
-               }
-               switch(r1->f.prog->as) {
-               case ABL:
-                       if(noreturn(r1->f.prog))
-                               break;
-
-                       // Mark all input variables (ivar) as used, because that's what the
-                       // liveness bitmaps say. The liveness bitmaps say that so that a
-                       // panic will not show stale values in the parameter dump.
-                       // Mark variables with a recent VARDEF (r1->act) as used,
-                       // so that the optimizer flushes initializations to memory,
-                       // so that if a garbage collection happens during this CALL,
-                       // the collector will see initialized memory. Again this is to
-                       // match what the liveness bitmaps say.
-                       for(z=0; z<BITS; z++) {
-                               cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
-                               ref.b[z] = 0;
-                       }
-                       
-                       // cal.b is the current approximation of what's live across the call.
-                       // Every bit in cal.b is a single stack word. For each such word,
-                       // find all the other tracked stack words in the same Go variable
-                       // (struct/slice/string/interface) and mark them live too.
-                       // This is necessary because the liveness analysis for the garbage
-                       // collector works at variable granularity, not at word granularity.
-                       // It is fundamental for slice/string/interface: the garbage collector
-                       // needs the whole value, not just some of the words, in order to
-                       // interpret the other bits correctly. Specifically, slice needs a consistent
-                       // ptr and cap, string needs a consistent ptr and len, and interface
-                       // needs a consistent type word and data word.
-                       for(z=0; z<BITS; z++) {
-                               if(cal.b[z] == 0)
-                                       continue;
-                               for(i=0; i<64; i++) {
-                                       if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
-                                               continue;
-                                       v = var+z*64+i;
-                                       if(v->node->opt == nil) // v represents fixed register, not Go variable
-                                               continue;
-
-                                       // v->node->opt is the head of a linked list of Vars
-                                       // corresponding to tracked words from the Go variable v->node.
-                                       // Walk the list and set all the bits.
-                                       // For a large struct this could end up being quadratic:
-                                       // after the first setting, the outer loop (for z, i) would see a 1 bit
-                                       // for all of the remaining words in the struct, and for each such
-                                       // word would go through and turn on all the bits again.
-                                       // To avoid the quadratic behavior, we only turn on the bits if
-                                       // v is the head of the list or if the head's bit is not yet turned on.
-                                       // This will set the bits at most twice, keeping the overall loop linear.
-                                       v1 = v->node->opt;
-                                       j = v1 - var;
-                                       if(v == v1 || !btest(&cal, j)) {
-                                               for(; v1 != nil; v1 = v1->nextinnode) {
-                                                       j = v1 - var;
-                                                       biset(&cal, j);
-                                               }
-                                       }
-                               }
-                       }
-                       break;
-
-               case ATEXT:
-                       for(z=0; z<BITS; z++) {
-                               cal.b[z] = 0;
-                               ref.b[z] = 0;
-                       }
-                       break;
-
-               case ARET:
-                       for(z=0; z<BITS; z++) {
-                               cal.b[z] = externs.b[z] | ovar.b[z];
-                               ref.b[z] = 0;
-                       }
-                       break;
-               }
-               for(z=0; z<BITS; z++) {
-                       ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
-                               r1->use1.b[z] | r1->use2.b[z];
-                       cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
-                       r1->refbehind.b[z] = ref.b[z];
-                       r1->calbehind.b[z] = cal.b[z];
-               }
-               if(r1->f.active)
-                       break;
-               r1->f.active = 1;
-       }
-       for(; r != r1; r = (Reg*)r->f.p1)
-               for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
-                       prop(r2, r->refbehind, r->calbehind);
-}
-
-void
-synch(Reg *r, Bits dif)
-{
-       Reg *r1;
-       int z;
-
-       for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
-               for(z=0; z<BITS; z++) {
-                       dif.b[z] = (dif.b[z] &
-                               ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
-                                       r1->set.b[z] | r1->regdiff.b[z];
-                       if(dif.b[z] != r1->regdiff.b[z]) {
-                               r1->regdiff.b[z] = dif.b[z];
-                               change++;
-                       }
-               }
-               if(r1->f.active)
-                       break;
-               r1->f.active = 1;
-               for(z=0; z<BITS; z++)
-                       dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
-               if(r1->f.s2 != nil)
-                       synch((Reg*)r1->f.s2, dif);
-       }
+       return regbits;
 }
 
 uint64
-allreg(uint64 b, Rgn *r)
+doregbits(int r)
 {
-       Var *v;
-       int i;
-
-       v = var + r->varno;
-       r->regno = 0;
-       switch(v->etype) {
-
-       default:
-               fatal("unknown etype %d/%E", bitno(b), v->etype);
-               break;
-
-       case TINT8:
-       case TUINT8:
-       case TINT16:
-       case TUINT16:
-       case TINT32:
-       case TUINT32:
-       case TINT64:
-       case TUINT64:
-       case TINT:
-       case TUINT:
-       case TUINTPTR:
-       case TBOOL:
-       case TPTR32:
-       case TPTR64:
-               i = BtoR(~b);
-               if(i && r->cost > 0) {
-                       r->regno = i;
-                       return RtoB(i);
-               }
-               break;
-
-       case TFLOAT32:
-       case TFLOAT64:
-               i = BtoF(~b);
-               if(i && r->cost > 0) {
-                       r->regno = i;
-                       return RtoB(i);
-               }
-               break;
-       }
+       USED(r);
        return 0;
 }
 
-void
-paint1(Reg *r, int bn)
-{
-       Reg *r1;
-       int z;
-       uint64 bb;
-
-       z = bn/64;
-       bb = 1LL<<(bn%64);
-       if(r->act.b[z] & bb)
-               return;
-       for(;;) {
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.p1;
-               if(r1 == R)
-                       break;
-               if(!(r1->refahead.b[z] & bb))
-                       break;
-               if(r1->act.b[z] & bb)
-                       break;
-               r = r1;
-       }
-
-       if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
-               change -= CLOAD * r->f.loop;
-       }
-       for(;;) {
-               r->act.b[z] |= bb;
-
-               if(r->f.prog->as != ANOP) { // don't give credit for NOPs
-                       if(r->use1.b[z] & bb)
-                               change += CREF * r->f.loop;
-                       if((r->use2.b[z]|r->set.b[z]) & bb)
-                               change += CREF * r->f.loop;
-               }
-
-               if(STORE(r) & r->regdiff.b[z] & bb) {
-                       change -= CLOAD * r->f.loop;
-               }
-
-               if(r->refbehind.b[z] & bb)
-                       for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
-                               if(r1->refahead.b[z] & bb)
-                                       paint1(r1, bn);
-
-               if(!(r->refahead.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.s2;
-               if(r1 != R)
-                       if(r1->refbehind.b[z] & bb)
-                               paint1(r1, bn);
-               r = (Reg*)r->f.s1;
-               if(r == R)
-                       break;
-               if(r->act.b[z] & bb)
-                       break;
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-       }
-}
-
-uint64
-paint2(Reg *r, int bn, int depth)
-{
-       Reg *r1;
-       int z;
-       uint64 bb, vreg;
-
-       z = bn/64;
-       bb = 1LL << (bn%64);
-       vreg = regbits;
-       if(!(r->act.b[z] & bb))
-               return vreg;
-       for(;;) {
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.p1;
-               if(r1 == R)
-                       break;
-               if(!(r1->refahead.b[z] & bb))
-                       break;
-               if(!(r1->act.b[z] & bb))
-                       break;
-               r = r1;
-       }
-       for(;;) {
-               if(debug['R'] && debug['v'])
-                       print("  paint2 %d %P\n", depth, r->f.prog);
-
-               r->act.b[z] &= ~bb;
-
-               vreg |= r->regu;
-
-               if(r->refbehind.b[z] & bb)
-                       for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
-                               if(r1->refahead.b[z] & bb)
-                                       vreg |= paint2(r1, bn, depth+1);
-
-               if(!(r->refahead.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.s2;
-               if(r1 != R)
-                       if(r1->refbehind.b[z] & bb)
-                               vreg |= paint2(r1, bn, depth+1);
-               r = (Reg*)r->f.s1;
-               if(r == R)
-                       break;
-               if(!(r->act.b[z] & bb))
-                       break;
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-       }
-       return vreg;
-}
-
-void
-paint3(Reg *r, int bn, uint64 rb, int rn)
-{
-       Reg *r1;
-       Prog *p;
-       int z;
-       uint64 bb;
-
-       z = bn/64;
-       bb = 1LL << (bn%64);
-       if(r->act.b[z] & bb)
-               return;
-       for(;;) {
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.p1;
-               if(r1 == R)
-                       break;
-               if(!(r1->refahead.b[z] & bb))
-                       break;
-               if(r1->act.b[z] & bb)
-                       break;
-               r = r1;
-       }
-
-       if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
-               addmove(r, bn, rn, 0);
-       for(;;) {
-               r->act.b[z] |= bb;
-               p = r->f.prog;
-
-               if(r->use1.b[z] & bb) {
-                       if(debug['R'] && debug['v'])
-                               print("%P", p);
-                       addreg(&p->from, rn);
-                       if(debug['R'] && debug['v'])
-                               print(" ===change== %P\n", p);
-               }
-               if((r->use2.b[z]|r->set.b[z]) & bb) {
-                       if(debug['R'] && debug['v'])
-                               print("%P", p);
-                       addreg(&p->to, rn);
-                       if(debug['R'] && debug['v'])
-                               print(" ===change== %P\n", p);
-               }
-
-               if(STORE(r) & r->regdiff.b[z] & bb)
-                       addmove(r, bn, rn, 1);
-               r->regu |= rb;
-
-               if(r->refbehind.b[z] & bb)
-                       for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
-                               if(r1->refahead.b[z] & bb)
-                                       paint3(r1, bn, rb, rn);
-
-               if(!(r->refahead.b[z] & bb))
-                       break;
-               r1 = (Reg*)r->f.s2;
-               if(r1 != R)
-                       if(r1->refbehind.b[z] & bb)
-                               paint3(r1, bn, rb, rn);
-               r = (Reg*)r->f.s1;
-               if(r == R)
-                       break;
-               if(r->act.b[z] & bb)
-                       break;
-               if(!(r->refbehind.b[z] & bb))
-                       break;
-       }
-}
-
-void
-addreg(Adr *a, int rn)
-{
-       a->sym = nil;
-       a->node = nil;
-       a->name = NAME_NONE;
-       a->type = TYPE_REG;
-       a->reg = rn;
-
-       ostats.ncvtreg++;
-}
-
 /*
  * track register variables including external registers:
  *     bit     reg
@@ -1241,78 +170,3 @@ BtoF(uint64 b)
                return 0;
        return bitno(b) + REG_F0;
 }
-
-void
-dumpone(Flow *f, int isreg)
-{
-       int z;
-       Bits bit;
-       Reg *r;
-
-       print("%d:%P", f->loop, f->prog);
-       if(isreg) {     
-               r = (Reg*)f;
-               for(z=0; z<BITS; z++)
-                       bit.b[z] =
-                               r->set.b[z] |
-                               r->use1.b[z] |
-                               r->use2.b[z] |
-                               r->refbehind.b[z] |
-                               r->refahead.b[z] |
-                               r->calbehind.b[z] |
-                               r->calahead.b[z] |
-                               r->regdiff.b[z] |
-                               r->act.b[z] |
-                                       0;
-               if(bany(&bit)) {
-                       print("\t");
-                       if(bany(&r->set))
-                               print(" s:%Q", r->set);
-                       if(bany(&r->use1))
-                               print(" u1:%Q", r->use1);
-                       if(bany(&r->use2))
-                               print(" u2:%Q", r->use2);
-                       if(bany(&r->refbehind))
-                               print(" rb:%Q ", r->refbehind);
-                       if(bany(&r->refahead))
-                               print(" ra:%Q ", r->refahead);
-                       if(bany(&r->calbehind))
-                               print(" cb:%Q ", r->calbehind);
-                       if(bany(&r->calahead))
-                               print(" ca:%Q ", r->calahead);
-                       if(bany(&r->regdiff))
-                               print(" d:%Q ", r->regdiff);
-                       if(bany(&r->act))
-                               print(" a:%Q ", r->act);
-               }
-       }
-       print("\n");
-}
-
-
-void
-dumpit(char *str, Flow *r0, int isreg)
-{
-       Flow *r, *r1;
-
-       print("\n%s\n", str);
-       for(r = r0; r != nil; r = r->link) {
-               dumpone(r, isreg);
-               r1 = r->p2;
-               if(r1 != nil) {
-                       print(" pred:");
-                       for(; r1 != nil; r1 = r1->p2link)
-                               print(" %.4ud", (int)r1->prog->pc);
-                       print("\n");
-               }
-               // Print successors if it's not just the next one
-               if(r->s1 != r->link || r->s2 != nil) {
-                       print(" succ:");
-                       if(r->s1 != nil)
-                               print(" %.4ud", (int)r->s1->prog->pc);
-                       if(r->s2 != nil)
-                               print(" %.4ud", (int)r->s2->prog->pc);
-                       print("\n");
-               }
-       }
-}
index 0674b2ce6be7ddf6d36728b55ea3f5200368653d..b53655b41294906e31b94e9147d43757d3dd00f7 100644 (file)
@@ -1679,14 +1679,22 @@ struct Arch
        void    (*ginscall)(Node*, int);
        void    (*igen)(Node*, Node*, Node*);
        void (*linkarchinit)(void);
+       void (*peep)(Prog*);
        void (*proginfo)(ProgInfo*, Prog*);
        void (*regalloc)(Node*, Type*, Node*);
        void (*regfree)(Node*);
-       void (*regopt)(Prog*);
        int (*regtyp)(Addr*);
        int (*sameaddr)(Addr*, Addr*);
        int (*smallindir)(Addr*, Addr*);
        int (*stackaddr)(Addr*);
+       uint64 (*excludedregs)(void);
+       uint64 (*RtoB)(int);
+       uint64 (*FtoB)(int);
+       int (*BtoR)(uint64);
+       int (*BtoF)(uint64);
+       int (*optoas)(int, Type*);
+       uint64 (*doregbits)(int);
+       char **(*regnames)(int*);
 };
 
 void afunclit(Addr*, Node*);
@@ -1716,6 +1724,7 @@ Prog* unpatch(Prog*);
 void datagostring(Strlit *sval, Addr *a);
 int ismem(Node*);
 int samereg(Node*, Node*);
+void   regopt(Prog*);
 
 EXTERN int32   pcloc;
 
index 0b37bd0856249dabdcec4492732fce26bdadd23f..0774e061e5b1e5aab0143e47d4c9f19fde54f346 100644 (file)
@@ -302,7 +302,7 @@ compile(Node *fn)
 
        fixjmp(ptxt);
        if(!debug['N'] || debug['R'] || debug['P']) {
-               arch.regopt(ptxt);
+               regopt(ptxt);
                nilopt(ptxt);
        }
        arch.expandchecks(ptxt);
similarity index 93%
rename from src/cmd/6g/opt.h
rename to src/cmd/gc/popt.h
index 11befb6ad1129ba881f14ed4d817fd5aaa66882a..833f69a21202b5b18025d36a28eb22e3abd69746 100644 (file)
@@ -28,7 +28,6 @@
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 // THE SOFTWARE.
 
-
 #define        Z       N
 #define        Adr     Addr
 
@@ -91,7 +90,7 @@ struct        Reg
        Bits    regdiff;
        Bits    act;
 
-       int32   regu;           // register used bitmap
+       uint64  regu;           // register used bitmap
 };
 #define        R       ((Reg*)0)
 /*c2go extern Reg *R; */
@@ -116,15 +115,12 @@ struct    Rgn
        short   regno;
 };
 
-EXTERN int32   exregoffset;            // not set
-EXTERN int32   exfregoffset;           // not set
 EXTERN Reg     zreg;
 EXTERN Rgn     region[NRGN];
 EXTERN Rgn*    rgp;
 EXTERN int     nregion;
 EXTERN int     nvar;
-EXTERN int32   regbits;
-EXTERN int32   exregbits;
+EXTERN uint64  regbits;
 EXTERN Bits    externs;
 EXTERN Bits    params;
 EXTERN Bits    consts;
@@ -153,28 +149,23 @@ void      addmove(Reg*, int, int, int);
 Bits   mkvar(Reg*, Adr*);
 void   prop(Reg*, Bits, Bits);
 void   synch(Reg*, Bits);
-uint32 allreg(uint32, Rgn*);
+uint64 allreg(uint64, Rgn*);
 void   paint1(Reg*, int);
-uint32 paint2(Reg*, int, int);
-void   paint3(Reg*, int, uint32, int);
+uint64 paint2(Reg*, int, int);
+void   paint3(Reg*, int, uint64, int);
 void   addreg(Adr*, int);
 void   dumpone(Flow*, int);
 void   dumpit(char*, Flow*, int);
 
 /*
  * peep.c
- */
 void   peep(Prog*);
 void   excise(Flow*);
 int    copyu(Prog*, Adr*, Adr*);
-
-uint32 RtoB(int);
-uint32 FtoB(int);
-int    BtoR(uint32);
-int    BtoF(uint32);
+ */
 
 /*
  * prog.c
- */
 
 void proginfo(ProgInfo*, Prog*);
+ */
diff --git a/src/cmd/gc/reg.c b/src/cmd/gc/reg.c
new file mode 100644 (file)
index 0000000..d7ffa17
--- /dev/null
@@ -0,0 +1,1193 @@
+// Derived from Inferno utils/6c/reg.c
+// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
+//
+//     Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
+//     Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
+//     Portions Copyright © 1997-1999 Vita Nuova Limited
+//     Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
+//     Portions Copyright © 2004,2006 Bruce Ellis
+//     Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
+//     Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
+//     Portions Copyright © 2009 The Go Authors.  All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include <u.h>
+#include <libc.h>
+#include "go.h"
+#include "popt.h"
+
+static Reg*    firstr;
+static int     first   = 1;
+
+// rcmp is the qsort comparison used to order the region table:
+// regions with a larger cost sort first, and regions of equal cost
+// are ordered by larger varno first.
+int
+rcmp(const void *a1, const void *a2)
+{
+       Rgn *lhs, *rhs;
+       int delta;
+
+       lhs = (Rgn*)a1;
+       rhs = (Rgn*)a2;
+
+       delta = rhs->cost - lhs->cost;
+       if(delta != 0)
+               return delta;
+       return rhs->varno - lhs->varno;
+}
+
+// setaddrs consumes the set bits of bit one at a time.  For each bit it
+// looks up the (node, name) pair of the corresponding var entry and sets
+// addr = 2 on every tracked var entry that shares that node and name.
+static void
+setaddrs(Bits bit)
+{
+       int bn, k, nm;
+       Var *cand;
+       Node *owner;
+
+       for(; bany(&bit); ) {
+               // identify the variable behind the next set bit
+               bn = bnum(bit);
+               owner = var[bn].node;
+               nm = var[bn].name;
+               biclr(&bit, bn);
+
+               // flag every piece that belongs to the same variable
+               for(k=0; k<nvar; k++) {
+                       cand = &var[k];
+                       if(cand->node != owner || cand->name != nm)
+                               continue;
+                       cand->addr = 2;
+               }
+       }
+}
+
+static Node* regnodes[64];
+
+static void walkvardef(Node *n, Reg *r, int active);
+
+void   /*
+        * regopt is the register optimizer entry point for the instruction
+        * list starting at firstp.  In order, it: merges temporaries
+        * (mergetemp), defines pseudo-variables for the machine registers,
+        * builds the flow graph and records per-instruction use/set bits
+        * (pass 1), finds the loop structure (pass 2), propagates fat VARDEF
+        * coverage (pass 2.5), propagates usage backward (pass 3) and
+        * register/variable synchrony forward (pass 4), moves the register
+        * pseudo-variable bits into regu (pass 4.5), isolates and costs
+        * regions (pass 5), assigns registers and rewrites code (pass 6),
+        * runs the architecture peephole pass (pass 7) and finally unlinks
+        * NOP instructions.  With debug['R'] set it also prints and resets
+        * the ostats counters.
+        */
+regopt(Prog *firstp)
+{
+       Reg *r, *r1;
+       Prog *p;
+       Graph *g;
+       ProgInfo info;
+       int i, z, active;
+       uint64 vreg, usedreg;
+       uint64 mask;
+       int nreg;
+       char **regnames;
+       Bits bit;
+
+       if(first) {     // one-time setup: register Qconv as the 'Q' print verb
+               fmtinstall('Q', Qconv);
+               first = 0;
+       }
+
+       mergetemp(firstp);
+
+       /*
+        * control flow is more complicated in generated go code
+        * than in generated c code.  define pseudo-variables for
+        * registers, so we have complete register usage information.
+        *
+        * the pseudo-variables occupy var[0..nreg-1]; their name nodes
+        * are cached in the static regnodes table across calls.
+        * NOTE(review): regnodes has 64 entries and nothing here checks
+        * nreg <= 64 - confirm every arch.regnames honors that bound.
+        */
+       regnames = arch.regnames(&nreg);
+       nvar = nreg;
+       memset(var, 0, nreg*sizeof var[0]);
+       for(i=0; i<nreg; i++) {
+               if(regnodes[i] == N)    // first call only: create the name node
+                       regnodes[i] = newname(lookup(regnames[i]));
+               var[i].node = regnodes[i];
+       }
+
+       regbits = arch.excludedregs();
+       externs = zbits;
+       params = zbits;
+       consts = zbits;
+       addrs = zbits;
+       ivar = zbits;
+       ovar = zbits;
+
+       /*
+        * pass 1
+        * build aux data structure
+        * allocate pcs
+        * find use and set of variables
+        */
+       g = flowstart(firstp, sizeof(Reg));
+       if(g == nil) {  // no flow graph: reset node->opt for every var and give up
+               for(i=0; i<nvar; i++)
+                       var[i].node->opt = nil;
+               return;
+       }
+
+       firstr = (Reg*)g->start;
+
+       for(r = firstr; r != R; r = (Reg*)r->f.link) {
+               p = r->f.prog;
+               if(p->as == AVARDEF || p->as == AVARKILL)
+                       continue;
+               arch.proginfo(&info, p);
+
+               // Avoid making variables for direct-called functions.
+               if(p->as == ACALL && p->to.type == TYPE_MEM && p->to.name == NAME_EXTERN)
+                       continue;
+
+               // from vs to doesn't matter for registers.
+               r->use1.b[0] |= info.reguse | info.regindex;
+               r->set.b[0] |= info.regset;
+
+               bit = mkvar(r, &p->from);       // left operand: address-taken / read / written per info.flags
+               if(bany(&bit)) {
+                       if(info.flags & LeftAddr)
+                               setaddrs(bit);
+                       if(info.flags & LeftRead)
+                               for(z=0; z<BITS; z++)
+                                       r->use1.b[z] |= bit.b[z];
+                       if(info.flags & LeftWrite)
+                               for(z=0; z<BITS; z++)
+                                       r->set.b[z] |= bit.b[z];
+               }
+
+               // Compute used register for reg
+               if(info.flags & RegRead)
+                       r->use1.b[0] |= arch.RtoB(p->reg);
+
+               // Currently we never generate three register forms.
+               // If we do, this will need to change.
+               if(p->from3.type != TYPE_NONE)
+                       fatal("regopt not implemented for from3");
+
+               bit = mkvar(r, &p->to); // right operand: same treatment, reads go to use2
+               if(bany(&bit)) {        
+                       if(info.flags & RightAddr)
+                               setaddrs(bit);
+                       if(info.flags & RightRead)
+                               for(z=0; z<BITS; z++)
+                                       r->use2.b[z] |= bit.b[z];
+                       if(info.flags & RightWrite)
+                               for(z=0; z<BITS; z++)
+                                       r->set.b[z] |= bit.b[z];
+               }
+       }
+
+       for(i=0; i<nvar; i++) { // collect every var with a nonzero addr flag into addrs
+               Var *v = var+i;
+               if(v->addr) {
+                       bit = blsh(i);
+                       for(z=0; z<BITS; z++)
+                               addrs.b[z] |= bit.b[z];
+               }
+
+               if(debug['R'] && debug['v'])
+                       print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
+                               i, v->addr, v->etype, v->width, v->node, v->offset);
+       }
+
+       if(debug['R'] && debug['v'])
+               dumpit("pass1", &firstr->f, 1);
+
+       /*
+        * pass 2
+        * find looping structure
+        */
+       flowrpo(g);
+
+       if(debug['R'] && debug['v'])
+               dumpit("pass2", &firstr->f, 1);
+
+       /*
+        * pass 2.5
+        * iterate propagating fat vardef covering forward
+        * r->act records vars with a VARDEF since the last CALL.
+        * (r->act will be reused in pass 5 for something else,
+        * but we'll be done with it by then.)
+        */
+       active = 0;
+       for(r = firstr; r != R; r = (Reg*)r->f.link) {
+               r->f.active = 0;
+               r->act = zbits;
+       }
+       for(r = firstr; r != R; r = (Reg*)r->f.link) {
+               p = r->f.prog;
+               if(p->as == AVARDEF && isfat(((Node*)(p->to.node))->type) && ((Node*)(p->to.node))->opt != nil) {
+                       active++;       // fresh marker value for each walk
+                       walkvardef(p->to.node, r, active);
+               }
+       }
+
+       /*
+        * pass 3
+        * iterate propagating usage
+        *      back until flow graph is complete
+        *
+        * repeats from loop1 for as long as a sweep leaves
+        * 'change' nonzero.
+        */
+loop1:
+       change = 0;
+       for(r = firstr; r != R; r = (Reg*)r->f.link)
+               r->f.active = 0;
+       for(r = firstr; r != R; r = (Reg*)r->f.link)
+               if(r->f.prog->as == ARET)
+                       prop(r, zbits, zbits);
+loop11:
+       /* pick up unreachable code */
+       i = 0;
+       for(r = firstr; r != R; r = r1) {
+               r1 = (Reg*)r->f.link;
+               if(r1 && r1->f.active && !r->f.active) {
+                       prop(r, zbits, zbits);
+                       i = 1;
+               }
+       }
+       if(i)
+               goto loop11;
+       if(change)
+               goto loop1;
+
+       if(debug['R'] && debug['v'])
+               dumpit("pass3", &firstr->f, 1);
+
+       /*
+        * pass 4
+        * iterate propagating register/variable synchrony
+        *      forward until graph is complete
+        */
+loop2:
+       change = 0;
+       for(r = firstr; r != R; r = (Reg*)r->f.link)
+               r->f.active = 0;
+       synch(firstr, zbits);
+       if(change)
+               goto loop2;
+
+       if(debug['R'] && debug['v'])
+               dumpit("pass4", &firstr->f, 1);
+
+       /*
+        * pass 4.5
+        * move register pseudo-variables into regu.
+        *
+        * mask selects the low nreg bits of word 0; those bits are
+        * copied into regu and then cleared from every bit vector.
+        */
+       if(nreg == 64)
+               mask = ~0ULL; // can't rely on C to shift by 64
+       else
+               mask = (1ULL<<nreg) - 1;
+       for(r = firstr; r != R; r = (Reg*)r->f.link) {
+               r->regu = (r->refbehind.b[0] | r->set.b[0]) & mask;
+               r->set.b[0] &= ~mask;
+               r->use1.b[0] &= ~mask;
+               r->use2.b[0] &= ~mask;
+               r->refbehind.b[0] &= ~mask;
+               r->refahead.b[0] &= ~mask;
+               r->calbehind.b[0] &= ~mask;
+               r->calahead.b[0] &= ~mask;
+               r->regdiff.b[0] &= ~mask;
+               r->act.b[0] &= ~mask;
+       }
+
+       if(debug['R'] && debug['v'])
+               dumpit("pass4.5", &firstr->f, 1);
+
+       /*
+        * pass 5
+        * isolate regions
+        * calculate costs (paint1)
+        */
+       r = firstr;
+       if(r) {
+               for(z=0; z<BITS; z++)
+                       bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
+                         ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
+               if(bany(&bit) && !r->f.refset) {
+                       // should never happen - all variables are preset
+                       if(debug['w'])
+                               print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
+                       r->f.refset = 1;
+               }
+       }
+       for(r = firstr; r != R; r = (Reg*)r->f.link)
+               r->act = zbits; // act is reused from here on (see pass 2.5)
+       rgp = region;
+       nregion = 0;
+       for(r = firstr; r != R; r = (Reg*)r->f.link) {
+               for(z=0; z<BITS; z++)
+                       bit.b[z] = r->set.b[z] &
+                         ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
+               if(bany(&bit) && !r->f.refset) {
+                       if(debug['w'])
+                               print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
+                       r->f.refset = 1;
+                       arch.excise(&r->f);
+               }
+               for(z=0; z<BITS; z++)
+                       bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
+               while(bany(&bit)) {
+                       i = bnum(bit);
+                       rgp->enter = r;
+                       rgp->varno = i;
+                       change = 0;     // the region's cost is read back from 'change' after paint1
+                       paint1(r, i);
+                       biclr(&bit, i);
+                       if(change <= 0) // not worth it: rgp is not advanced, so the slot is reused
+                               continue;
+                       rgp->cost = change;
+                       nregion++;
+                       if(nregion >= NRGN) {   // region table is full: stop collecting
+                               if(debug['R'] && debug['v'])
+                                       print("too many regions\n");
+                               goto brk;
+                       }
+                       rgp++;
+               }
+       }
+brk:
+       qsort(region, nregion, sizeof(region[0]), rcmp);        // rcmp puts the highest cost first
+
+       if(debug['R'] && debug['v'])
+               dumpit("pass5", &firstr->f, 1);
+
+       /*
+        * pass 6
+        * determine used registers (paint2)
+        * replace code (paint3)
+        */
+       rgp = region;
+       if(debug['R'] && debug['v'])
+               print("\nregisterizing\n");
+       for(i=0; i<nregion; i++) {
+               if(debug['R'] && debug['v'])
+                       print("region %d: cost %d varno %d enter %lld\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc);
+               bit = blsh(rgp->varno);
+               usedreg = paint2(rgp->enter, rgp->varno, 0);
+               vreg = allreg(usedreg, rgp);
+               if(rgp->regno != 0) {   // presumably allreg picked a register - confirm against allreg
+                       if(debug['R'] && debug['v']) {
+                               Var *v;
+
+                               v = var + rgp->varno;
+                               print("registerize %N+%lld (bit=%2d et=%2E) in %R usedreg=%#llx vreg=%#llx\n",
+                                               v->node, v->offset, rgp->varno, v->etype, rgp->regno, usedreg, vreg);
+                       }
+                       paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
+               }
+               rgp++;
+       }
+
+       /*
+        * free aux structures. peep allocates new ones.
+        */
+       for(i=0; i<nvar; i++)
+               var[i].node->opt = nil;
+       flowend(g);
+       firstr = R;
+
+       if(debug['R'] && debug['v']) {
+               /* Rebuild flow graph, since we inserted instructions.
+                * NOTE(review): pass 1 handles flowstart returning nil, but
+                * g->start is dereferenced here without that check - confirm
+                * flowstart cannot fail on the rewritten list.
+                */
+               g = flowstart(firstp, sizeof(Reg));
+               firstr = (Reg*)g->start;
+               dumpit("pass6", &firstr->f, 1);
+               flowend(g);
+               firstr = R;
+       }
+
+       /*
+        * pass 7
+        * peep-hole on basic block
+        */
+       if(!debug['R'] || debug['P'])
+               arch.peep(firstp);
+
+       /*
+        * eliminate nops
+        *
+        * ANOPs directly following p are unlinked from the list, and
+        * branch targets that are ANOPs are advanced past them.
+        */
+       for(p=firstp; p!=P; p=p->link) {
+               while(p->link != P && p->link->as == ANOP)
+                       p->link = p->link->link;
+               if(p->to.type == TYPE_BRANCH)
+                       while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
+                               p->to.u.branch = p->to.u.branch->link;
+       }
+
+       if(debug['R']) {        // print the nonzero ostats counters, then clear them
+               if(ostats.ncvtreg ||
+                  ostats.nspill ||
+                  ostats.nreload ||
+                  ostats.ndelmov ||
+                  ostats.nvar ||
+                  ostats.naddr ||
+                  0)
+                       print("\nstats\n");
+
+               if(ostats.ncvtreg)
+                       print(" %4d cvtreg\n", ostats.ncvtreg);
+               if(ostats.nspill)
+                       print(" %4d spill\n", ostats.nspill);
+               if(ostats.nreload)
+                       print(" %4d reload\n", ostats.nreload);
+               if(ostats.ndelmov)
+                       print(" %4d delmov\n", ostats.ndelmov);
+               if(ostats.nvar)
+                       print(" %4d var\n", ostats.nvar);
+               if(ostats.naddr)
+                       print(" %4d addr\n", ostats.naddr);
+
+               memset(&ostats, 0, sizeof(ostats));
+       }
+}
+
+// walkvardef follows the s1 successor chain starting at r, stamping each
+// visited Reg with the marker value active and setting, in its act vector,
+// the bit of every var entry hanging off n->opt.  The walk stops at a Reg
+// already stamped with active, at an AVARKILL of n (before marking it), at
+// an ACALL (after marking it), or at the end of the chain.  Every s2
+// successor met before the stopping point is then walked recursively.
+static void
+walkvardef(Node *n, Reg *r, int active)
+{
+       Reg *stop, *cur;
+       Prog *p;
+       Var *piece;
+       int bn;
+
+       stop = r;
+       while(stop != R && stop->f.active != active) {
+               stop->f.active = active;
+               p = stop->f.prog;
+               if(p->as == AVARKILL && p->to.node == n)
+                       break;
+               for(piece=n->opt; piece!=nil; piece=piece->nextinnode) {
+                       bn = piece - var;
+                       biset(&stop->act, bn);
+               }
+               if(p->as == ACALL)
+                       break;
+               stop = (Reg*)stop->f.s1;
+       }
+
+       // branch targets along the stretch just covered
+       for(cur=r; cur!=stop; cur=(Reg*)cur->f.s1) {
+               if(cur->f.s2 == nil)
+                       continue;
+               walkvardef(n, (Reg*)cur->f.s2, active);
+       }
+}
+
+/*
+ * addmove inserts one move instruction immediately after r's
+ * instruction, transferring variable number bn between its memory
+ * location and register rn:
+ *     f != 0: from is register rn, to is the variable
+ *     f == 0: from is the variable, to is register rn
+ * The new Prog gets the placeholder pc 9999 and the line number of
+ * r's instruction.
+ * NOTE(review): ostats.nspill is incremented for both directions -
+ * confirm whether the f == 0 case was meant to count as nreload.
+ */
+void
+addmove(Reg *r, int bn, int rn, int f)
+{
+       Prog *after, *mv;
+       Adr *mem;
+       Var *v;
+
+       v = var + bn;
+
+       mv = mal(sizeof(*mv));
+       clearp(mv);
+       mv->pc = 9999;
+
+       // splice mv into the list right behind r's instruction
+       after = r->f.prog;
+       mv->link = after->link;
+       after->link = mv;
+       mv->lineno = after->lineno;
+
+       // describe the variable's memory location in mv->to
+       mem = &mv->to;
+       mem->type = TYPE_MEM;
+       mem->name = v->name;
+       mem->node = v->node;
+       mem->sym = linksym(v->node->sym);
+       mem->offset = v->offset;
+       mem->etype = v->etype;
+       /* NOTE(rsc): 9g additionally did
+       if(a->etype == TARRAY)
+               a->type = TYPE_ADDR;
+       else if(a->sym == nil)
+               a->type = TYPE_CONST;
+       */
+
+       mv->as = arch.optoas(OAS, types[(uchar)v->etype]);
+       // TODO(rsc): Remove special case here.
+       if(v->etype == TBOOL && (arch.thechar == '9' || arch.thechar == '5'))
+               mv->as = arch.optoas(OAS, types[TUINT8]);
+
+       if(f) {
+               // register -> variable: to already names the variable
+               mv->from.type = TYPE_REG;
+               mv->from.reg = rn;
+               mv->from.name = NAME_NONE;
+       } else {
+               // variable -> register: swap the roles of from and to
+               mv->from = *mem;
+               *mem = zprog.from;
+               mem->type = TYPE_REG;
+               mem->reg = rn;
+       }
+
+       if(debug['R'] && debug['v'])
+               print("%P ===add=== %P\n", after, mv);
+       ostats.nspill++;
+}
+
+// overlap reports (1 or 0) whether the ranges [o1, o1+w1) and
+// [o2, o2+w2) share at least one position.
+static int
+overlap(int64 o1, int w1, int64 o2, int w2)
+{
+       int64 end1, end2;
+
+       end1 = o1+w1;
+       end2 = o2+w2;
+       return end1 > o2 && end2 > o1;
+}
+/*
+ * mkvar returns the bit set naming what operand a refers to, creating
+ * a new entry in var[] the first time a given variable word is seen.
+ * r is the Reg of the instruction holding a, or R when there is none;
+ * when r != R the registers that a mentions are also OR'ed into r->use1.
+ *
+ * A plain register operand yields its register bits in word 0.
+ * A tracked memory operand yields blsh(i), where i is its index in var[].
+ * The result is zbits when the operand is not tracked: TYPE_NONE, no
+ * register bits, an address operand (except on '9' and '5', where it is
+ * handled like a memory operand), a memory name other than
+ * extern/static/param/auto, a node that is not an ONAME with an orig,
+ * a missing symbol or one whose name starts with '.', etype 0 or TFUNC,
+ * or a full table (nvar >= NVAR).
+ */
+Bits
+mkvar(Reg *r, Adr *a)
+{
+       Var *v;
+       int i, n, et, z, flag;
+       int64 w;
+       uint64 regu;
+       int64 o;
+       Bits bit;
+       Node *node;
+
+       /*
+        * mark registers used
+        */
+       if(a->type == TYPE_NONE)
+               goto none;
+
+       if(r != R)
+               r->use1.b[0] |= arch.doregbits(a->index); // TODO: Use RtoB
+
+       switch(a->type) {
+       default:
+               regu = arch.doregbits(a->reg) | arch.RtoB(a->reg); // TODO: Use RtoB
+               if(regu == 0)
+                       goto none;
+               bit = zbits;
+               bit.b[0] = regu;        // register operands only ever occupy word 0
+               return bit;
+
+       case TYPE_ADDR:
+               // TODO(rsc): Remove special case here.
+               if(arch.thechar == '9' || arch.thechar == '5')
+                       goto memcase;
+               a->type = TYPE_MEM;     // temporarily view the operand as a memory reference
+               bit = mkvar(r, a);
+               setaddrs(bit);  // NOTE(review): defined elsewhere; presumably flags these vars as address-taken
+               a->type = TYPE_ADDR;    // restore the caller's operand
+               ostats.naddr++;
+               goto none;      // the address operand itself contributes no bits
+
+       case TYPE_MEM:
+       memcase:
+               if(r != R) {
+                       r->use1.b[0] |= arch.RtoB(a->reg);
+                       /* NOTE: 5g did
+                               if(r->f.prog->scond & (C_PBIT|C_WBIT))
+                                       r->set.b[0] |= RtoB(a->reg);
+                       */
+               }
+               switch(a->name) {
+               default:
+                       goto none;
+               case NAME_EXTERN:
+               case NAME_STATIC:
+               case NAME_PARAM:
+               case NAME_AUTO:
+                       n = a->name;    // only these four name classes are tracked
+                       break;
+               }
+       }
+
+       node = a->node;
+       if(node == N || node->op != ONAME || node->orig == N)
+               goto none;
+       node = node->orig;      // all lookups below are keyed on the orig node
+       if(node->orig != node)
+               fatal("%D: bad node", a);
+       if(node->sym == S || node->sym->name[0] == '.')
+               goto none;
+       et = a->etype;
+       o = a->offset;
+       w = a->width;
+       if(w < 0)
+               fatal("bad width %lld for %D", w, a);
+
+       flag = 0;       // becomes 1 once some existing var overlaps this one
+       for(i=0; i<nvar; i++) {
+               v = var+i;
+               if(v->node == node && v->name == n) {
+                       if(v->offset == o)
+                       if(v->etype == et)
+                       if(v->width == w) {
+                               // TODO(rsc): Remove special case for arm here.
+                               if(!flag || arch.thechar != '5')
+                                       return blsh(i); // exact match: reuse the existing bit
+                       }
+
+                       // if they overlap, disable both
+                       if(overlap(v->offset, v->width, o, w)) {
+//                             print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et);
+                               v->addr = 1;
+                               flag = 1;
+                       }
+               }
+       }
+
+       switch(et) {
+       case 0:
+       case TFUNC:
+               goto none;
+       }
+
+       if(nvar >= NVAR) {
+               if(debug['w'] > 1 && node != N)
+                       fatal("variable not optimized: %#N", node);
+               
+               // If we're not tracking a word in a variable, mark the rest as
+               // having its address taken, so that we keep the whole thing
+               // live at all calls. otherwise we might optimize away part of
+               // a variable but not all of it.
+               for(i=0; i<nvar; i++) {
+                       v = var+i;
+                       if(v->node == node)
+                               v->addr = 1;
+               }
+               goto none;
+       }
+
+       i = nvar;       // claim the next free slot in var[]
+       nvar++;
+       v = var+i;
+       v->offset = o;
+       v->name = n;
+       v->etype = et;
+       v->width = w;
+       v->addr = flag;         // funny punning
+       v->node = node;
+       
+       // node->opt is the head of a linked list
+       // of Vars within the given Node, so that
+       // we can start at a Var and find all the other
+       // Vars in the same Go variable.
+       v->nextinnode = node->opt;
+       node->opt = v;
+
+       bit = blsh(i);
+       if(n == NAME_EXTERN || n == NAME_STATIC)
+               for(z=0; z<BITS; z++)
+                       externs.b[z] |= bit.b[z];
+       if(n == NAME_PARAM)
+               for(z=0; z<BITS; z++)
+                       params.b[z] |= bit.b[z];
+
+       if(node->class == PPARAM)
+               for(z=0; z<BITS; z++)
+                       ivar.b[z] |= bit.b[z];
+       if(node->class == PPARAMOUT)
+               for(z=0; z<BITS; z++)
+                       ovar.b[z] |= bit.b[z];
+
+       // Treat values with their address taken as live at calls,
+       // because the garbage collector's liveness analysis in ../gc/plive.c does.
+       // These must be consistent or else we will elide stores and the garbage
+       // collector will see uninitialized data.
+       // The typical case where our own analysis is out of sync is when the
+       // node appears to have its address taken but that code doesn't actually
+       // get generated and therefore doesn't show up as an address being
+       // taken when we analyze the instruction stream.
+       // One instance of this case is when a closure uses the same name as
+       // an outer variable for one of its own variables declared with :=.
+       // The parser flags the outer variable as possibly shared, and therefore
+       // sets addrtaken, even though it ends up not being actually shared.
+       // If we were better about _ elision, _ = &x would suffice too.
+       // The broader := in a closure problem is mentioned in a comment in
+       // closure.c:/^typecheckclosure and dcl.c:/^oldname.
+       if(node->addrtaken)
+               v->addr = 1;
+
+       // Disable registerization for globals, because:
+       // (1) we might panic at any time and we want the recovery code
+       // to see the latest values (issue 1304).
+       // (2) we don't know what pointers might point at them and we want
+       // loads via those pointers to see updated values and vice versa (issue 7995).
+       //
+       // Disable registerization for results if using defer, because the deferred func
+       // might recover and return, causing the current values to be used.
+       if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
+               v->addr = 1;
+
+       if(debug['R'])
+               print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
+       ostats.nvar++;
+
+       return bit;
+
+none:
+       return zbits;   // shared exit for every "not tracked" case
+}
+/*
+ * prop walks backward from r along the f.p1 links, carrying the
+ * sets ref and cal. At each instruction it ORs them into refahead and
+ * calahead (incrementing change whenever a stored set grows), adjusts
+ * them for ACALL, ATEXT and ARET, and then records the values that hold
+ * before the instruction in refbehind and calbehind.
+ * The walk stops after updating an instruction that was already marked
+ * f.active, or when the p1 chain ends. Every instruction this call newly
+ * marked active then has prop applied to each of its f.p2 predecessors.
+ */
+void
+prop(Reg *r, Bits ref, Bits cal)
+{
+       Reg *r1, *r2;
+       int z, i, j;
+       Var *v, *v1;
+
+       for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
+               for(z=0; z<BITS; z++) {
+                       ref.b[z] |= r1->refahead.b[z];
+                       if(ref.b[z] != r1->refahead.b[z]) {
+                               r1->refahead.b[z] = ref.b[z];
+                               change++;       // a stored set grew on this pass
+                       }
+                       cal.b[z] |= r1->calahead.b[z];
+                       if(cal.b[z] != r1->calahead.b[z]) {
+                               r1->calahead.b[z] = cal.b[z];
+                               change++;
+                       }
+               }
+               switch(r1->f.prog->as) {
+               case ACALL:
+                       if(noreturn(r1->f.prog))
+                               break;  // skip the call adjustments below; ref and cal pass through as they are
+
+                       // Mark all input variables (ivar) as used, because that's what the
+                       // liveness bitmaps say. The liveness bitmaps say that so that a
+                       // panic will not show stale values in the parameter dump.
+                       // Mark variables with a recent VARDEF (r1->act) as used,
+                       // so that the optimizer flushes initializations to memory,
+                       // so that if a garbage collection happens during this CALL,
+                       // the collector will see initialized memory. Again this is to
+                       // match what the liveness bitmaps say.
+                       for(z=0; z<BITS; z++) {
+                               cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
+                               ref.b[z] = 0;
+                       }
+                       
+                       // cal.b is the current approximation of what's live across the call.
+                       // Every bit in cal.b is a single stack word. For each such word,
+                       // find all the other tracked stack words in the same Go variable
+                       // (struct/slice/string/interface) and mark them live too.
+                       // This is necessary because the liveness analysis for the garbage
+                       // collector works at variable granularity, not at word granularity.
+                       // It is fundamental for slice/string/interface: the garbage collector
+                       // needs the whole value, not just some of the words, in order to
+                       // interpret the other bits correctly. Specifically, slice needs a consistent
+                       // ptr and cap, string needs a consistent ptr and len, and interface
+                       // needs a consistent type word and data word.
+                       for(z=0; z<BITS; z++) {
+                               if(cal.b[z] == 0)
+                                       continue;
+                               for(i=0; i<64; i++) {
+                                       if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
+                                               continue;
+                                       v = var+z*64+i;
+                                       if(v->node->opt == nil) // v represents fixed register, not Go variable
+                                               continue;
+
+                                       // v->node->opt is the head of a linked list of Vars
+                                       // corresponding to tracked words from the Go variable v->node.
+                                       // Walk the list and set all the bits.
+                                       // For a large struct this could end up being quadratic:
+                                       // after the first setting, the outer loop (for z, i) would see a 1 bit
+                                       // for all of the remaining words in the struct, and for each such
+                                       // word would go through and turn on all the bits again.
+                                       // To avoid the quadratic behavior, we only turn on the bits if
+                                       // v is the head of the list or if the head's bit is not yet turned on.
+                                       // This will set the bits at most twice, keeping the overall loop linear.
+                                       v1 = v->node->opt;
+                                       j = v1 - var;   // index of the list head in var[]
+                                       if(v == v1 || !btest(&cal, j)) {
+                                               for(; v1 != nil; v1 = v1->nextinnode) {
+                                                       j = v1 - var;
+                                                       biset(&cal, j);
+                                               }
+                                       }
+                               }
+                       }
+                       break;
+
+               case ATEXT:
+                       for(z=0; z<BITS; z++) {
+                               cal.b[z] = 0;
+                               ref.b[z] = 0;
+                       }
+                       break;
+
+               case ARET:
+                       for(z=0; z<BITS; z++) {
+                               cal.b[z] = externs.b[z] | ovar.b[z];
+                               ref.b[z] = 0;
+                       }
+                       break;
+               }
+               for(z=0; z<BITS; z++) {
+                       ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
+                               r1->use1.b[z] | r1->use2.b[z];
+                       cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
+                       r1->refbehind.b[z] = ref.b[z];
+                       r1->calbehind.b[z] = cal.b[z];
+               }
+               if(r1->f.active)
+                       break;  // already visited: its sets were refreshed above, but walk no further
+               r1->f.active = 1;
+       }
+       for(; r != r1; r = (Reg*)r->f.p1)       // r1 is where the walk above stopped (R if it ran off the chain)
+               for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
+                       prop(r2, r->refbehind, r->calbehind);
+}
+
+// synch walks forward from r along the f.s1 links, carrying the set dif.
+// At each instruction it drops from dif the bits that are in refahead but
+// not in refbehind, adds the instruction's set and regdiff bits, and stores
+// the result back into regdiff, incrementing change whenever regdiff changes.
+// The walk stops at an instruction already marked f.active (after updating
+// its regdiff). Otherwise the instruction is marked active, the bits that
+// are in calahead but not in calbehind are dropped from dif, and the
+// f.s2 successor, if any, is processed recursively with the current dif.
+void
+synch(Reg *r, Bits dif)
+{
+       Reg *cur;
+       int w;
+
+       cur = r;
+       while(cur != R) {
+               for(w = 0; w < BITS; w++) {
+                       // ~(~behind & ahead) == behind | ~ahead
+                       dif.b[w] &= cur->refbehind.b[w] | ~cur->refahead.b[w];
+                       dif.b[w] |= cur->set.b[w] | cur->regdiff.b[w];
+                       if(cur->regdiff.b[w] != dif.b[w]) {
+                               cur->regdiff.b[w] = dif.b[w];
+                               change++;
+                       }
+               }
+
+               if(cur->f.active)
+                       return;
+               cur->f.active = 1;
+
+               for(w = 0; w < BITS; w++)
+                       dif.b[w] &= cur->calbehind.b[w] | ~cur->calahead.b[w];
+
+               if(cur->f.s2 != nil)
+                       synch((Reg*)cur->f.s2, dif);
+
+               cur = (Reg*)cur->f.s1;
+       }
+}
+
+// allreg tries to pick a register for region r given b, the bit set of
+// registers that are unavailable. The variable's etype selects the
+// register file: integer, boolean and pointer types use arch.BtoR/RtoB,
+// floating-point types use arch.BtoF/FtoB; any other etype is fatal.
+// On success (a register was found and r->cost > 0) r->regno is set and
+// the register's bit is returned. Otherwise r->regno stays 0 and the
+// result is 0.
+uint64
+allreg(uint64 b, Rgn *r)
+{
+       Var *v;
+       int regno, isfloat;
+
+       v = var + r->varno;
+       r->regno = 0;
+
+       // First decide which register file this variable belongs to.
+       switch(v->etype) {
+       case TINT8:
+       case TUINT8:
+       case TINT16:
+       case TUINT16:
+       case TINT32:
+       case TUINT32:
+       case TINT64:
+       case TUINT64:
+       case TINT:
+       case TUINT:
+       case TUINTPTR:
+       case TBOOL:
+       case TPTR32:
+       case TPTR64:
+               isfloat = 0;
+               break;
+
+       case TFLOAT32:
+       case TFLOAT64:
+               isfloat = 1;
+               break;
+
+       default:
+               fatal("unknown etype %d/%E", bitno(b), v->etype);
+               return 0;
+       }
+
+       // Ask the back end for a register outside the busy set.
+       if(isfloat)
+               regno = arch.BtoF(~b);
+       else
+               regno = arch.BtoR(~b);
+
+       if(!(regno && r->cost > 0))
+               return 0;
+
+       r->regno = regno;
+       if(isfloat)
+               return arch.FtoB(regno);
+       return arch.RtoB(regno);
+}
+
+// paint1 walks the connected region of instructions around r in which
+// variable bit bn is marked in refbehind/refahead, setting the bit in
+// each visited instruction's act set. Along the way it accumulates a cost
+// estimate in the global change: CREF*loop is added for each use and for
+// each use2/set of the variable (except on ANOP instructions), and
+// CLOAD*loop is subtracted where the LOAD test at the region entry or the
+// STORE/regdiff test at an instruction fires.
+// It returns immediately if r is already painted for bn.
+void
+paint1(Reg *r, int bn)
+{
+       Reg *r1;
+       int z;
+       uint64 bb;
+
+       z = bn/64;
+       // Build the mask in unsigned arithmetic: shifting a signed 1LL
+       // left by 63 overflows long long, which is undefined behavior in C.
+       bb = (uint64)1 << (bn%64);
+       if(r->act.b[z] & bb)
+               return;
+
+       // Back up along p1 to the first instruction of this straight-line
+       // stretch that still references bn and has not been painted.
+       for(;;) {
+               if(!(r->refbehind.b[z] & bb))
+                       break;
+               r1 = (Reg*)r->f.p1;
+               if(r1 == R)
+                       break;
+               if(!(r1->refahead.b[z] & bb))
+                       break;
+               if(r1->act.b[z] & bb)
+                       break;
+               r = r1;
+       }
+
+       if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
+               change -= CLOAD * r->f.loop;
+       }
+
+       // Walk forward along s1, painting as we go and recursing into
+       // the other predecessors (p2 list) and the branch successor (s2).
+       for(;;) {
+               r->act.b[z] |= bb;
+
+               if(r->f.prog->as != ANOP) { // don't give credit for NOPs
+                       if(r->use1.b[z] & bb)
+                               change += CREF * r->f.loop;
+                       if((r->use2.b[z]|r->set.b[z]) & bb)
+                               change += CREF * r->f.loop;
+               }
+
+               if(STORE(r) & r->regdiff.b[z] & bb) {
+                       change -= CLOAD * r->f.loop;
+               }
+
+               if(r->refbehind.b[z] & bb)
+                       for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
+                               if(r1->refahead.b[z] & bb)
+                                       paint1(r1, bn);
+
+               if(!(r->refahead.b[z] & bb))
+                       break;
+               r1 = (Reg*)r->f.s2;
+               if(r1 != R)
+                       if(r1->refbehind.b[z] & bb)
+                               paint1(r1, bn);
+               r = (Reg*)r->f.s1;
+               if(r == R)
+                       break;
+               if(r->act.b[z] & bb)
+                       break;
+               if(!(r->refbehind.b[z] & bb))
+                       break;
+       }
+}
+
+// paint2 walks the region of instructions around r whose act set has
+// variable bit bn set (following the same refbehind/refahead links as
+// paint1), clearing that act bit on each visited instruction.
+// It returns regbits OR'ed with the regu of every instruction visited.
+// If r is not painted for bn, the result is just regbits.
+// depth is only used in the debug trace.
+uint64
+paint2(Reg *r, int bn, int depth)
+{
+       Reg *r1;
+       int z;
+       uint64 bb, vreg;
+
+       z = bn/64;
+       // Build the mask in unsigned arithmetic: shifting a signed 1LL
+       // left by 63 overflows long long, which is undefined behavior in C.
+       bb = (uint64)1 << (bn%64);
+       vreg = regbits;
+       if(!(r->act.b[z] & bb))
+               return vreg;
+
+       // Back up along p1 while the predecessor is part of the painted region.
+       for(;;) {
+               if(!(r->refbehind.b[z] & bb))
+                       break;
+               r1 = (Reg*)r->f.p1;
+               if(r1 == R)
+                       break;
+               if(!(r1->refahead.b[z] & bb))
+                       break;
+               if(!(r1->act.b[z] & bb))
+                       break;
+               r = r1;
+       }
+
+       // Walk forward along s1, unpainting and collecting register usage,
+       // recursing into the p2 predecessors and the s2 successor.
+       for(;;) {
+               if(debug['R'] && debug['v'])
+                       print("  paint2 %d %P\n", depth, r->f.prog);
+
+               r->act.b[z] &= ~bb;
+
+               vreg |= r->regu;
+
+               if(r->refbehind.b[z] & bb)
+                       for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
+                               if(r1->refahead.b[z] & bb)
+                                       vreg |= paint2(r1, bn, depth+1);
+
+               if(!(r->refahead.b[z] & bb))
+                       break;
+               r1 = (Reg*)r->f.s2;
+               if(r1 != R)
+                       if(r1->refbehind.b[z] & bb)
+                               vreg |= paint2(r1, bn, depth+1);
+               r = (Reg*)r->f.s1;
+               if(r == R)
+                       break;
+               if(!(r->act.b[z] & bb))
+                       break;
+               if(!(r->refbehind.b[z] & bb))
+                       break;
+       }
+
+       return vreg;
+}
+
+// paint3 walks the same region as paint1 for variable bit bn and
+// commits the allocation: operands that use or set the variable are
+// rewritten to register rn with addreg, addmove is called where the LOAD
+// test at the region entry or the STORE/regdiff test at an instruction
+// fires, and rb is OR'ed into the regu of every instruction visited.
+// Visited instructions get bn set in act; it returns immediately if r is
+// already painted for bn.
+void
+paint3(Reg *r, int bn, uint64 rb, int rn)
+{
+       Reg *r1;
+       Prog *p;
+       int z;
+       uint64 bb;
+
+       z = bn/64;
+       // Build the mask in unsigned arithmetic: shifting a signed 1LL
+       // left by 63 overflows long long, which is undefined behavior in C.
+       bb = (uint64)1 << (bn%64);
+       if(r->act.b[z] & bb)
+               return;
+
+       // Back up along p1 to the first unpainted instruction of this
+       // straight-line stretch that still references bn.
+       for(;;) {
+               if(!(r->refbehind.b[z] & bb))
+                       break;
+               r1 = (Reg*)r->f.p1;
+               if(r1 == R)
+                       break;
+               if(!(r1->refahead.b[z] & bb))
+                       break;
+               if(r1->act.b[z] & bb)
+                       break;
+               r = r1;
+       }
+
+       if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
+               addmove(r, bn, rn, 0);
+
+       // Walk forward along s1, rewriting operands, and recurse into the
+       // p2 predecessors and the s2 successor.
+       for(;;) {
+               r->act.b[z] |= bb;
+               p = r->f.prog;
+
+               if(r->use1.b[z] & bb) {
+                       if(debug['R'] && debug['v'])
+                               print("%P", p);
+                       addreg(&p->from, rn);
+                       if(debug['R'] && debug['v'])
+                               print(" ===change== %P\n", p);
+               }
+               if((r->use2.b[z]|r->set.b[z]) & bb) {
+                       if(debug['R'] && debug['v'])
+                               print("%P", p);
+                       addreg(&p->to, rn);
+                       if(debug['R'] && debug['v'])
+                               print(" ===change== %P\n", p);
+               }
+
+               if(STORE(r) & r->regdiff.b[z] & bb)
+                       addmove(r, bn, rn, 1);
+               r->regu |= rb;
+
+               if(r->refbehind.b[z] & bb)
+                       for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
+                               if(r1->refahead.b[z] & bb)
+                                       paint3(r1, bn, rb, rn);
+
+               if(!(r->refahead.b[z] & bb))
+                       break;
+               r1 = (Reg*)r->f.s2;
+               if(r1 != R)
+                       if(r1->refbehind.b[z] & bb)
+                               paint3(r1, bn, rb, rn);
+               r = (Reg*)r->f.s1;
+               if(r == R)
+                       break;
+               if(r->act.b[z] & bb)
+                       break;
+               if(!(r->refbehind.b[z] & bb))
+                       break;
+       }
+}
+
+// addreg rewrites operand a in place into a reference to register rn:
+// the type becomes TYPE_REG, and the symbol, node, offset and name that
+// described the previous operand are cleared. Each call is counted in
+// ostats.ncvtreg.
+void
+addreg(Adr *a, int rn)
+{
+       // The operand now names a register...
+       a->type = TYPE_REG;
+       a->reg = rn;
+
+       // ...so nothing of the old operand description may remain.
+       a->name = 0;
+       a->offset = 0;
+       a->node = nil;
+       a->sym = nil;
+
+       ostats.ncvtreg++;
+}
+
+// dumpone prints one line for flow node f: its loop value and its
+// instruction. When isreg is set, f is treated as a Reg and every
+// non-empty bit set it carries (set, use1, use2, refbehind, refahead,
+// calbehind, calahead, regdiff, act) is appended after a tab.
+void
+dumpone(Flow *f, int isreg)
+{
+       int w;
+       Bits all;
+       Reg *rg;
+
+       print("%d:%P", f->loop, f->prog);
+       if(!isreg) {
+               print("\n");
+               return;
+       }
+
+       // Union of all the sets: used only to decide whether there is
+       // anything to print for this instruction.
+       rg = (Reg*)f;
+       for(w = 0; w < BITS; w++) {
+               all.b[w] = 0;
+               all.b[w] |= rg->set.b[w];
+               all.b[w] |= rg->use1.b[w];
+               all.b[w] |= rg->use2.b[w];
+               all.b[w] |= rg->refbehind.b[w];
+               all.b[w] |= rg->refahead.b[w];
+               all.b[w] |= rg->calbehind.b[w];
+               all.b[w] |= rg->calahead.b[w];
+               all.b[w] |= rg->regdiff.b[w];
+               all.b[w] |= rg->act.b[w];
+       }
+
+       if(bany(&all)) {
+               print("\t");
+               if(bany(&rg->set))
+                       print(" s:%Q", rg->set);
+               if(bany(&rg->use1))
+                       print(" u1:%Q", rg->use1);
+               if(bany(&rg->use2))
+                       print(" u2:%Q", rg->use2);
+               if(bany(&rg->refbehind))
+                       print(" rb:%Q ", rg->refbehind);
+               if(bany(&rg->refahead))
+                       print(" ra:%Q ", rg->refahead);
+               if(bany(&rg->calbehind))
+                       print(" cb:%Q ", rg->calbehind);
+               if(bany(&rg->calahead))
+                       print(" ca:%Q ", rg->calahead);
+               if(bany(&rg->regdiff))
+                       print(" d:%Q ", rg->regdiff);
+               if(bany(&rg->act))
+                       print(" a:%Q ", rg->act);
+       }
+       print("\n");
+}
+
+// dumpit prints the heading str followed by every flow node on the link
+// chain starting at r0, one dumpone line each. After a node it lists the
+// pc of each predecessor on the p2 list (plus the p1 predecessor, or
+// "(only)" when there is none), and the pc of its successors whenever
+// they are anything other than just the next node in the chain.
+// isreg is passed through to dumpone.
+void
+dumpit(char *str, Flow *r0, int isreg)
+{
+       Flow *f, *pred;
+
+       print("\n%s\n", str);
+       for(f = r0; f != nil; f = f->link) {
+               dumpone(f, isreg);
+
+               pred = f->p2;
+               if(pred != nil) {
+                       print(" pred:");
+                       while(pred != nil) {
+                               print(" %.4ud", (int)pred->prog->pc);
+                               pred = pred->p2link;
+                       }
+                       if(f->p1 == nil)
+                               print(" (only)");
+                       else
+                               print(" (and %.4ud)", (int)f->p1->prog->pc);
+                       print("\n");
+               }
+
+               // Nothing more to say when the sole successor is the next node.
+               if(f->s1 == f->link && f->s2 == nil)
+                       continue;
+
+               print(" succ:");
+               if(f->s1 != nil)
+                       print(" %.4ud", (int)f->s1->prog->pc);
+               if(f->s2 != nil)
+                       print(" %.4ud", (int)f->s2->prog->pc);
+               print("\n");
+       }
+}