--- /dev/null
+// Derived from Inferno utils/6c/gc.h
+// http://code.google.com/p/inferno-os/source/browse/utils/6c/gc.h
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
+// Portions Copyright © 1997-1999 Vita Nuova Limited
+// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
+// Portions Copyright © 2004,2006 Bruce Ellis
+// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
+// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
+// Portions Copyright © 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#define Z N
+#define Adr Addr
+
+#define BITS 5
+#define NVAR (BITS*sizeof(uint32)*8)
+
+#define D_HI D_NONE
+#define D_LO D_NONE
+
+#define isregtype(t) ((t)>= D_AX && (t)<=D_R15)
+
+#define BLOAD(r) band(bnot(r->refbehind), r->refahead)
+#define BSTORE(r) band(bnot(r->calbehind), r->calahead)
+#define LOAD(r) (~r->refbehind.b[z] & r->refahead.b[z])
+#define STORE(r) (~r->calbehind.b[z] & r->calahead.b[z])
+
+#define CLOAD 5
+#define CREF 5
+#define CINF 1000
+#define LOOP 3
+
+typedef struct Bits Bits;
+typedef struct Reg Reg;
+typedef struct Var Var;
+typedef struct Rgn Rgn;
+
+struct Bits
+{
+ uint32 b[BITS];
+};
+
+
+struct Reg
+{
+
+ Bits set;
+ Bits use1;
+ Bits use2;
+
+ Bits refbehind;
+ Bits refahead;
+ Bits calbehind;
+ Bits calahead;
+ Bits regdiff;
+ Bits act;
+
+ int32 regu;
+ int32 loop; /* could be shorter */
+ int32 rpo; /* reverse post ordering */
+ int32 active;
+
+// uint32 magic;
+// int32 pc;
+// Reg* log5;
+
+ Reg* p1;
+ Reg* p2;
+ Reg* p2link;
+ Reg* s1;
+ Reg* s2;
+ Reg* link;
+ Prog* prog;
+};
+#define R ((Reg*)0)
+
+struct Var
+{
+ vlong offset;
+ Sym* sym;
+ char name;
+ char etype;
+};
+
+#define NRGN 600
+struct Rgn
+{
+ Reg* enter;
+ short cost;
+ short varno;
+ short regno;
+};
+
+
+EXTERN int32 exregoffset; // not set
+EXTERN int32 exfregoffset; // not set
+EXTERN Reg* firstr;
+EXTERN Reg* lastr;
+EXTERN Reg zreg;
+EXTERN Reg* freer;
+EXTERN Var var[NVAR];
+EXTERN Reg** rpo2r;
+EXTERN Rgn region[NRGN];
+EXTERN Rgn* rgp;
+EXTERN int nregion;
+EXTERN int nvar;
+EXTERN int32 regbits;
+EXTERN int32 exregbits;
+EXTERN Bits externs;
+EXTERN Bits params;
+EXTERN Bits consts;
+EXTERN Bits addrs;
+EXTERN int change;
+EXTERN Bits zbits;
+EXTERN uchar typechlpfd[NTYPE]; // botch
+EXTERN uchar typev[NTYPE]; // botch
+EXTERN int32 maxnr;
+EXTERN int32* idom;
+
+/*
+ * bits.c
+ */
+Bits bor(Bits, Bits);
+Bits band(Bits, Bits);
+Bits bnot(Bits);
+int bany(Bits*);
+int bnum(Bits);
+Bits blsh(uint);
+int beq(Bits, Bits);
+int bset(Bits, uint);
+int Qconv(Fmt *fp);
+
+/*
+ * reg.c
+ */
+Reg* rega(void);
+int rcmp(const void*, const void*);
+void regopt(Prog*);
+void addmove(Reg*, int, int, int);
+Bits mkvar(Reg*, Adr*);
+void prop(Reg*, Bits, Bits);
+void loopit(Reg*, int32);
+void synch(Reg*, Bits);
+uint32 allreg(uint32, Rgn*);
+void paint1(Reg*, int);
+uint32 paint2(Reg*, int);
+void paint3(Reg*, int, int32, int);
+void addreg(Adr*, int);
+
+/*
+ * peep.c
+ */
+void peep(void);
+void excise(Reg*);
+Reg* uniqp(Reg*);
+Reg* uniqs(Reg*);
+int regtyp(Adr*);
+int anyvar(Adr*);
+int subprop(Reg*);
+int copyprop(Reg*);
+int copy1(Adr*, Adr*, Reg*, int);
+int copyu(Prog*, Adr*, Adr*);
+
+int copyas(Adr*, Adr*);
+int copyau(Adr*, Adr*);
+int copysub(Adr*, Adr*, Adr*, int);
+int copysub1(Prog*, Adr*, Adr*, int);
+
+int32 RtoB(int);
+int32 FtoB(int);
+int BtoR(int32);
+int BtoF(int32);
--- /dev/null
+// Derived from Inferno utils/6c/peep.c
+// http://code.google.com/p/inferno-os/source/browse/utils/6c/peep.c
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
+// Portions Copyright © 1997-1999 Vita Nuova Limited
+// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
+// Portions Copyright © 2004,2006 Bruce Ellis
+// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
+// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
+// Portions Copyright © 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "gg.h"
+#include "opt.h"
+
+static int
+needc(Prog *p)
+{
+ while(p != P) {
+ switch(p->as) {
+ case AADCL:
+ case AADCQ:
+ case ASBBL:
+ case ASBBQ:
+ case ARCRL:
+ case ARCRQ:
+ return 1;
+ case AADDL:
+ case AADDQ:
+ case ASUBL:
+ case ASUBQ:
+ case AJMP:
+ case ARET:
+ case ACALL:
+ return 0;
+ default:
+ if(p->to.type == D_BRANCH)
+ return 0;
+ }
+ p = p->link;
+ }
+ return 0;
+}
+
+static Reg*
+rnops(Reg *r)
+{
+ Prog *p;
+ Reg *r1;
+
+ if(r != R)
+ for(;;){
+ p = r->prog;
+ if(p->as != ANOP || p->from.type != D_NONE || p->to.type != D_NONE)
+ break;
+ r1 = uniqs(r);
+ if(r1 == R)
+ break;
+ r = r1;
+ }
+ return r;
+}
+
+void
+peep(void)
+{
+ Reg *r, *r1, *r2;
+ Prog *p, *p1;
+ int t;
+
+ /*
+ * complete R structure
+ */
+ t = 0;
+ for(r=firstr; r!=R; r=r1) {
+ r1 = r->link;
+ if(r1 == R)
+ break;
+ p = r->prog->link;
+ while(p != r1->prog)
+ switch(p->as) {
+ default:
+ r2 = rega();
+ r->link = r2;
+ r2->link = r1;
+
+ r2->prog = p;
+ r2->p1 = r;
+ r->s1 = r2;
+ r2->s1 = r1;
+ r1->p1 = r2;
+
+ r = r2;
+ t++;
+
+ case ADATA:
+ case AGLOBL:
+ case ANAME:
+ case ASIGNAME:
+ p = p->link;
+ }
+ }
+
+ pc = 0; /* speculating it won't kill */
+
+loop1:
+
+ t = 0;
+ for(r=firstr; r!=R; r=r->link) {
+ p = r->prog;
+ switch(p->as) {
+ case AMOVL:
+ case AMOVQ:
+ case AMOVSS:
+ case AMOVSD:
+ if(regtyp(&p->to))
+ if(regtyp(&p->from)) {
+ if(copyprop(r)) {
+ excise(r);
+ t++;
+ } else
+ if(subprop(r) && copyprop(r)) {
+ excise(r);
+ t++;
+ }
+ }
+ break;
+
+ case AMOVBLZX:
+ case AMOVWLZX:
+ case AMOVBLSX:
+ case AMOVWLSX:
+ if(regtyp(&p->to)) {
+ r1 = rnops(uniqs(r));
+ if(r1 != R) {
+ p1 = r1->prog;
+ if(p->as == p1->as && p->to.type == p1->from.type){
+ p1->as = AMOVL;
+ t++;
+ }
+ }
+ }
+ break;
+
+ case AMOVBQSX:
+ case AMOVBQZX:
+ case AMOVWQSX:
+ case AMOVWQZX:
+ case AMOVLQSX:
+ case AMOVLQZX:
+ if(regtyp(&p->to)) {
+ r1 = rnops(uniqs(r));
+ if(r1 != R) {
+ p1 = r1->prog;
+ if(p->as == p1->as && p->to.type == p1->from.type){
+ p1->as = AMOVQ;
+ t++;
+ }
+ }
+ }
+ break;
+
+ case AADDL:
+ case AADDQ:
+ case AADDW:
+ if(p->from.type != D_CONST || needc(p->link))
+ break;
+ if(p->from.offset == -1){
+ if(p->as == AADDQ)
+ p->as = ADECQ;
+ else if(p->as == AADDL)
+ p->as = ADECL;
+ else
+ p->as = ADECW;
+ p->from = zprog.from;
+ }
+ else if(p->from.offset == 1){
+ if(p->as == AADDQ)
+ p->as = AINCQ;
+ else if(p->as == AADDL)
+ p->as = AINCL;
+ else
+ p->as = AINCW;
+ p->from = zprog.from;
+ }
+ break;
+
+ case ASUBL:
+ case ASUBQ:
+ case ASUBW:
+ if(p->from.type != D_CONST || needc(p->link))
+ break;
+ if(p->from.offset == -1) {
+ if(p->as == ASUBQ)
+ p->as = AINCQ;
+ else if(p->as == ASUBL)
+ p->as = AINCL;
+ else
+ p->as = AINCW;
+ p->from = zprog.from;
+ }
+ else if(p->from.offset == 1){
+ if(p->as == ASUBQ)
+ p->as = ADECQ;
+ else if(p->as == ASUBL)
+ p->as = ADECL;
+ else
+ p->as = ADECW;
+ p->from = zprog.from;
+ }
+ break;
+ }
+ }
+ if(t)
+ goto loop1;
+}
+
+void
+excise(Reg *r)
+{
+ Prog *p;
+
+ p = r->prog;
+ p->as = ANOP;
+ p->from = zprog.from;
+ p->to = zprog.to;
+}
+
+Reg*
+uniqp(Reg *r)
+{
+ Reg *r1;
+
+ r1 = r->p1;
+ if(r1 == R) {
+ r1 = r->p2;
+ if(r1 == R || r1->p2link != R)
+ return R;
+ } else
+ if(r->p2 != R)
+ return R;
+ return r1;
+}
+
+Reg*
+uniqs(Reg *r)
+{
+ Reg *r1;
+
+ r1 = r->s1;
+ if(r1 == R) {
+ r1 = r->s2;
+ if(r1 == R)
+ return R;
+ } else
+ if(r->s2 != R)
+ return R;
+ return r1;
+}
+
+int
+regtyp(Adr *a)
+{
+ int t;
+
+ t = a->type;
+ if(t >= D_AX && t <= D_R15)
+ return 1;
+ if(t >= D_X0 && t <= D_X0+15)
+ return 1;
+ return 0;
+}
+
+/*
+ * the idea is to substitute
+ * one register for another
+ * from one MOV to another
+ * MOV a, R0
+ * ADD b, R0 / no use of R1
+ * MOV R0, R1
+ * would be converted to
+ * MOV a, R1
+ * ADD b, R1
+ * MOV R1, R0
+ * hopefully, then the former or latter MOV
+ * will be eliminated by copy propagation.
+ */
+int
+subprop(Reg *r0)
+{
+ Prog *p;
+ Adr *v1, *v2;
+ Reg *r;
+ int t;
+
+ p = r0->prog;
+ v1 = &p->from;
+ if(!regtyp(v1))
+ return 0;
+ v2 = &p->to;
+ if(!regtyp(v2))
+ return 0;
+ for(r=uniqp(r0); r!=R; r=uniqp(r)) {
+ if(uniqs(r) == R)
+ break;
+ p = r->prog;
+ switch(p->as) {
+ case ACALL:
+ return 0;
+
+ case AIMULL:
+ case AIMULQ:
+ case AIMULW:
+ if(p->to.type != D_NONE)
+ break;
+
+ case ADIVB:
+ case ADIVL:
+ case ADIVQ:
+ case ADIVW:
+ case AIDIVB:
+ case AIDIVL:
+ case AIDIVQ:
+ case AIDIVW:
+ case AIMULB:
+ case AMULB:
+ case AMULL:
+ case AMULQ:
+ case AMULW:
+
+ case AROLB:
+ case AROLL:
+ case AROLQ:
+ case AROLW:
+ case ARORB:
+ case ARORL:
+ case ARORQ:
+ case ARORW:
+ case ASALB:
+ case ASALL:
+ case ASALQ:
+ case ASALW:
+ case ASARB:
+ case ASARL:
+ case ASARQ:
+ case ASARW:
+ case ASHLB:
+ case ASHLL:
+ case ASHLQ:
+ case ASHLW:
+ case ASHRB:
+ case ASHRL:
+ case ASHRQ:
+ case ASHRW:
+
+ case AREP:
+ case AREPN:
+
+ case ACWD:
+ case ACDQ:
+ case ACQO:
+
+ case AMOVSL:
+ case AMOVSQ:
+ return 0;
+
+ case AMOVL:
+ case AMOVQ:
+ if(p->to.type == v1->type)
+ goto gotit;
+ break;
+ }
+ if(copyau(&p->from, v2) ||
+ copyau(&p->to, v2))
+ break;
+ if(copysub(&p->from, v1, v2, 0) ||
+ copysub(&p->to, v1, v2, 0))
+ break;
+ }
+ return 0;
+
+gotit:
+ copysub(&p->to, v1, v2, 1);
+ if(debug['P']) {
+ print("gotit: %D->%D\n%P", v1, v2, r->prog);
+ if(p->from.type == v2->type)
+ print(" excise");
+ print("\n");
+ }
+ for(r=uniqs(r); r!=r0; r=uniqs(r)) {
+ p = r->prog;
+ copysub(&p->from, v1, v2, 1);
+ copysub(&p->to, v1, v2, 1);
+ if(debug['P'])
+ print("%P\n", r->prog);
+ }
+ t = v1->type;
+ v1->type = v2->type;
+ v2->type = t;
+ if(debug['P'])
+ print("%P last\n", r->prog);
+ return 1;
+}
+
+/*
+ * The idea is to remove redundant copies.
+ * v1->v2 F=0
+ * (use v2 s/v2/v1/)*
+ * set v1 F=1
+ * use v2 return fail
+ * -----------------
+ * v1->v2 F=0
+ * (use v2 s/v2/v1/)*
+ * set v1 F=1
+ * set v2 return success
+ */
+int
+copyprop(Reg *r0)
+{
+ Prog *p;
+ Adr *v1, *v2;
+ Reg *r;
+
+ p = r0->prog;
+ v1 = &p->from;
+ v2 = &p->to;
+ if(copyas(v1, v2))
+ return 1;
+ for(r=firstr; r!=R; r=r->link)
+ r->active = 0;
+ return copy1(v1, v2, r0->s1, 0);
+}
+
+int
+copy1(Adr *v1, Adr *v2, Reg *r, int f)
+{
+ int t;
+ Prog *p;
+
+ if(r->active) {
+ if(debug['P'])
+ print("act set; return 1\n");
+ return 1;
+ }
+ r->active = 1;
+ if(debug['P'])
+ print("copy %D->%D f=%d\n", v1, v2, f);
+ for(; r != R; r = r->s1) {
+ p = r->prog;
+ if(debug['P'])
+ print("%P", p);
+ if(!f && uniqp(r) == R) {
+ f = 1;
+ if(debug['P'])
+ print("; merge; f=%d", f);
+ }
+ t = copyu(p, v2, A);
+ switch(t) {
+ case 2: /* rar, cant split */
+ if(debug['P'])
+ print("; %D rar; return 0\n", v2);
+ return 0;
+
+ case 3: /* set */
+ if(debug['P'])
+ print("; %D set; return 1\n", v2);
+ return 1;
+
+ case 1: /* used, substitute */
+ case 4: /* use and set */
+ if(f) {
+ if(!debug['P'])
+ return 0;
+ if(t == 4)
+ print("; %D used+set and f=%d; return 0\n", v2, f);
+ else
+ print("; %D used and f=%d; return 0\n", v2, f);
+ return 0;
+ }
+ if(copyu(p, v2, v1)) {
+ if(debug['P'])
+ print("; sub fail; return 0\n");
+ return 0;
+ }
+ if(debug['P'])
+ print("; sub %D/%D", v2, v1);
+ if(t == 4) {
+ if(debug['P'])
+ print("; %D used+set; return 1\n", v2);
+ return 1;
+ }
+ break;
+ }
+ if(!f) {
+ t = copyu(p, v1, A);
+ if(!f && (t == 2 || t == 3 || t == 4)) {
+ f = 1;
+ if(debug['P'])
+ print("; %D set and !f; f=%d", v1, f);
+ }
+ }
+ if(debug['P'])
+ print("\n");
+ if(r->s2)
+ if(!copy1(v1, v2, r->s2, f))
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * return
+ * 1 if v only used (and substitute),
+ * 2 if read-alter-rewrite
+ * 3 if set
+ * 4 if set and used
+ * 0 otherwise (not touched)
+ */
+int
+copyu(Prog *p, Adr *v, Adr *s)
+{
+
+ switch(p->as) {
+
+ default:
+ if(debug['P'])
+ print("unknown op %A\n", p->as);
+ /* SBBL; ADCL; FLD1; SAHF */
+ return 2;
+
+
+ case ANEGB:
+ case ANEGW:
+ case ANEGL:
+ case ANEGQ:
+ case ANOTB:
+ case ANOTW:
+ case ANOTL:
+ case ANOTQ:
+ if(copyas(&p->to, v))
+ return 2;
+ break;
+
+ case ALEAL: /* lhs addr, rhs store */
+ case ALEAQ:
+ if(copyas(&p->from, v))
+ return 2;
+
+
+ case ANOP: /* rhs store */
+ case AMOVL:
+ case AMOVQ:
+ case AMOVBLSX:
+ case AMOVBLZX:
+ case AMOVBQSX:
+ case AMOVBQZX:
+ case AMOVLQSX:
+ case AMOVLQZX:
+ case AMOVWLSX:
+ case AMOVWLZX:
+ case AMOVWQSX:
+ case AMOVWQZX:
+
+ case AMOVSS:
+ case AMOVSD:
+ case ACVTSD2SL:
+ case ACVTSD2SQ:
+ case ACVTSD2SS:
+ case ACVTSL2SD:
+ case ACVTSL2SS:
+ case ACVTSQ2SD:
+ case ACVTSQ2SS:
+ case ACVTSS2SD:
+ case ACVTSS2SL:
+ case ACVTSS2SQ:
+ case ACVTTSD2SL:
+ case ACVTTSD2SQ:
+ case ACVTTSS2SL:
+ case ACVTTSS2SQ:
+ if(copyas(&p->to, v)) {
+ if(s != A)
+ return copysub(&p->from, v, s, 1);
+ if(copyau(&p->from, v))
+ return 4;
+ return 3;
+ }
+ goto caseread;
+
+ case AROLB:
+ case AROLL:
+ case AROLQ:
+ case AROLW:
+ case ARORB:
+ case ARORL:
+ case ARORQ:
+ case ARORW:
+ case ASALB:
+ case ASALL:
+ case ASALQ:
+ case ASALW:
+ case ASARB:
+ case ASARL:
+ case ASARQ:
+ case ASARW:
+ case ASHLB:
+ case ASHLL:
+ case ASHLQ:
+ case ASHLW:
+ case ASHRB:
+ case ASHRL:
+ case ASHRQ:
+ case ASHRW:
+ if(copyas(&p->to, v))
+ return 2;
+ if(copyas(&p->from, v))
+ if(p->from.type == D_CX)
+ return 2;
+ goto caseread;
+
+ case AADDB: /* rhs rar */
+ case AADDL:
+ case AADDQ:
+ case AADDW:
+ case AANDB:
+ case AANDL:
+ case AANDQ:
+ case AANDW:
+ case ADECL:
+ case ADECQ:
+ case ADECW:
+ case AINCL:
+ case AINCQ:
+ case AINCW:
+ case ASUBB:
+ case ASUBL:
+ case ASUBQ:
+ case ASUBW:
+ case AORB:
+ case AORL:
+ case AORQ:
+ case AORW:
+ case AXORB:
+ case AXORL:
+ case AXORQ:
+ case AXORW:
+ case AMOVB:
+ case AMOVW:
+
+ case AADDSD:
+ case AADDSS:
+ case ACMPSD:
+ case ACMPSS:
+ case ADIVSD:
+ case ADIVSS:
+ case AMAXSD:
+ case AMAXSS:
+ case AMINSD:
+ case AMINSS:
+ case AMULSD:
+ case AMULSS:
+ case ARCPSS:
+ case ARSQRTSS:
+ case ASQRTSD:
+ case ASQRTSS:
+ case ASUBSD:
+ case ASUBSS:
+ case AXORPD:
+ if(copyas(&p->to, v))
+ return 2;
+ goto caseread;
+
+ case ACMPL: /* read only */
+ case ACMPW:
+ case ACMPB:
+ case ACMPQ:
+
+ case ACOMISD:
+ case ACOMISS:
+ case AUCOMISD:
+ case AUCOMISS:
+ caseread:
+ if(s != A) {
+ if(copysub(&p->from, v, s, 1))
+ return 1;
+ return copysub(&p->to, v, s, 1);
+ }
+ if(copyau(&p->from, v))
+ return 1;
+ if(copyau(&p->to, v))
+ return 1;
+ break;
+
+ case AJGE: /* no reference */
+ case AJNE:
+ case AJLE:
+ case AJEQ:
+ case AJHI:
+ case AJLS:
+ case AJMI:
+ case AJPL:
+ case AJGT:
+ case AJLT:
+ case AJCC:
+ case AJCS:
+
+ case AADJSP:
+ case AWAIT:
+ case ACLD:
+ break;
+
+ case AIMULL:
+ case AIMULQ:
+ case AIMULW:
+ if(p->to.type != D_NONE) {
+ if(copyas(&p->to, v))
+ return 2;
+ goto caseread;
+ }
+
+ case ADIVB:
+ case ADIVL:
+ case ADIVQ:
+ case ADIVW:
+ case AIDIVB:
+ case AIDIVL:
+ case AIDIVQ:
+ case AIDIVW:
+ case AIMULB:
+ case AMULB:
+ case AMULL:
+ case AMULQ:
+ case AMULW:
+
+ case ACWD:
+ case ACDQ:
+ case ACQO:
+ if(v->type == D_AX || v->type == D_DX)
+ return 2;
+ goto caseread;
+
+ case AMOVSL:
+ case AMOVSQ:
+ case AREP:
+ case AREPN:
+ if(v->type == D_CX || v->type == D_DI || v->type == D_SI)
+ return 2;
+ goto caseread;
+
+ case AJMP: /* funny */
+ if(s != A) {
+ if(copysub(&p->to, v, s, 1))
+ return 1;
+ return 0;
+ }
+ if(copyau(&p->to, v))
+ return 1;
+ return 0;
+
+ case ARET: /* funny */
+ if(v->type == REGRET || v->type == FREGRET)
+ return 2;
+ if(s != A)
+ return 1;
+ return 3;
+
+ case ACALL: /* funny */
+ if(REGEXT && v->type <= REGEXT && v->type > exregoffset)
+ return 2;
+ if(REGARG && v->type == REGARG)
+ return 2;
+
+ if(s != A) {
+ if(copysub(&p->to, v, s, 1))
+ return 1;
+ return 0;
+ }
+ if(copyau(&p->to, v))
+ return 4;
+ return 3;
+
+ case ATEXT: /* funny */
+ if(REGARG && v->type == REGARG)
+ return 3;
+ return 0;
+ }
+ return 0;
+}
+
+/*
+ * direct reference,
+ * could be set/use depending on
+ * semantics
+ */
+int
+copyas(Adr *a, Adr *v)
+{
+ if(a->type != v->type)
+ return 0;
+ if(regtyp(v))
+ return 1;
+ if(v->type == D_AUTO || v->type == D_PARAM)
+ if(v->offset == a->offset)
+ return 1;
+ return 0;
+}
+
+/*
+ * either direct or indirect
+ */
+int
+copyau(Adr *a, Adr *v)
+{
+
+ if(copyas(a, v))
+ return 1;
+ if(regtyp(v)) {
+ if(a->type-D_INDIR == v->type)
+ return 1;
+ if(a->index == v->type)
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * substitute s for v in a
+ * return failure to substitute
+ */
+int
+copysub(Adr *a, Adr *v, Adr *s, int f)
+{
+ int t;
+
+ if(copyas(a, v)) {
+ t = s->type;
+ if(t >= D_AX && t <= D_R15 || t >= D_X0 && t <= D_X0+15) {
+ if(f)
+ a->type = t;
+ }
+ return 0;
+ }
+ if(regtyp(v)) {
+ t = v->type;
+ if(a->type == t+D_INDIR) {
+ if((s->type == D_BP || s->type == D_R13) && a->index != D_NONE)
+ return 1; /* can't use BP-base with index */
+ if(f)
+ a->type = s->type+D_INDIR;
+// return 0;
+ }
+ if(a->index == t) {
+ if(f)
+ a->index = s->type;
+ return 0;
+ }
+ return 0;
+ }
+ return 0;
+}
--- /dev/null
+// Derived from Inferno utils/6c/reg.c
+// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
+// Portions Copyright © 1997-1999 Vita Nuova Limited
+// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
+// Portions Copyright © 2004,2006 Bruce Ellis
+// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
+// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
+// Portions Copyright © 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "gg.h"
+#undef EXTERN
+#define EXTERN
+#include "opt.h"
+
+#define P2R(p) (Reg*)(p->reg)
+#define MAGIC 0xb00fbabe
+
+static first = 1;
+static void dumpit(char *str, Reg *r0);
+static int noreturn(Prog *p);
+
+Reg*
+rega(void)
+{
+ Reg *r;
+
+ r = freer;
+ if(r == R) {
+ r = mal(sizeof(*r));
+ } else
+ freer = r->link;
+
+ *r = zreg;
+ return r;
+}
+
+int
+rcmp(const void *a1, const void *a2)
+{
+ Rgn *p1, *p2;
+ int c1, c2;
+
+ p1 = (Rgn*)a1;
+ p2 = (Rgn*)a2;
+ c1 = p2->cost;
+ c2 = p1->cost;
+ if(c1 -= c2)
+ return c1;
+ return p2->varno - p1->varno;
+}
+
+void
+regopt(Prog *firstp)
+{
+ Reg *r, *r1, *r2;
+ Prog *p1, *p;
+ int i, z, nr;
+ uint32 vreg;
+ Bits bit;
+
+ if(first) {
+ fmtinstall('Q', Qconv);
+ first = 0;
+ }
+
+ firstr = R;
+ lastr = R;
+ nvar = 0;
+ regbits = RtoB(D_SP);
+ for(z=0; z<BITS; z++) {
+ externs.b[z] = 0;
+ params.b[z] = 0;
+ consts.b[z] = 0;
+ addrs.b[z] = 0;
+ }
+
+ /*
+ * pass 1
+ * build aux data structure
+ * allocate pcs
+ * find use and set of variables
+ */
+ nr = 0;
+ for(p=firstp; p!=P; p=p->link) {
+ switch(p->as) {
+ case ADATA:
+ case AGLOBL:
+ case ANAME:
+ case ASIGNAME:
+ continue;
+ }
+ r = rega();
+ nr++;
+ if(firstr == R) {
+ firstr = r;
+ lastr = r;
+ } else {
+ lastr->link = r;
+ r->p1 = lastr;
+ lastr->s1 = r;
+ lastr = r;
+ }
+ r->prog = p;
+ p->reg = r;
+
+ r1 = r->p1;
+ if(r1 != R) {
+ switch(r1->prog->as) {
+ case ARET:
+ case AJMP:
+ case AIRETL:
+ case AIRETQ:
+ r->p1 = R;
+ r1->s1 = R;
+ }
+ }
+
+ bit = mkvar(r, &p->from);
+ if(bany(&bit))
+ switch(p->as) {
+ /*
+ * funny
+ */
+ case ALEAL:
+ case ALEAQ:
+ for(z=0; z<BITS; z++)
+ addrs.b[z] |= bit.b[z];
+ break;
+
+ /*
+ * left side read
+ */
+ default:
+ for(z=0; z<BITS; z++)
+ r->use1.b[z] |= bit.b[z];
+ break;
+ }
+
+ bit = mkvar(r, &p->to);
+ if(bany(&bit))
+ switch(p->as) {
+ default:
+ yyerror("reg: unknown op: %A", p->as);
+ break;
+
+ /*
+ * right side read
+ */
+ case ACMPB:
+ case ACMPL:
+ case ACMPQ:
+ case ACMPW:
+ case ACOMISS:
+ case ACOMISD:
+ case AUCOMISS:
+ case AUCOMISD:
+ for(z=0; z<BITS; z++)
+ r->use2.b[z] |= bit.b[z];
+ break;
+
+ /*
+ * right side write
+ */
+ case ANOP:
+ case AMOVL:
+ case AMOVQ:
+ case AMOVB:
+ case AMOVW:
+ case AMOVBLSX:
+ case AMOVBLZX:
+ case AMOVBQSX:
+ case AMOVBQZX:
+ case AMOVLQSX:
+ case AMOVLQZX:
+ case AMOVWLSX:
+ case AMOVWLZX:
+ case AMOVWQSX:
+ case AMOVWQZX:
+
+ case AMOVSS:
+ case AMOVSD:
+ case ACVTSD2SL:
+ case ACVTSD2SQ:
+ case ACVTSD2SS:
+ case ACVTSL2SD:
+ case ACVTSL2SS:
+ case ACVTSQ2SD:
+ case ACVTSQ2SS:
+ case ACVTSS2SD:
+ case ACVTSS2SL:
+ case ACVTSS2SQ:
+ case ACVTTSD2SL:
+ case ACVTTSD2SQ:
+ case ACVTTSS2SL:
+ case ACVTTSS2SQ:
+ for(z=0; z<BITS; z++)
+ r->set.b[z] |= bit.b[z];
+ break;
+
+ /*
+ * right side read+write
+ */
+ case AADDB:
+ case AADDL:
+ case AADDQ:
+ case AADDW:
+ case AANDB:
+ case AANDL:
+ case AANDQ:
+ case AANDW:
+ case ASUBB:
+ case ASUBL:
+ case ASUBQ:
+ case ASUBW:
+ case AORB:
+ case AORL:
+ case AORQ:
+ case AORW:
+ case AXORB:
+ case AXORL:
+ case AXORQ:
+ case AXORW:
+ case ASALB:
+ case ASALL:
+ case ASALQ:
+ case ASALW:
+ case ASARB:
+ case ASARL:
+ case ASARQ:
+ case ASARW:
+ case AROLB:
+ case AROLL:
+ case AROLQ:
+ case AROLW:
+ case ARORB:
+ case ARORL:
+ case ARORQ:
+ case ARORW:
+ case ASHLB:
+ case ASHLL:
+ case ASHLQ:
+ case ASHLW:
+ case ASHRB:
+ case ASHRL:
+ case ASHRQ:
+ case ASHRW:
+ case AIMULL:
+ case AIMULQ:
+ case AIMULW:
+ case ANEGL:
+ case ANEGQ:
+ case ANOTL:
+ case ANOTQ:
+ case AADCL:
+ case AADCQ:
+ case ASBBL:
+ case ASBBQ:
+
+ case AADDSD:
+ case AADDSS:
+ case ACMPSD:
+ case ACMPSS:
+ case ADIVSD:
+ case ADIVSS:
+ case AMAXSD:
+ case AMAXSS:
+ case AMINSD:
+ case AMINSS:
+ case AMULSD:
+ case AMULSS:
+ case ARCPSS:
+ case ARSQRTSS:
+ case ASQRTSD:
+ case ASQRTSS:
+ case ASUBSD:
+ case ASUBSS:
+ case AXORPD:
+ for(z=0; z<BITS; z++) {
+ r->set.b[z] |= bit.b[z];
+ r->use2.b[z] |= bit.b[z];
+ }
+ break;
+
+ /*
+ * funny
+ */
+ case ACALL:
+ for(z=0; z<BITS; z++)
+ addrs.b[z] |= bit.b[z];
+ break;
+ }
+
+ switch(p->as) {
+ case AIMULL:
+ case AIMULQ:
+ case AIMULW:
+ if(p->to.type != D_NONE)
+ break;
+
+ case AIDIVB:
+ case AIDIVL:
+ case AIDIVQ:
+ case AIDIVW:
+ case AIMULB:
+ case ADIVB:
+ case ADIVL:
+ case ADIVQ:
+ case ADIVW:
+ case AMULB:
+ case AMULL:
+ case AMULQ:
+ case AMULW:
+
+ case ACWD:
+ case ACDQ:
+ case ACQO:
+ r->regu |= RtoB(D_AX) | RtoB(D_DX);
+ break;
+
+ case AREP:
+ case AREPN:
+ case ALOOP:
+ case ALOOPEQ:
+ case ALOOPNE:
+ r->regu |= RtoB(D_CX);
+ break;
+
+ case AMOVSB:
+ case AMOVSL:
+ case AMOVSQ:
+ case AMOVSW:
+ case ACMPSB:
+ case ACMPSL:
+ case ACMPSQ:
+ case ACMPSW:
+ r->regu |= RtoB(D_SI) | RtoB(D_DI);
+ break;
+
+ case ASTOSB:
+ case ASTOSL:
+ case ASTOSQ:
+ case ASTOSW:
+ case ASCASB:
+ case ASCASL:
+ case ASCASQ:
+ case ASCASW:
+ r->regu |= RtoB(D_AX) | RtoB(D_DI);
+ break;
+
+ case AINSB:
+ case AINSL:
+ case AINSW:
+ case AOUTSB:
+ case AOUTSL:
+ case AOUTSW:
+ r->regu |= RtoB(D_DI) | RtoB(D_DX);
+ break;
+ }
+ }
+ if(firstr == R)
+ return;
+//dumpit("pass1", firstr);
+
+ /*
+ * pass 2
+ * turn branch references to pointers
+ * build back pointers
+ */
+ for(r=firstr; r!=R; r=r->link) {
+ p = r->prog;
+ if(p->to.type == D_BRANCH) {
+ if(p->to.branch == P)
+ fatal("pnil %P", p);
+ r1 = p->to.branch->reg;
+ if(r1 == R)
+ fatal("rnil %P", p);
+ if(r1 == r) {
+ fatal("ref to self %P", p);
+ continue;
+ }
+ r->s2 = r1;
+ r->p2link = r1->p2;
+ r1->p2 = r;
+ }
+ }
+//dumpit("pass2", firstr);
+
+ /*
+ * pass 2.5
+ * find looping structure
+ */
+ for(r = firstr; r != R; r = r->link)
+ r->active = 0;
+ change = 0;
+ loopit(firstr, nr);
+//dumpit("pass2.5", firstr);
+
+ /*
+ * pass 3
+ * iterate propagating usage
+ * back until flow graph is complete
+ */
+loop1:
+ change = 0;
+ for(r = firstr; r != R; r = r->link)
+ r->active = 0;
+ for(r = firstr; r != R; r = r->link)
+ if(r->prog->as == ARET)
+ prop(r, zbits, zbits);
+loop11:
+ /* pick up unreachable code */
+ i = 0;
+ for(r = firstr; r != R; r = r1) {
+ r1 = r->link;
+ if(r1 && r1->active && !r->active) {
+ prop(r, zbits, zbits);
+ i = 1;
+ }
+ }
+ if(i)
+ goto loop11;
+ if(change)
+ goto loop1;
+
+//dumpit("pass3", firstr);
+
+ /*
+ * pass 4
+ * iterate propagating register/variable synchrony
+ * forward until graph is complete
+ */
+loop2:
+ change = 0;
+ for(r = firstr; r != R; r = r->link)
+ r->active = 0;
+ synch(firstr, zbits);
+ if(change)
+ goto loop2;
+
+//dumpit("pass4", firstr);
+
+ /*
+ * pass 5
+ * isolate regions
+ * calculate costs (paint1)
+ */
+ r = firstr;
+ if(r) {
+ for(z=0; z<BITS; z++)
+ bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
+ ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
+ if(bany(&bit)) {
+ warn("used and not set: %Q", bit);
+ if(debug['R'] && !debug['w'])
+ print("used and not set: %Q\n", bit);
+ }
+ }
+ for(r = firstr; r != R; r = r->link)
+ r->act = zbits;
+ rgp = region;
+ nregion = 0;
+ for(r = firstr; r != R; r = r->link) {
+ for(z=0; z<BITS; z++)
+ bit.b[z] = r->set.b[z] &
+ ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
+ if(bany(&bit)) {
+ warn("set and not used: %Q", bit);
+ if(debug['R'])
+ print("set and not used: %Q\n", bit);
+ excise(r);
+ }
+ for(z=0; z<BITS; z++)
+ bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
+ while(bany(&bit)) {
+ i = bnum(bit);
+ rgp->enter = r;
+ rgp->varno = i;
+ change = 0;
+ if(debug['R'] && debug['v'])
+ print("\n");
+ paint1(r, i);
+ bit.b[i/32] &= ~(1L<<(i%32));
+ if(change <= 0) {
+ if(debug['R'])
+ print("%L$%d: %Q\n",
+ r->prog->lineno, change, blsh(i));
+ continue;
+ }
+ rgp->cost = change;
+ nregion++;
+ if(nregion >= NRGN) {
+ fatal("too many regions");
+ goto brk;
+ }
+ rgp++;
+ }
+ }
+brk:
+ qsort(region, nregion, sizeof(region[0]), rcmp);
+
+ /*
+ * pass 6
+ * determine used registers (paint2)
+ * replace code (paint3)
+ */
+ rgp = region;
+ for(i=0; i<nregion; i++) {
+ bit = blsh(rgp->varno);
+ vreg = paint2(rgp->enter, rgp->varno);
+ vreg = allreg(vreg, rgp);
+ if(rgp->regno != 0)
+ paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
+ rgp++;
+ }
+
+ /*
+ * pass 7
+ * peep-hole on basic block
+ */
+ if(debug['P']) {
+ peep();
+ }
+
+ /*
+ * eliminate nops
+ * free aux structures
+ */
+ for(p=firstp; p!=P; p=p->link) {
+ while(p->link && p->link->as == ANOP)
+ p->link = p->link->link;
+ }
+
+ if(r1 != R) {
+ r1->link = freer;
+ freer = firstr;
+ }
+}
+
+/*
+ * add mov b,rn
+ * just after r
+ */
+void
+addmove(Reg *r, int bn, int rn, int f)
+{
+ Prog *p, *p1;
+ Adr *a;
+ Var *v;
+
+ p1 = mal(sizeof(*p1));
+ clearp(p1);
+ p1->loc = 9999;
+
+ p = r->prog;
+ p1->link = p->link;
+ p->link = p1;
+ p1->lineno = p->lineno;
+
+ v = var + bn;
+
+ a = &p1->to;
+ a->sym = v->sym;
+ a->offset = v->offset;
+ a->etype = v->etype;
+ a->type = v->name;
+
+ // need to chean this up with wptr and
+ // some of the defaults
+ p1->as = AMOVL;
+ switch(v->etype) {
+ default:
+ fatal("unknown type\n");
+ case TINT8:
+ case TUINT8:
+ case TBOOL:
+ p1->as = AMOVB;
+ break;
+ case TINT16:
+ case TUINT16:
+ p1->as = AMOVW;
+ break;
+ case TINT64:
+ case TUINT64:
+ case TUINTPTR:
+ case TPTR64:
+ p1->as = AMOVQ;
+ break;
+ case TFLOAT:
+ case TFLOAT32:
+ p1->as = AMOVSS;
+ break;
+ case TFLOAT64:
+ p1->as = AMOVSS;
+ break;
+ case TINT:
+ case TUINT:
+ case TINT32:
+ case TUINT32:
+ case TPTR32:
+ break;
+ }
+
+ p1->from.type = rn;
+ if(!f) {
+ p1->from = *a;
+ *a = zprog.from;
+ a->type = rn;
+ if(v->etype == TUINT8)
+ p1->as = AMOVB;
+ if(v->etype == TUINT16)
+ p1->as = AMOVW;
+ }
+// if(debug['R'])
+ print("%P\t.a%P\n", p, p1);
+}
+
+uint32
+doregbits(int r)
+{
+ uint32 b;
+
+ b = 0;
+ if(r >= D_INDIR)
+ r -= D_INDIR;
+ if(r >= D_AX && r <= D_R15)
+ b |= RtoB(r);
+ else
+ if(r >= D_AL && r <= D_R15B)
+ b |= RtoB(r-D_AL+D_AX);
+ else
+ if(r >= D_AH && r <= D_BH)
+ b |= RtoB(r-D_AH+D_AX);
+ else
+ if(r >= D_X0 && r <= D_X0+15)
+ b |= FtoB(r);
+ return b;
+}
+
+Bits
+mkvar(Reg *r, Adr *a)
+{
+ Var *v;
+ int i, t, n, et, z;
+ int32 o;
+ Bits bit;
+ Sym *s;
+
+ /*
+ * mark registers used
+ */
+ t = a->type;
+ r->regu |= doregbits(t);
+ r->regu |= doregbits(a->index);
+
+ switch(t) {
+ default:
+ goto none;
+ case D_ADDR:
+ a->type = a->index;
+ bit = mkvar(r, a);
+ for(z=0; z<BITS; z++)
+ addrs.b[z] |= bit.b[z];
+ a->type = t;
+ goto none;
+ case D_EXTERN:
+ case D_STATIC:
+ case D_PARAM:
+ case D_AUTO:
+ n = t;
+ break;
+ }
+ s = a->sym;
+ if(s == S)
+ goto none;
+// if(s->name[0] == '.')
+// goto none;
+ et = a->etype;
+ o = a->offset;
+ v = var;
+ for(i=0; i<nvar; i++) {
+ if(s == v->sym)
+ if(n == v->name)
+ if(o == v->offset)
+ goto out;
+ v++;
+ }
+
+ switch(et) {
+ case TFUNC:
+ case TARRAY:
+ case 0:
+ goto none;
+ }
+
+ if(nvar >= NVAR) {
+ if(debug['w'] > 1 && s)
+ fatal("variable not optimized: %s", s->name);
+ goto none;
+ }
+ i = nvar;
+ nvar++;
+ v = &var[i];
+ v->sym = s;
+ v->offset = o;
+ v->name = n;
+ v->etype = et;
+ if(debug['R'])
+ print("bit=%2d et=%2d %D\n", i, et, a);
+
+out:
+ bit = blsh(i);
+ if(n == D_EXTERN || n == D_STATIC)
+ for(z=0; z<BITS; z++)
+ externs.b[z] |= bit.b[z];
+ if(n == D_PARAM)
+ for(z=0; z<BITS; z++)
+ params.b[z] |= bit.b[z];
+ if(v->etype != et) {
+ /* funny punning */
+print("pun %d %d %S\n", v->etype, et, s);
+ for(z=0; z<BITS; z++)
+ addrs.b[z] |= bit.b[z];
+ }
+
+ return bit;
+
+none:
+ return zbits;
+}
+
+void
+prop(Reg *r, Bits ref, Bits cal)
+{
+ Reg *r1, *r2;
+ int z;
+
+ for(r1 = r; r1 != R; r1 = r1->p1) {
+ for(z=0; z<BITS; z++) {
+ ref.b[z] |= r1->refahead.b[z];
+ if(ref.b[z] != r1->refahead.b[z]) {
+ r1->refahead.b[z] = ref.b[z];
+ change++;
+ }
+ cal.b[z] |= r1->calahead.b[z];
+ if(cal.b[z] != r1->calahead.b[z]) {
+ r1->calahead.b[z] = cal.b[z];
+ change++;
+ }
+ }
+ switch(r1->prog->as) {
+ case ACALL:
+ if(noreturn(r1->prog))
+ break;
+ for(z=0; z<BITS; z++) {
+ cal.b[z] |= ref.b[z] | externs.b[z];
+ ref.b[z] = 0;
+ }
+ break;
+
+ case ATEXT:
+ for(z=0; z<BITS; z++) {
+ cal.b[z] = 0;
+ ref.b[z] = 0;
+ }
+ break;
+
+ case ARET:
+ for(z=0; z<BITS; z++) {
+ cal.b[z] = externs.b[z];
+ ref.b[z] = 0;
+ }
+ }
+ for(z=0; z<BITS; z++) {
+ ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
+ r1->use1.b[z] | r1->use2.b[z];
+ cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
+ r1->refbehind.b[z] = ref.b[z];
+ r1->calbehind.b[z] = cal.b[z];
+ }
+ if(r1->active)
+ break;
+ r1->active = 1;
+ }
+ for(; r != r1; r = r->p1)
+ for(r2 = r->p2; r2 != R; r2 = r2->p2link)
+ prop(r2, r->refbehind, r->calbehind);
+}
+
+/*
+ * find looping structure
+ *
+ * 1) find reverse postordering
+ * 2) find approximate dominators,
+ * the actual dominators if the flow graph is reducible
+ * otherwise, dominators plus some other non-dominators.
+ * See Matthew S. Hecht and Jeffrey D. Ullman,
+ * "Analysis of a Simple Algorithm for Global Data Flow Problems",
+ * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
+ * Oct. 1-3, 1973, pp. 207-217.
+ * 3) find all nodes with a predecessor dominated by the current node.
+ * such a node is a loop head.
+ * recursively, all preds with a greater rpo number are in the loop
+ */
+int32
+postorder(Reg *r, Reg **rpo2r, int32 n)
+{
+ Reg *r1;
+
+ r->rpo = 1;
+ r1 = r->s1;
+ if(r1 && !r1->rpo)
+ n = postorder(r1, rpo2r, n);
+ r1 = r->s2;
+ if(r1 && !r1->rpo)
+ n = postorder(r1, rpo2r, n);
+ rpo2r[n] = r;
+ n++;
+ return n;
+}
+
+int32
+rpolca(int32 *idom, int32 rpo1, int32 rpo2)
+{
+ int32 t;
+
+ if(rpo1 == -1)
+ return rpo2;
+ while(rpo1 != rpo2){
+ if(rpo1 > rpo2){
+ t = rpo2;
+ rpo2 = rpo1;
+ rpo1 = t;
+ }
+ while(rpo1 < rpo2){
+ t = idom[rpo2];
+ if(t >= rpo2)
+ fatal("bad idom");
+ rpo2 = t;
+ }
+ }
+ return rpo1;
+}
+
+int
+doms(int32 *idom, int32 r, int32 s)
+{
+ while(s > r)
+ s = idom[s];
+ return s == r;
+}
+
+int
+loophead(int32 *idom, Reg *r)
+{
+ int32 src;
+
+ src = r->rpo;
+ if(r->p1 != R && doms(idom, src, r->p1->rpo))
+ return 1;
+ for(r = r->p2; r != R; r = r->p2link)
+ if(doms(idom, src, r->rpo))
+ return 1;
+ return 0;
+}
+
+void
+loopmark(Reg **rpo2r, int32 head, Reg *r)
+{
+ if(r->rpo < head || r->active == head)
+ return;
+ r->active = head;
+ r->loop += LOOP;
+ if(r->p1 != R)
+ loopmark(rpo2r, head, r->p1);
+ for(r = r->p2; r != R; r = r->p2link)
+ loopmark(rpo2r, head, r);
+}
+
+void
+loopit(Reg *r, int32 nr)
+{
+ Reg *r1;
+ int32 i, d, me;
+
+ if(nr > maxnr) {
+ rpo2r = mal(nr * sizeof(Reg*));
+ idom = mal(nr * sizeof(int32));
+ maxnr = nr;
+ }
+
+ d = postorder(r, rpo2r, 0);
+ if(d > nr)
+ fatal("too many reg nodes %d %d", d, nr);
+ nr = d;
+ for(i = 0; i < nr / 2; i++) {
+ r1 = rpo2r[i];
+ rpo2r[i] = rpo2r[nr - 1 - i];
+ rpo2r[nr - 1 - i] = r1;
+ }
+ for(i = 0; i < nr; i++)
+ rpo2r[i]->rpo = i;
+
+ idom[0] = 0;
+ for(i = 0; i < nr; i++) {
+ r1 = rpo2r[i];
+ me = r1->rpo;
+ d = -1;
+ if(r1->p1 != R && r1->p1->rpo < me)
+ d = r1->p1->rpo;
+ for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
+ if(r1->rpo < me)
+ d = rpolca(idom, d, r1->rpo);
+ idom[i] = d;
+ }
+
+ for(i = 0; i < nr; i++) {
+ r1 = rpo2r[i];
+ r1->loop++;
+ if(r1->p2 != R && loophead(idom, r1))
+ loopmark(rpo2r, i, r1);
+ }
+}
+
+void
+synch(Reg *r, Bits dif)
+{
+ Reg *r1;
+ int z;
+
+ for(r1 = r; r1 != R; r1 = r1->s1) {
+ for(z=0; z<BITS; z++) {
+ dif.b[z] = (dif.b[z] &
+ ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
+ r1->set.b[z] | r1->regdiff.b[z];
+ if(dif.b[z] != r1->regdiff.b[z]) {
+ r1->regdiff.b[z] = dif.b[z];
+ change++;
+ }
+ }
+ if(r1->active)
+ break;
+ r1->active = 1;
+ for(z=0; z<BITS; z++)
+ dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
+ if(r1->s2 != R)
+ synch(r1->s2, dif);
+ }
+}
+
+uint32
+allreg(uint32 b, Rgn *r)
+{
+ Var *v;
+ int i;
+
+ v = var + r->varno;
+ r->regno = 0;
+ switch(v->etype) {
+
+ default:
+ fatal("unknown etype %d/%d", bitno(b), v->etype);
+ break;
+
+ case TINT8:
+ case TUINT8:
+ case TINT16:
+ case TUINT16:
+ case TINT32:
+ case TUINT32:
+ case TINT64:
+ case TUINT64:
+ case TINT:
+ case TUINT:
+ case TUINTPTR:
+ case TBOOL:
+ case TPTR32:
+ case TPTR64:
+ i = BtoR(~b);
+ if(i && r->cost > 0) {
+ r->regno = i;
+ return RtoB(i);
+ }
+ break;
+
+ case TFLOAT32:
+ case TFLOAT64:
+ case TFLOAT80:
+ case TFLOAT:
+ i = BtoF(~b);
+ if(i && r->cost > 0) {
+ r->regno = i;
+ return FtoB(i);
+ }
+ break;
+ }
+ return 0;
+}
+
+void
+paint1(Reg *r, int bn)
+{
+ Reg *r1;
+ Prog *p;
+ int z;
+ uint32 bb;
+
+ z = bn/32;
+ bb = 1L<<(bn%32);
+ if(r->act.b[z] & bb)
+ return;
+ for(;;) {
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ r1 = r->p1;
+ if(r1 == R)
+ break;
+ if(!(r1->refahead.b[z] & bb))
+ break;
+ if(r1->act.b[z] & bb)
+ break;
+ r = r1;
+ }
+
+ if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
+ change -= CLOAD * r->loop;
+ if(debug['R'] && debug['v'])
+ print("%ld%P\tld %Q $%d\n", r->loop,
+ r->prog, blsh(bn), change);
+ }
+ for(;;) {
+ r->act.b[z] |= bb;
+ p = r->prog;
+
+ if(r->use1.b[z] & bb) {
+ change += CREF * r->loop;
+ if(debug['R'] && debug['v'])
+ print("%ld%P\tu1 %Q $%d\n", r->loop,
+ p, blsh(bn), change);
+ }
+
+ if((r->use2.b[z]|r->set.b[z]) & bb) {
+ change += CREF * r->loop;
+ if(debug['R'] && debug['v'])
+ print("%ld%P\tu2 %Q $%d\n", r->loop,
+ p, blsh(bn), change);
+ }
+
+ if(STORE(r) & r->regdiff.b[z] & bb) {
+ change -= CLOAD * r->loop;
+ if(debug['R'] && debug['v'])
+ print("%ld%P\tst %Q $%d\n", r->loop,
+ p, blsh(bn), change);
+ }
+
+ if(r->refbehind.b[z] & bb)
+ for(r1 = r->p2; r1 != R; r1 = r1->p2link)
+ if(r1->refahead.b[z] & bb)
+ paint1(r1, bn);
+
+ if(!(r->refahead.b[z] & bb))
+ break;
+ r1 = r->s2;
+ if(r1 != R)
+ if(r1->refbehind.b[z] & bb)
+ paint1(r1, bn);
+ r = r->s1;
+ if(r == R)
+ break;
+ if(r->act.b[z] & bb)
+ break;
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ }
+}
+
+uint32
+regset(Reg *r, uint32 bb)
+{
+ uint32 b, set;
+ Adr v;
+ int c;
+
+ set = 0;
+ v = zprog.from;
+ while(b = bb & ~(bb-1)) {
+ v.type = b & 0xFFFF? BtoR(b): BtoF(b);
+ if(v.type == 0)
+ fatal("zero v.type for %#lux", b);
+ c = copyu(r->prog, &v, A);
+ if(c == 3)
+ set |= b;
+ bb &= ~b;
+ }
+ return set;
+}
+
+uint32
+reguse(Reg *r, uint32 bb)
+{
+ uint32 b, set;
+ Adr v;
+ int c;
+
+ set = 0;
+ v = zprog.from;
+ while(b = bb & ~(bb-1)) {
+ v.type = b & 0xFFFF? BtoR(b): BtoF(b);
+ c = copyu(r->prog, &v, A);
+ if(c == 1 || c == 2 || c == 4)
+ set |= b;
+ bb &= ~b;
+ }
+ return set;
+}
+
+uint32
+paint2(Reg *r, int bn)
+{
+ Reg *r1;
+ int z;
+ uint32 bb, vreg, x;
+
+ z = bn/32;
+ bb = 1L << (bn%32);
+ vreg = regbits;
+ if(!(r->act.b[z] & bb))
+ return vreg;
+ for(;;) {
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ r1 = r->p1;
+ if(r1 == R)
+ break;
+ if(!(r1->refahead.b[z] & bb))
+ break;
+ if(!(r1->act.b[z] & bb))
+ break;
+ r = r1;
+ }
+ for(;;) {
+ r->act.b[z] &= ~bb;
+
+ vreg |= r->regu;
+
+ if(r->refbehind.b[z] & bb)
+ for(r1 = r->p2; r1 != R; r1 = r1->p2link)
+ if(r1->refahead.b[z] & bb)
+ vreg |= paint2(r1, bn);
+
+ if(!(r->refahead.b[z] & bb))
+ break;
+ r1 = r->s2;
+ if(r1 != R)
+ if(r1->refbehind.b[z] & bb)
+ vreg |= paint2(r1, bn);
+ r = r->s1;
+ if(r == R)
+ break;
+ if(!(r->act.b[z] & bb))
+ break;
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ }
+
+ bb = vreg;
+ for(; r; r=r->s1) {
+ x = r->regu & ~bb;
+ if(x) {
+ vreg |= reguse(r, x);
+ bb |= regset(r, x);
+ }
+ }
+ return vreg;
+}
+
+void
+paint3(Reg *r, int bn, int32 rb, int rn)
+{
+ Reg *r1;
+ Prog *p;
+ int z;
+ uint32 bb;
+
+ z = bn/32;
+ bb = 1L << (bn%32);
+ if(r->act.b[z] & bb)
+ return;
+ for(;;) {
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ r1 = r->p1;
+ if(r1 == R)
+ break;
+ if(!(r1->refahead.b[z] & bb))
+ break;
+ if(r1->act.b[z] & bb)
+ break;
+ r = r1;
+ }
+
+ if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
+ addmove(r, bn, rn, 0);
+ for(;;) {
+ r->act.b[z] |= bb;
+ p = r->prog;
+
+ if(r->use1.b[z] & bb) {
+ if(debug['R'])
+ print("%P", p);
+ addreg(&p->from, rn);
+ if(debug['R'])
+ print("\t.c%P\n", p);
+ }
+ if((r->use2.b[z]|r->set.b[z]) & bb) {
+ if(debug['R'])
+ print("%P", p);
+ addreg(&p->to, rn);
+ if(debug['R'])
+ print("\t.c%P\n", p);
+ }
+
+ if(STORE(r) & r->regdiff.b[z] & bb)
+ addmove(r, bn, rn, 1);
+ r->regu |= rb;
+
+ if(r->refbehind.b[z] & bb)
+ for(r1 = r->p2; r1 != R; r1 = r1->p2link)
+ if(r1->refahead.b[z] & bb)
+ paint3(r1, bn, rb, rn);
+
+ if(!(r->refahead.b[z] & bb))
+ break;
+ r1 = r->s2;
+ if(r1 != R)
+ if(r1->refbehind.b[z] & bb)
+ paint3(r1, bn, rb, rn);
+ r = r->s1;
+ if(r == R)
+ break;
+ if(r->act.b[z] & bb)
+ break;
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ }
+}
+
+void
+addreg(Adr *a, int rn)
+{
+
+ a->sym = 0;
+ a->offset = 0;
+ a->type = rn;
+}
+
+int32
+RtoB(int r)
+{
+
+ if(r < D_AX || r > D_R15)
+ return 0;
+ return 1L << (r-D_AX);
+}
+
+int
+BtoR(int32 b)
+{
+
+ b &= 0xffffL;
+ if(b == 0)
+ return 0;
+ return bitno(b) + D_AX;
+}
+
+/*
+ * bit reg
+ * 16 X5
+ * 17 X6
+ * 18 X7
+ */
+int32
+FtoB(int f)
+{
+ if(f < FREGMIN || f > FREGEXT)
+ return 0;
+ return 1L << (f - FREGMIN + 16);
+}
+
+int
+BtoF(int32 b)
+{
+
+ b &= 0x70000L;
+ if(b == 0)
+ return 0;
+ return bitno(b) - 16 + FREGMIN;
+}
+
+static void
+dumpit(char *str, Reg *r0)
+{
+ Reg *r, *r1;
+ int z;
+ Bits bit;
+
+ print("\n%s\n", str);
+ for(r = r0; r != R; r = r->link) {
+ print("%ld:%P", r->loop, r->prog);
+ for(z=0; z<BITS; z++)
+ bit.b[z] =
+ r->set.b[z] |
+ r->use1.b[z] |
+ r->use2.b[z] |
+ r->refbehind.b[z] |
+ r->refahead.b[z] |
+ r->calbehind.b[z] |
+ r->calahead.b[z] |
+ r->regdiff.b[z] |
+ r->act.b[z] |
+ 0;
+ if(bany(&bit)) {
+ print("\t");
+ if(bany(&r->set))
+ print(" s:%Q", r->set);
+ if(bany(&r->use1))
+ print(" u1:%Q", r->use1);
+ if(bany(&r->use2))
+ print(" u2:%Q", r->use2);
+ if(bany(&r->refbehind))
+ print(" rb:%Q ", r->refbehind);
+ if(bany(&r->refahead))
+ print(" ra:%Q ", r->refahead);
+ if(bany(&r->calbehind))
+ print("cb:%Q ", r->calbehind);
+ if(bany(&r->calahead))
+ print(" ca:%Q ", r->calahead);
+ if(bany(&r->regdiff))
+ print(" d:%Q ", r->regdiff);
+ if(bany(&r->act))
+ print(" a:%Q ", r->act);
+ }
+ print("\n");
+ r1 = r->p2;
+ if(r1 != R) {
+ print(" pred:");
+ for(; r1 != R; r1 = r1->p2link)
+ print(" %.4lud", r1->prog->loc);
+ print("\n");
+ }
+// r1 = r->s1;
+// if(r1 != R) {
+// print(" succ:");
+// for(; r1 != R; r1 = r1->s1)
+// print(" %.4lud", r1->prog->loc);
+// print("\n");
+// }
+ }
+}
+
+static Sym* symlist[10];
+
+static int
+noreturn(Prog *p)
+{
+ Sym *s;
+ int i;
+
+ if(symlist[0] == S) {
+ symlist[0] = pkglookup("throwindex", "sys");
+ }
+
+ s = p->to.sym;
+ if(s == S)
+ return 0;
+ for(i=0; symlist[i]!=S; i++)
+ if(s == symlist[i])
+ return 1;
+ return 0;
+}