Input code like
0000 (x.go:2) TEXT main+0(SB),$36-0
0001 (x.go:3) MOVL $5,i+-8(SP)
0002 (x.go:3) MOVL $0,i+-4(SP)
0003 (x.go:4) MOVL $1,BX
0004 (x.go:4) MOVL i+-8(SP),AX
0005 (x.go:4) MOVL i+-4(SP),DX
0006 (x.go:4) MOVL AX,autotmp_0000+-20(SP)
0007 (x.go:4) MOVL DX,autotmp_0000+-16(SP)
0008 (x.go:4) MOVL autotmp_0000+-20(SP),CX
0009 (x.go:4) CMPL autotmp_0000+-16(SP),$0
0010 (x.go:4) JNE ,13
0011 (x.go:4) CMPL CX,$32
0012 (x.go:4) JCS ,14
0013 (x.go:4) MOVL $0,BX
0014 (x.go:4) SHLL CX,BX
0015 (x.go:4) MOVL BX,x+-12(SP)
0016 (x.go:5) MOVL x+-12(SP),AX
0017 (x.go:5) CDQ ,
0018 (x.go:5) MOVL AX,autotmp_0001+-28(SP)
0019 (x.go:5) MOVL DX,autotmp_0001+-24(SP)
0020 (x.go:5) MOVL autotmp_0001+-28(SP),AX
0021 (x.go:5) MOVL autotmp_0001+-24(SP),DX
0022 (x.go:5) MOVL AX,(SP)
0023 (x.go:5) MOVL DX,4(SP)
0024 (x.go:5) CALL ,runtime.printint+0(SB)
0025 (x.go:5) CALL ,runtime.printnl+0(SB)
0026 (x.go:6) RET ,
is problematic because the liveness range for
autotmp_0000 (0006-0009) is nested completely
inside a span where BX holds a live value (0003-0015).
Because the register allocator only looks at 0006-0009
to see which registers are used, it misses the fact that
BX is unavailable and uses it anyway.
The n->pun = anyregalloc() check in tempname is
a workaround for this bug, but I hit it again because
I did the tempname call before allocating BX, even
though I then used the temporary after storing in BX.
This should fix the real bug, and then we can remove
the workaround in tempname.
The code creates pseudo-variables for each register
and includes that information in the liveness propagation.
Then the regu fields can be populated using that more
complete information. With that approach, BX is marked
as in use on every line in the whole span 0003-0015,
so that the decision about autotmp_0000
(using only 0006-0009) still has all the information
it needs.
This is not specific to the 386, but it only happens in
generated code of the form
load R1
...
load var into R2
...
store R2 back into var
...
use R1
and for the most part the other compilers generate
the loads for a given compiled line before any of
the stores. Even so, this may not be the case everywhere,
so the change is worth making in all three.
R=ken2, ken, ken
CC=golang-dev
https://golang.org/cl/
4529106
#include "gg.h"
#include "opt.h"
+#define NREGVAR 24
+#define REGBITS ((uint32)0xffffff)
#define P2R(p) (Reg*)(p->reg)
void addsplits(void);
}
}
+static char* regname[] = {
+ ".R0",
+ ".R1",
+ ".R2",
+ ".R3",
+ ".R4",
+ ".R5",
+ ".R6",
+ ".R7",
+ ".R8",
+ ".R9",
+ ".R10",
+ ".R11",
+ ".R12",
+ ".R13",
+ ".R14",
+ ".R15",
+ ".F0",
+ ".F1",
+ ".F2",
+ ".F3",
+ ".F4",
+ ".F5",
+ ".F6",
+ ".F7",
+};
+
void
regopt(Prog *firstp)
{
int i, z, nr;
uint32 vreg;
Bits bit;
-
+
if(first == 0) {
fmtinstall('Q', Qconv);
}
r1 = R;
firstr = R;
lastr = R;
- nvar = 0;
+
+ /*
+ * control flow is more complicated in generated go code
+ * than in generated c code. define pseudo-variables for
+ * registers, so we have complete register usage information.
+ */
+ nvar = NREGVAR;
+ memset(var, 0, NREGVAR*sizeof var[0]);
+ for(i=0; i<NREGVAR; i++)
+ var[i].sym = lookup(regname[i]);
+
regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC);
for(z=0; z<BITS; z++) {
externs.b[z] = 0;
bit = mkvar(r, &p->from);
for(z=0; z<BITS; z++)
r->use1.b[z] |= bit.b[z];
+
+ /*
+ * middle always read when present
+ */
+ if(p->reg != NREG) {
+ if(p->from.type != D_FREG)
+ r->use1.b[0] |= RtoB(p->reg);
+ else
+ r->use1.b[0] |= FtoB(p->reg);
+ }
/*
* right side depends on opcode
default:
yyerror("reg: unknown op: %A", p->as);
break;
+
+ /*
+ * right side read
+ */
+ case ATST:
+ case ATEQ:
+ case ACMP:
+ case ACMN:
+ case ACMPD:
+ case ACMPF:
+ rightread:
+ for(z=0; z<BITS; z++)
+ r->use2.b[z] |= bit.b[z];
+ break;
+
+ /*
+ * right side read or read+write, depending on middle
+ * ADD x, z => z += x
+ * ADD x, y, z => z = x + y
+ */
+ case AADD:
+ case AAND:
+ case AEOR:
+ case ASUB:
+ case ARSB:
+ case AADC:
+ case ASBC:
+ case ARSC:
+ case AORR:
+ case ABIC:
+ case ASLL:
+ case ASRL:
+ case ASRA:
+ case AMUL:
+ case AMULU:
+ case ADIV:
+ case AMOD:
+ case AMODU:
+ case ADIVU:
+ if(p->reg != NREG)
+ goto rightread;
+ // fall through
+
+ /*
+ * right side read+write
+ */
+ case AADDF:
+ case AADDD:
+ case ASUBF:
+ case ASUBD:
+ case AMULF:
+ case AMULD:
+ case ADIVF:
+ case ADIVD:
+ case AMULAL:
+ case AMULALU:
+ for(z=0; z<BITS; z++) {
+ r->use2.b[z] |= bit.b[z];
+ r->set.b[z] |= bit.b[z];
+ }
+ break;
/*
* right side write
case ANOP:
case AMOVB:
case AMOVBU:
+ case AMOVD:
+ case AMOVDF:
+ case AMOVDW:
+ case AMOVF:
+ case AMOVFW:
case AMOVH:
case AMOVHU:
case AMOVW:
- case AMOVF:
- case AMOVD:
+ case AMOVWD:
+ case AMOVWF:
+ case AMVN:
+ case AMULL:
+ case AMULLU:
+ if((p->scond & C_SCOND) != C_SCOND_NONE)
+ for(z=0; z<BITS; z++)
+ r->use2.b[z] |= bit.b[z];
for(z=0; z<BITS; z++)
r->set.b[z] |= bit.b[z];
break;
}
}
+ /*
+ * pass 4.5
+ * move register pseudo-variables into regu.
+ */
+ for(r = firstr; r != R; r = r->link) {
+ r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
+
+ r->set.b[0] &= ~REGBITS;
+ r->use1.b[0] &= ~REGBITS;
+ r->use2.b[0] &= ~REGBITS;
+ r->refbehind.b[0] &= ~REGBITS;
+ r->refahead.b[0] &= ~REGBITS;
+ r->calbehind.b[0] &= ~REGBITS;
+ r->calahead.b[0] &= ~REGBITS;
+ r->regdiff.b[0] &= ~REGBITS;
+ r->act.b[0] &= ~REGBITS;
+ }
+
/*
* pass 5
* isolate regions
goto onereg;
case D_REGREG:
+ bit = zbits;
if(a->offset != NREG)
- r->regu |= RtoB(a->offset);
- goto onereg;
+ bit.b[0] |= RtoB(a->offset);
+ if(a->reg != NREG)
+ bit.b[0] |= RtoB(a->reg);
+ return bit;
case D_REG:
case D_SHIFT:
- case D_OREG:
onereg:
- if(a->reg != NREG)
- r->regu |= RtoB(a->reg);
+ if(a->reg != NREG) {
+ bit = zbits;
+ bit.b[0] = RtoB(a->reg);
+ return bit;
+ }
+ break;
+
+ case D_OREG:
+ if(a->reg != NREG) {
+ if(a == &r->prog->from)
+ r->use1.b[0] |= RtoB(a->reg);
+ else
+ r->use2.b[0] |= RtoB(a->reg);
+ if(r->prog->scond & (C_PBIT|C_WBIT))
+ r->set.b[0] |= RtoB(a->reg);
+ }
break;
case D_FREG:
- if(a->reg != NREG)
- r->regu |= FtoB(a->reg);
+ if(a->reg != NREG) {
+ bit = zbits;
+ bit.b[0] = FtoB(a->reg);
+ return bit;
+ }
break;
}
#define EXTERN
#include "opt.h"
+#define NREGVAR 32 /* 16 general + 16 floating */
+#define REGBITS ((uint32)0xffffffff)
#define P2R(p) (Reg*)(p->reg)
static int first = 1;
}
}
+static char* regname[] = {
+ ".AX",
+ ".CX",
+ ".DX",
+ ".BX",
+ ".SP",
+ ".BP",
+ ".SI",
+ ".DI",
+ ".R8",
+ ".R9",
+ ".R10",
+ ".R11",
+ ".R12",
+ ".R13",
+ ".R14",
+ ".R15",
+ ".X0",
+ ".X1",
+ ".X2",
+ ".X3",
+ ".X4",
+ ".X5",
+ ".X6",
+ ".X7",
+ ".X8",
+ ".X9",
+ ".X10",
+ ".X11",
+ ".X12",
+ ".X13",
+ ".X14",
+ ".X15",
+};
+
void
regopt(Prog *firstp)
{
firstr = R;
lastr = R;
nvar = 0;
+
+ /*
+ * control flow is more complicated in generated go code
+ * than in generated c code. define pseudo-variables for
+ * registers, so we have complete register usage information.
+ */
+ nvar = NREGVAR;
+ memset(var, 0, NREGVAR*sizeof var[0]);
+ for(i=0; i<NREGVAR; i++)
+ var[i].sym = lookup(regname[i]);
+
regbits = RtoB(D_SP);
for(z=0; z<BITS; z++) {
externs.b[z] = 0;
case ACOMISD:
case AUCOMISS:
case AUCOMISD:
+ case ATESTB:
+ case ATESTL:
+ case ATESTQ:
for(z=0; z<BITS; z++)
r->use2.b[z] |= bit.b[z];
break;
/*
* right side write
*/
+ case ALEAQ:
case ANOP:
case AMOVL:
case AMOVQ:
case AMOVW:
case AMOVBLSX:
case AMOVBLZX:
+ case AMOVBWSX:
+ case AMOVBWZX:
case AMOVBQSX:
case AMOVBQZX:
case AMOVLQSX:
case AMOVWLZX:
case AMOVWQSX:
case AMOVWQZX:
+ case APOPQ:
case AMOVSS:
case AMOVSD:
case AIMULL:
case AIMULQ:
case AIMULW:
+ case ANEGB:
+ case ANEGW:
case ANEGL:
case ANEGQ:
case ANOTL:
case ASBBL:
case ASBBQ:
+ case ASETCC:
+ case ASETCS:
+ case ASETEQ:
+ case ASETGE:
+ case ASETGT:
+ case ASETHI:
+ case ASETLE:
+ case ASETLS:
+ case ASETLT:
+ case ASETMI:
+ case ASETNE:
+ case ASETOC:
+ case ASETOS:
+ case ASETPC:
+ case ASETPL:
+ case ASETPS:
+
case AXCHGB:
case AXCHGW:
case AXCHGL:
if(p->to.type != D_NONE)
break;
- case AIDIVB:
case AIDIVL:
- case AIDIVQ:
case AIDIVW:
- case AIMULB:
- case ADIVB:
+ case AIDIVQ:
case ADIVL:
- case ADIVQ:
case ADIVW:
- case AMULB:
+ case ADIVQ:
case AMULL:
- case AMULQ:
case AMULW:
+ case AMULQ:
+ r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX);
+ r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DX);
+ break;
+
+ case AIDIVB:
+ case AIMULB:
+ case ADIVB:
+ case AMULB:
+ r->set.b[0] |= RtoB(D_AX);
+ r->use1.b[0] |= RtoB(D_AX);
+ break;
case ACWD:
- case ACDQ:
- case ACQO:
- r->regu |= RtoB(D_AX) | RtoB(D_DX);
+ r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX);
+ r->use1.b[0] |= RtoB(D_AX);
break;
+ case ACDQ:
+ r->set.b[0] |= RtoB(D_DX);
+ r->use1.b[0] |= RtoB(D_AX);
+ break;
+
case AREP:
case AREPN:
case ALOOP:
case ALOOPEQ:
case ALOOPNE:
- r->regu |= RtoB(D_CX);
+ r->set.b[0] |= RtoB(D_CX);
+ r->use1.b[0] |= RtoB(D_CX);
break;
case AMOVSB:
case ACMPSL:
case ACMPSQ:
case ACMPSW:
- r->regu |= RtoB(D_SI) | RtoB(D_DI);
+ r->set.b[0] |= RtoB(D_SI) | RtoB(D_DI);
+ r->use1.b[0] |= RtoB(D_SI) | RtoB(D_DI);
break;
case ASTOSB:
case ASCASL:
case ASCASQ:
case ASCASW:
- r->regu |= RtoB(D_AX) | RtoB(D_DI);
+ r->set.b[0] |= RtoB(D_DI);
+ r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DI);
break;
case AINSB:
case AINSL:
case AINSW:
+ r->set.b[0] |= RtoB(D_DX) | RtoB(D_DI);
+ r->use1.b[0] |= RtoB(D_DI);
+ break;
+
case AOUTSB:
case AOUTSL:
case AOUTSW:
- r->regu |= RtoB(D_DI) | RtoB(D_DX);
+ r->set.b[0] |= RtoB(D_DI);
+ r->use1.b[0] |= RtoB(D_DX) | RtoB(D_DI);
break;
}
}
if(debug['R'] && debug['v'])
dumpit("pass4", firstr);
+ /*
+ * pass 4.5
+ * move register pseudo-variables into regu.
+ */
+ for(r = firstr; r != R; r = r->link) {
+ r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
+
+ r->set.b[0] &= ~REGBITS;
+ r->use1.b[0] &= ~REGBITS;
+ r->use2.b[0] &= ~REGBITS;
+ r->refbehind.b[0] &= ~REGBITS;
+ r->refahead.b[0] &= ~REGBITS;
+ r->calbehind.b[0] &= ~REGBITS;
+ r->calahead.b[0] &= ~REGBITS;
+ r->regdiff.b[0] &= ~REGBITS;
+ r->act.b[0] &= ~REGBITS;
+ }
+
/*
* pass 5
* isolate regions
{
Var *v;
int i, t, n, et, z, w, flag;
+ uint32 regu;
int32 o;
Bits bit;
Sym *s;
if(t == D_NONE)
goto none;
- if(r != R) {
- r->regu |= doregbits(t);
- r->regu |= doregbits(a->index);
- }
+ if(r != R)
+ r->use1.b[0] |= doregbits(a->index);
switch(t) {
default:
- goto none;
+ regu = doregbits(t);
+ if(regu == 0)
+ goto none;
+ bit = zbits;
+ bit.b[0] = regu;
+ return bit;
case D_ADDR:
a->type = a->index;
#define EXTERN
#include "opt.h"
+#define NREGVAR 8
+#define REGBITS ((uint32)0xff)
#define P2R(p) (Reg*)(p->reg)
static int first = 1;
}
}
+static char* regname[] = { ".ax", ".cx", ".dx", ".bx", ".sp", ".bp", ".si", ".di" };
+
void
regopt(Prog *firstp)
{
r1 = R;
firstr = R;
lastr = R;
- nvar = 0;
+
+ /*
+ * control flow is more complicated in generated go code
+ * than in generated c code. define pseudo-variables for
+ * registers, so we have complete register usage information.
+ */
+ nvar = NREGVAR;
+ memset(var, 0, NREGVAR*sizeof var[0]);
+ for(i=0; i<NREGVAR; i++)
+ var[i].sym = lookup(regname[i]);
+
regbits = RtoB(D_SP);
for(z=0; z<BITS; z++) {
externs.b[z] = 0;
/*
* right side write
*/
+ case AFSTSW:
+ case ALEAL:
case ANOP:
case AMOVL:
case AMOVB:
case AMOVW:
case AMOVBLSX:
case AMOVBLZX:
+ case AMOVBWSX:
+ case AMOVBWZX:
case AMOVWLSX:
case AMOVWLZX:
+ case APOPL:
for(z=0; z<BITS; z++)
r->set.b[z] |= bit.b[z];
break;
case AADCL:
case ASBBL:
+ case ASETCC:
+ case ASETCS:
+ case ASETEQ:
+ case ASETGE:
+ case ASETGT:
+ case ASETHI:
+ case ASETLE:
+ case ASETLS:
+ case ASETLT:
+ case ASETMI:
+ case ASETNE:
+ case ASETOC:
+ case ASETOS:
+ case ASETPC:
+ case ASETPL:
+ case ASETPS:
+
case AXCHGB:
case AXCHGW:
case AXCHGL:
if(p->to.type != D_NONE)
break;
- case AIDIVB:
case AIDIVL:
case AIDIVW:
- case AIMULB:
- case ADIVB:
case ADIVL:
case ADIVW:
- case AMULB:
case AMULL:
case AMULW:
+ r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX);
+ r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DX);
+ break;
+
+ case AIDIVB:
+ case AIMULB:
+ case ADIVB:
+ case AMULB:
+ r->set.b[0] |= RtoB(D_AX);
+ r->use1.b[0] |= RtoB(D_AX);
+ break;
case ACWD:
+ r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX);
+ r->use1.b[0] |= RtoB(D_AX);
+ break;
+
case ACDQ:
- r->regu |= RtoB(D_AX) | RtoB(D_DX);
+ r->set.b[0] |= RtoB(D_DX);
+ r->use1.b[0] |= RtoB(D_AX);
break;
case AREP:
case ALOOP:
case ALOOPEQ:
case ALOOPNE:
- r->regu |= RtoB(D_CX);
+ r->set.b[0] |= RtoB(D_CX);
+ r->use1.b[0] |= RtoB(D_CX);
break;
case AMOVSB:
case ACMPSB:
case ACMPSL:
case ACMPSW:
- r->regu |= RtoB(D_SI) | RtoB(D_DI);
+ r->set.b[0] |= RtoB(D_SI) | RtoB(D_DI);
+ r->use1.b[0] |= RtoB(D_SI) | RtoB(D_DI);
break;
case ASTOSB:
case ASCASB:
case ASCASL:
case ASCASW:
- r->regu |= RtoB(D_AX) | RtoB(D_DI);
+ r->set.b[0] |= RtoB(D_DI);
+ r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DI);
break;
case AINSB:
case AINSL:
case AINSW:
+ r->set.b[0] |= RtoB(D_DX) | RtoB(D_DI);
+ r->use1.b[0] |= RtoB(D_DI);
+ break;
+
case AOUTSB:
case AOUTSL:
case AOUTSW:
- r->regu |= RtoB(D_DI) | RtoB(D_DX);
+ r->set.b[0] |= RtoB(D_DI);
+ r->use1.b[0] |= RtoB(D_DX) | RtoB(D_DI);
break;
}
}
if(debug['R'] && debug['v'])
dumpit("pass4", firstr);
+ /*
+ * pass 4.5
+ * move register pseudo-variables into regu.
+ */
+ for(r = firstr; r != R; r = r->link) {
+ r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
+
+ r->set.b[0] &= ~REGBITS;
+ r->use1.b[0] &= ~REGBITS;
+ r->use2.b[0] &= ~REGBITS;
+ r->refbehind.b[0] &= ~REGBITS;
+ r->refahead.b[0] &= ~REGBITS;
+ r->calbehind.b[0] &= ~REGBITS;
+ r->calahead.b[0] &= ~REGBITS;
+ r->regdiff.b[0] &= ~REGBITS;
+ r->act.b[0] &= ~REGBITS;
+ }
+
/*
* pass 5
* isolate regions
mkvar(Reg *r, Adr *a)
{
Var *v;
- int i, t, n, et, z, w, flag;
+ int i, t, n, et, z, w, flag, regu;
int32 o;
Bits bit;
Sym *s;
if(t == D_NONE)
goto none;
- if(r != R) {
- r->regu |= doregbits(t);
- r->regu |= doregbits(a->index);
- }
+ if(r != R)
+ r->use1.b[0] |= doregbits(a->index);
switch(t) {
default:
- goto none;
+ regu = doregbits(t);
+ if(regu == 0)
+ goto none;
+ bit = zbits;
+ bit.b[0] = regu;
+ return bit;
case D_ADDR:
a->type = a->index;
if(thechar == '5')
stksize = rnd(stksize, widthptr);
n->xoffset = -stksize;
- n->pun = anyregalloc();
}