From: Russ Cox Date: Tue, 4 Oct 2011 19:06:16 +0000 (-0400) Subject: 5g, 6g, 8g: fix loop finding bug, squash jmps X-Git-Tag: weekly.2011-10-06~36 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=e2d326b87893c73c9f4a91c95bb9e011046e366c;p=gostls13.git 5g, 6g, 8g: fix loop finding bug, squash jmps The loop recognizer uses the standard dominance frontiers but gets confused by dead code, which has a (not explicitly set) rpo number of 0, meaning it looks like the head of the function, so it dominates everything. If the loop recognizer encounters dead code while tracking backward through the graph it fails to recognize where it started as a loop, and then the optimizer does not registerize values loaded inside that loop. Fix by checking rpo against rpo2r. Separately, run a quick pass over the generated code to squash JMPs to JMP instructions, which are convenient to emit during code generation but difficult to read when debugging the -S output. A side effect of this pass is to eliminate dead code, so the output files may be slightly smaller and the optimizer may have less work to do. There is no semantic effect, because the linkers flatten JMP chains and delete dead instructions when laying out the final code. Doing it here too just makes the -S output easier to read and more like what the final binary will contain. The "dead code breaks loop finding" bug is thus fixed twice over. It seemed prudent to fix loopit separately just in case dead code ever sneaks back in for one reason or another. R=ken2 CC=golang-dev https://golang.org/cl/5190043 --- diff --git a/src/cmd/5g/reg.c b/src/cmd/5g/reg.c index a2e99492d6..b72b9c1657 100644 --- a/src/cmd/5g/reg.c +++ b/src/cmd/5g/reg.c @@ -42,6 +42,9 @@ int noreturn(Prog *p); static int first = 0; +static void fixjmp(Prog*); + + Reg* rega(void) { @@ -171,6 +174,8 @@ regopt(Prog *firstp) if(first == 0) { fmtinstall('Q', Qconv); } + + fixjmp(firstp); first++; if(debug['K']) { @@ -1159,10 +1164,12 @@ loopit(Reg *r, int32 nr) r1 = rpo2r[i]; me = r1->rpo; d = -1; - if(r1->p1 != R && r1->p1->rpo < me) + // rpo2r[r->rpo] == r protects against considering dead code, + // which has r->rpo == 0. + if(r1->p1 != R && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me) d = r1->p1->rpo; for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) - if(r1->rpo < me) + if(rpo2r[r1->rpo] == r1 && r1->rpo < me) d = rpolca(idom, d, r1->rpo); idom[i] = d; } @@ -1608,3 +1615,123 @@ dumpit(char *str, Reg *r0) // } } } + +/* + * the code generator depends on being able to write out JMP (B) + * instructions that it can jump to now but fill in later. + * the linker will resolve them nicely, but they make the code + * longer and more difficult to follow during debugging. + * remove them. + */ + +/* what instruction does a JMP to p eventually land on? */ +static Prog* +chasejmp(Prog *p, int *jmploop) +{ + int n; + + n = 0; + while(p != P && p->as == AB && p->to.type == D_BRANCH) { + if(++n > 10) { + *jmploop = 1; + break; + } + p = p->to.branch; + } + return p; +} + +/* + * reuse reg pointer for mark/sweep state. + * leave reg==nil at end because alive==nil. + */ +#define alive ((void*)0) +#define dead ((void*)1) + +/* mark all code reachable from firstp as alive */ +static void +mark(Prog *firstp) +{ + Prog *p; + + for(p=firstp; p; p=p->link) { + if(p->regp != dead) + break; + p->regp = alive; + if(p->as != ABL && p->to.type == D_BRANCH && p->to.branch) + mark(p->to.branch); + if(p->as == AB || p->as == ARET || (p->as == ABL && noreturn(p))) + break; + } +} + +static void +fixjmp(Prog *firstp) +{ + int jmploop; + Prog *p, *last; + + if(debug['R'] && debug['v']) + print("\nfixjmp\n"); + + // pass 1: resolve jump to B, mark all code as dead. + jmploop = 0; + for(p=firstp; p; p=p->link) { + if(debug['R'] && debug['v']) + print("%P\n", p); + if(p->as != ABL && p->to.type == D_BRANCH && p->to.branch && p->to.branch->as == AB) { + p->to.branch = chasejmp(p->to.branch, &jmploop); + if(debug['R'] && debug['v']) + print("->%P\n", p); + } + p->regp = dead; + } + if(debug['R'] && debug['v']) + print("\n"); + + // pass 2: mark all reachable code alive + mark(firstp); + + // pass 3: delete dead code (mostly JMPs). + last = nil; + for(p=firstp; p; p=p->link) { + if(p->regp == dead) { + if(p->link == P && p->as == ARET && last && last->as != ARET) { + // This is the final ARET, and the code so far doesn't have one. + // Let it stay. + } else { + if(debug['R'] && debug['v']) + print("del %P\n", p); + continue; + } + } + if(last) + last->link = p; + last = p; + } + last->link = P; + + // pass 4: elide JMP to next instruction. + // only safe if there are no jumps to JMPs anymore. + if(!jmploop) { + last = nil; + for(p=firstp; p; p=p->link) { + if(p->as == AB && p->to.type == D_BRANCH && p->to.branch == p->link) { + if(debug['R'] && debug['v']) + print("del %P\n", p); + continue; + } + if(last) + last->link = p; + last = p; + } + last->link = P; + } + + if(debug['R'] && debug['v']) { + print("\n"); + for(p=firstp; p; p=p->link) + print("%P\n", p); + print("\n"); + } +} diff --git a/src/cmd/6g/reg.c b/src/cmd/6g/reg.c index d12d4b19b7..82a2ce3125 100644 --- a/src/cmd/6g/reg.c +++ b/src/cmd/6g/reg.c @@ -151,6 +151,8 @@ static char* regname[] = { ".X15", }; +static void fixjmp(Prog*); + void regopt(Prog *firstp) { @@ -166,6 +168,8 @@ regopt(Prog *firstp) first = 0; } + fixjmp(firstp); + // count instructions nr = 0; for(p=firstp; p!=P; p=p->link) @@ -800,9 +804,9 @@ brk: if(ostats.ndelmov) print(" %4d delmov\n", ostats.ndelmov); if(ostats.nvar) - print(" %4d delmov\n", ostats.nvar); + print(" %4d var\n", ostats.nvar); if(ostats.naddr) - print(" %4d delmov\n", ostats.naddr); + print(" %4d addr\n", ostats.naddr); memset(&ostats, 0, sizeof(ostats)); } @@ -1217,10 +1221,12 @@ loopit(Reg *r, int32 nr) r1 = rpo2r[i]; me = r1->rpo; d = -1; - if(r1->p1 != R && r1->p1->rpo < me) + // rpo2r[r->rpo] == r protects against considering dead code, + // which has r->rpo == 0. + if(r1->p1 != R && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me) d = r1->p1->rpo; for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) - if(r1->rpo < me) + if(rpo2r[r1->rpo] == r1 && r1->rpo < me) d = rpolca(idom, d, r1->rpo); idom[i] = d; } @@ -1685,3 +1691,123 @@ noreturn(Prog *p) return 1; return 0; } + +/* + * the code generator depends on being able to write out JMP + * instructions that it can jump to now but fill in later. + * the linker will resolve them nicely, but they make the code + * longer and more difficult to follow during debugging. + * remove them. + */ + +/* what instruction does a JMP to p eventually land on? */ +static Prog* +chasejmp(Prog *p, int *jmploop) +{ + int n; + + n = 0; + while(p != P && p->as == AJMP && p->to.type == D_BRANCH) { + if(++n > 10) { + *jmploop = 1; + break; + } + p = p->to.branch; + } + return p; +} + +/* + * reuse reg pointer for mark/sweep state. + * leave reg==nil at end because alive==nil. + */ +#define alive ((void*)0) +#define dead ((void*)1) + +/* mark all code reachable from firstp as alive */ +static void +mark(Prog *firstp) +{ + Prog *p; + + for(p=firstp; p; p=p->link) { + if(p->reg != dead) + break; + p->reg = alive; + if(p->as != ACALL && p->to.type == D_BRANCH && p->to.branch) + mark(p->to.branch); + if(p->as == AJMP || p->as == ARET || (p->as == ACALL && noreturn(p))) + break; + } +} + +static void +fixjmp(Prog *firstp) +{ + int jmploop; + Prog *p, *last; + + if(debug['R'] && debug['v']) + print("\nfixjmp\n"); + + // pass 1: resolve jump to AJMP, mark all code as dead. + jmploop = 0; + for(p=firstp; p; p=p->link) { + if(debug['R'] && debug['v']) + print("%P\n", p); + if(p->as != ACALL && p->to.type == D_BRANCH && p->to.branch && p->to.branch->as == AJMP) { + p->to.branch = chasejmp(p->to.branch, &jmploop); + if(debug['R'] && debug['v']) + print("->%P\n", p); + } + p->reg = dead; + } + if(debug['R'] && debug['v']) + print("\n"); + + // pass 2: mark all reachable code alive + mark(firstp); + + // pass 3: delete dead code (mostly JMPs). + last = nil; + for(p=firstp; p; p=p->link) { + if(p->reg == dead) { + if(p->link == P && p->as == ARET && last && last->as != ARET) { + // This is the final ARET, and the code so far doesn't have one. + // Let it stay. + } else { + if(debug['R'] && debug['v']) + print("del %P\n", p); + continue; + } + } + if(last) + last->link = p; + last = p; + } + last->link = P; + + // pass 4: elide JMP to next instruction. + // only safe if there are no jumps to JMPs anymore. + if(!jmploop) { + last = nil; + for(p=firstp; p; p=p->link) { + if(p->as == AJMP && p->to.type == D_BRANCH && p->to.branch == p->link) { + if(debug['R'] && debug['v']) + print("del %P\n", p); + continue; + } + if(last) + last->link = p; + last = p; + } + last->link = P; + } + + if(debug['R'] && debug['v']) { + print("\n"); + for(p=firstp; p; p=p->link) + print("%P\n", p); + print("\n"); + } +} diff --git a/src/cmd/8g/reg.c b/src/cmd/8g/reg.c index 29ea68b64f..2276282264 100644 --- a/src/cmd/8g/reg.c +++ b/src/cmd/8g/reg.c @@ -39,6 +39,8 @@ static int first = 1; +static void fixjmp(Prog*); + Reg* rega(void) { @@ -132,6 +134,8 @@ regopt(Prog *firstp) exregoffset = D_DI; // no externals first = 0; } + + fixjmp(firstp); // count instructions nr = 0; @@ -694,9 +698,9 @@ brk: if(ostats.ndelmov) print(" %4d delmov\n", ostats.ndelmov); if(ostats.nvar) - print(" %4d delmov\n", ostats.nvar); + print(" %4d var\n", ostats.nvar); if(ostats.naddr) - print(" %4d delmov\n", ostats.naddr); + print(" %4d addr\n", ostats.naddr); memset(&ostats, 0, sizeof(ostats)); } @@ -1097,10 +1101,12 @@ loopit(Reg *r, int32 nr) r1 = rpo2r[i]; me = r1->rpo; d = -1; - if(r1->p1 != R && r1->p1->rpo < me) + // rpo2r[r->rpo] == r protects against considering dead code, + // which has r->rpo == 0. + if(r1->p1 != R && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me) d = r1->p1->rpo; for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) - if(r1->rpo < me) + if(rpo2r[r1->rpo] == r1 && r1->rpo < me) d = rpolca(idom, d, r1->rpo); idom[i] = d; } @@ -1544,3 +1550,123 @@ noreturn(Prog *p) return 1; return 0; } + +/* + * the code generator depends on being able to write out JMP + * instructions that it can jump to now but fill in later. + * the linker will resolve them nicely, but they make the code + * longer and more difficult to follow during debugging. + * remove them. + */ + +/* what instruction does a JMP to p eventually land on? */ +static Prog* +chasejmp(Prog *p, int *jmploop) +{ + int n; + + n = 0; + while(p != P && p->as == AJMP && p->to.type == D_BRANCH) { + if(++n > 10) { + *jmploop = 1; + break; + } + p = p->to.branch; + } + return p; +} + +/* + * reuse reg pointer for mark/sweep state. + * leave reg==nil at end because alive==nil. + */ +#define alive ((void*)0) +#define dead ((void*)1) + +/* mark all code reachable from firstp as alive */ +static void +mark(Prog *firstp) +{ + Prog *p; + + for(p=firstp; p; p=p->link) { + if(p->reg != dead) + break; + p->reg = alive; + if(p->as != ACALL && p->to.type == D_BRANCH && p->to.branch) + mark(p->to.branch); + if(p->as == AJMP || p->as == ARET || (p->as == ACALL && noreturn(p))) + break; + } +} + +static void +fixjmp(Prog *firstp) +{ + int jmploop; + Prog *p, *last; + + if(debug['R'] && debug['v']) + print("\nfixjmp\n"); + + // pass 1: resolve jump to AJMP, mark all code as dead. + jmploop = 0; + for(p=firstp; p; p=p->link) { + if(debug['R'] && debug['v']) + print("%P\n", p); + if(p->as != ACALL && p->to.type == D_BRANCH && p->to.branch && p->to.branch->as == AJMP) { + p->to.branch = chasejmp(p->to.branch, &jmploop); + if(debug['R'] && debug['v']) + print("->%P\n", p); + } + p->reg = dead; + } + if(debug['R'] && debug['v']) + print("\n"); + + // pass 2: mark all reachable code alive + mark(firstp); + + // pass 3: delete dead code (mostly JMPs). + last = nil; + for(p=firstp; p; p=p->link) { + if(p->reg == dead) { + if(p->link == P && p->as == ARET && last && last->as != ARET) { + // This is the final ARET, and the code so far doesn't have one. + // Let it stay. + } else { + if(debug['R'] && debug['v']) + print("del %P\n", p); + continue; + } + } + if(last) + last->link = p; + last = p; + } + last->link = P; + + // pass 4: elide JMP to next instruction. + // only safe if there are no jumps to JMPs anymore. + if(!jmploop) { + last = nil; + for(p=firstp; p; p=p->link) { + if(p->as == AJMP && p->to.type == D_BRANCH && p->to.branch == p->link) { + if(debug['R'] && debug['v']) + print("del %P\n", p); + continue; + } + if(last) + last->link = p; + last = p; + } + last->link = P; + } + + if(debug['R'] && debug['v']) { + print("\n"); + for(p=firstp; p; p=p->link) + print("%P\n", p); + print("\n"); + } +}