]> Cypherpunks repositories - gostls13.git/commitdiff
8l: function at a time code layout
authorRuss Cox <rsc@golang.org>
Sat, 16 Oct 2010 00:19:57 +0000 (20:19 -0400)
committerRuss Cox <rsc@golang.org>
Sat, 16 Oct 2010 00:19:57 +0000 (20:19 -0400)
R=ken2
CC=golang-dev
https://golang.org/cl/2481042

src/cmd/6l/span.c
src/cmd/8l/asm.c
src/cmd/8l/l.h
src/cmd/8l/span.c

index 7ea0b636948e24e556a32d0a6a32a70b1cad486c..08b604ba49a571d8ca9e45fc52892df759c17d28 100644 (file)
@@ -153,9 +153,7 @@ span(void)
        // NOTE(rsc): If we get rid of the globals we should
        // be able to parallelize these iterations.
        for(cursym = textp; cursym != nil; cursym = cursym->next) {
-               if(!cursym->reachable)
-                       continue;
-               
+               // TODO: move into span1
                for(p = cursym->text; p != P; p = p->link) {
                        n = 0;
                        if(p->to.type == D_BRANCH)
@@ -189,8 +187,6 @@ span(void)
        sect = segtext.sect;
        sect->vaddr = c;
        for(cursym = textp; cursym != nil; cursym = cursym->next) {
-               if(!cursym->reachable)
-                       continue;
                cursym->value = c;
                for(p = cursym->text; p != P; p = p->link)
                        p->pc += c;
index 99d5b61563ec1c95defc1f58b80fb5dbf603752c..74b4e9b7632b3383d82fff512d317b4336808674 100644 (file)
@@ -332,11 +332,9 @@ phsh(Elf64_Phdr *ph, Elf64_Shdr *sh)
 void
 asmb(void)
 {
-       Prog *p;
        int32 v, magic;
        int a, dynsym;
        uint32 va, fo, w, symo, startva, machlink;
-       uchar *op1;
        ulong expectpc;
        ElfEhdr *eh;
        ElfPhdr *ph, *pph;
@@ -349,54 +347,9 @@ asmb(void)
 
        seek(cout, HEADR, 0);
        pc = INITTEXT;
-       
-       for(cursym = textp; cursym != nil; cursym = cursym->next) {
-               for(p = cursym->text; p != P; p = p->link) {
-                       curp = p;
-                       if(HEADTYPE == 8) {
-                               // native client
-                               expectpc = p->pc;
-                               p->pc = pc;
-                               asmins(p);
-                               if(p->pc != expectpc) {
-                                       Bflush(&bso);
-                                       diag("phase error %ux sb %ux in %s", p->pc, expectpc, TNAME);
-                               }
-                               while(pc < p->pc) {
-                                       cput(0x90);     // nop
-                                       pc++;
-                               }
-                       }
-                       if(p->pc != pc) {
-                               Bflush(&bso);
-                               if(!debug['a'])
-                                       print("%P\n", curp);
-                               diag("phase error %ux sb %ux in %s", p->pc, pc, TNAME);
-                               pc = p->pc;
-                       }
-                       if(HEADTYPE != 8) {
-                               asmins(p);
-                               if(pc != p->pc) {
-                                       Bflush(&bso);
-                                       diag("asmins changed pc %ux sb %ux in %s", p->pc, pc, TNAME);
-                               }
-                       }
-                       if(cbc < sizeof(and))
-                               cflush();
-                       a = (andptr - and);
-       
-                       if(debug['a']) {
-                               Bprint(&bso, pcstr, pc);
-                               for(op1 = and; op1 < andptr; op1++)
-                                       Bprint(&bso, "%.2ux", *op1 & 0xff);
-                               Bprint(&bso, "\t%P\n", curp);
-                       }
-                       memmove(cbp, and, a);
-                       cbp += a;
-                       pc += a;
-                       cbc -= a;
-               }
-       }
+       codeblk(pc, segtext.sect->len);
+       pc += segtext.sect->len;
+
        if(HEADTYPE == 8) {
                int32 etext;
                
@@ -406,8 +359,8 @@ asmb(void)
                        pc++;
                }
                pc = segrodata.vaddr;
+               cflush();
        }
-       cflush();
 
        /* output read-only data in text segment */
        sect = segtext.sect->next;
index 255ba6e39e116f2653255dd8e98aceca75de9606..53dc63c583838eee03f64c6f3cdbf4f02b83b6f2 100644 (file)
@@ -94,8 +94,8 @@ struct        Prog
        Adr     from;
        Adr     to;
        Prog*   forwd;
+       Prog*   comefrom;
        Prog*   link;
-       Prog*   dlink;
        Prog*   pcond;  /* work on this */
        int32   pc;
        int32   spadj;
index 076a6116d754e05a4f36f3c5f4726e2a497626ab..ffde369025cf71affd0528791cb2ff79d9bff3a2 100644 (file)
 #include       "l.h"
 #include       "../ld/lib.h"
 
+static int32   vaddr(Adr*, Reloc*);
+
+void
+span1(Sym *s)
+{
+       Prog *p, *q;
+       int32 c, v, loop;
+       uchar *bp;
+       int n, m, i;
+
+       cursym = s;
+
+       for(p = s->text; p != P; p = p->link) {
+               p->back = 2;    // use short branches first time through
+               if((q = p->pcond) != P && (q->back & 2))
+                       p->back |= 1;   // backward jump
+
+               if(p->as == AADJSP) {
+                       p->to.type = D_SP;
+                       v = -p->from.offset;
+                       p->from.offset = v;
+                       p->as = AADDL;
+                       if(v < 0) {
+                               p->as = ASUBL;
+                               v = -v;
+                               p->from.offset = v;
+                       }
+                       if(v == 0)
+                               p->as = ANOP;
+               }
+       }
+       
+       n = 0;
+       do {
+               loop = 0;
+               memset(s->r, 0, s->nr*sizeof s->r[0]);
+               s->nr = 0;
+               s->np = 0;
+               c = 0;
+               for(p = s->text; p != P; p = p->link) {
+                       p->pc = c;
+
+                       // process forward jumps to p
+                       for(q = p->comefrom; q != P; q = q->forwd) {
+                               v = p->pc - (q->pc + q->mark);
+                               if(q->back & 2) {       // short
+                                       if(v > 127) {
+                                               loop++;
+                                               q->back ^= 2;
+                                       }
+                                       s->p[q->pc+1] = v;
+                               } else {
+                                       bp = s->p + q->pc + q->mark - 4;
+                                       *bp++ = v;
+                                       *bp++ = v>>8;
+                                       *bp++ = v>>16;
+                                       *bp++ = v>>24;
+                               }       
+                       }
+                       p->comefrom = P;
+
+                       asmins(p);
+                       p->pc = c;
+                       m = andptr-and;
+                       symgrow(s, p->pc+m);
+                       memmove(s->p+p->pc, and, m);
+                       p->mark = m;
+                       c += m;
+               }
+               if(++n > 20) {
+                       diag("span must be looping");
+                       errorexit();
+               }
+       } while(loop);
+       s->size = c;
+
+       if(debug['a'] > 1) {
+               print("span1 %s %lld (%d tries)\n %.6ux", s->name, s->size, n, 0);
+               for(i=0; i<s->np; i++) {
+                       print(" %.2ux", s->p[i]);
+                       if(i%16 == 15)
+                               print("\n  %.6ux", i+1);
+               }
+               if(i%16)
+                       print("\n");
+       
+               for(i=0; i<s->nr; i++) {
+                       Reloc *r;
+                       
+                       r = &s->r[i];
+                       print(" rel %#.4ux/%d %s%+lld\n", r->off, r->siz, r->sym->name, r->add);
+               }
+       }
+}
+
 void
 span(void)
 {
        Prog *p, *q;
-       int32 v, c, idat, etext;
-       int m, n, again;
+       int32 v, c;
+       int n;
        Sym *s;
        Section *sect, *rosect;
 
+       if(debug['v'])
+               Bprint(&bso, "%5.2f span\n", cputime());
+
+       segtext.rwx = 05;
+       segtext.vaddr = INITTEXT - HEADR;
+       
        xdefine("etext", STEXT, 0L);
        xdefine("rodata", SRODATA, 0L);
        xdefine("erodata", SRODATA, 0L);
 
-       idat = INITDAT;
+       // NOTE(rsc): If we get rid of the globals we should
+       // be able to parallelize these iterations.
        for(cursym = textp; cursym != nil; cursym = cursym->next) {
+               // TODO: move into span1
                for(p = cursym->text; p != P; p = p->link) {
                        n = 0;
                        if(p->to.type == D_BRANCH)
@@ -71,88 +174,45 @@ span(void)
                                        p->as = ANOP;
                        }
                }
+               span1(cursym);
        }
-       n = 0;
-       
-       rosect = segtext.sect->next;
-
-start:
-       do{
-               again = 0;
-               if(debug['v'])
-                       Bprint(&bso, "%5.2f span %d\n", cputime(), n);
-               Bflush(&bso);
-               if(n > 50) {
-                       print("span must be looping - %d\n", textsize);
-                       errorexit();
-               }
-               c = INITTEXT;
-               for(cursym = textp; cursym != nil; cursym = cursym->next) {
-                       if(HEADTYPE == 8)
-                               c = (c+31)&~31;
-                       for(p = cursym->text; p != P; p = p->link) {
-                               if(p->to.type == D_BRANCH)
-                                       if(p->back)
-                                               p->pc = c;
-                               if(n == 0 || HEADTYPE == 8 || p->to.type == D_BRANCH) {
-                                       if(HEADTYPE == 8)
-                                               p->pc = c;
-                                       asmins(p);
-                                       m = andptr-and;
-                                       if(p->mark != m)
-                                               again = 1;
-                                       p->mark = m;
-                               }
-                               if(HEADTYPE == 8) {
-                                       c = p->pc + p->mark;
-                               } else {
-                                       p->pc = c;
-                                       c += p->mark;
-                               }
-                       }
-               }
-               textsize = c;
-               n++;
-       }while(again);
-       etext = c;
-       c += textpad;
        
-       if(rosect) {
-               if(INITRND)
-                       c = rnd(c, INITRND);
-               if(rosect->vaddr != c){
-                       rosect->vaddr = c;
-                       goto start;
-               }
-               c += rosect->len;
-       }
-
-       if(INITRND) {
-               INITDAT = rnd(c, INITRND);
-               if(INITDAT != idat) {
-                       idat = INITDAT;
-                       goto start;
-               }
+       // Next, loop over symbols to assign actual PCs.
+       // Could parallelize here too, by assigning to text 
+       // and then letting threads copy down, but probably not worth it.
+       c = INITTEXT;
+       sect = segtext.sect;
+       sect->vaddr = c;
+       for(cursym = textp; cursym != nil; cursym = cursym->next) {
+               cursym->value = c;
+               for(p = cursym->text; p != P; p = p->link)
+                       p->pc += c;
+               c += cursym->size;
        }
+       sect->len = c - sect->vaddr;
+       xdefine("etext", STEXT, c);
+       if(debug['v'])
+               Bprint(&bso, "etext = %llux\n", c);
 
-       xdefine("etext", STEXT, etext);
-
+       xdefine("rodata", SRODATA, c);
+       if(INITRND)
+               c = rnd(c, INITRND);
+       rosect = segtext.sect->next;
+       rosect->vaddr = c;
+       c += rosect->len;
+       xdefine("erodata", SRODATA, c);
+       textsize = c - INITTEXT;
        if(debug['v'])
-               Bprint(&bso, "etext = %ux\n", c);
+               Bprint(&bso, "erodata = %llux", c);
        Bflush(&bso);
-       for(cursym = textp; cursym != nil; cursym = cursym->next)
-               cursym->value = cursym->text->pc;
-       textsize = c - INITTEXT;
 
-       segtext.rwx = 05;
-       segtext.vaddr = INITTEXT - HEADR;
-       segtext.len = INITDAT - INITTEXT + HEADR;
-       segtext.filelen = textsize + HEADR;
-       
-       sect = segtext.sect;
-       sect->vaddr = INITTEXT;
-       sect->len = etext - sect->vaddr;
+       segtext.len = c - segtext.vaddr;
+       segtext.filelen = segtext.len;
 
+       if(INITRND)
+               c = rnd(c, INITRND);
+       INITDAT = c;
+       
        // Adjust everything now that we know INITDAT.
        // This will get simpler when everything is relocatable
        // and we can run span before dodata.
@@ -177,8 +237,6 @@ start:
                        break;
                }
        }
-       
-       // TODO(rsc): if HEADTYPE == NACL fix up segrodata.
 }
 
 void
@@ -478,26 +536,49 @@ put4(int32 v)
        andptr += 4;
 }
 
-static int32 vaddr(Adr*);
+static void
+relput4(Prog *p, Adr *a)
+{
+       vlong v;
+       Reloc rel, *r;
+       
+       v = vaddr(a, &rel);
+       if(rel.siz != 0) {
+               if(rel.siz != 4)
+                       diag("bad reloc");
+               r = addrel(cursym);
+               *r = rel;
+               r->off = p->pc + andptr - and;
+       }
+       put4(v);
+}
 
 int32
 symaddr(Sym *s)
 {
-       Adr a;
-
-       a.type = D_ADDR;
-       a.index = D_EXTERN;
-       a.offset = 0;
-       a.sym = s;
-       return vaddr(&a);
+       switch(s->type) {
+       case SFIXED:
+               return s->value;
+       
+       case SMACHO:
+               return INITDAT + segdata.filelen - dynptrsize + s->value;
+       
+       default:
+               if(!s->reachable)
+                       diag("unreachable symbol in symaddr - %s", s->name);
+               return s->value;
+       }
 }
 
 static int32
-vaddr(Adr *a)
+vaddr(Adr *a, Reloc *r)
 {
        int t;
        int32 v;
        Sym *s;
+       
+       if(r != nil)
+               memset(r, 0, sizeof *r);
 
        t = a->type;
        v = a->offset;
@@ -512,13 +593,19 @@ vaddr(Adr *a)
                        case SFIXED:
                                v += s->value;
                                break;
-                       case SMACHO:
-                               v += INITDAT + segdata.filelen - dynptrsize + s->value;
-                               break;
                        default:
                                if(!s->reachable)
                                        sysfatal("unreachable symbol in vaddr - %s", s->name);
-                               v += s->value;
+                               if(r == nil) {
+                                       diag("need reloc for %D", a);
+                                       errorexit();
+                               }
+                               r->type = D_ADDR;
+                               r->siz = 4;
+                               r->off = -1;
+                               r->sym = s;
+                               r->add = v;
+                               v = 0;
                                break;
                        }
                }
@@ -531,9 +618,11 @@ asmand(Adr *a, int r)
 {
        int32 v;
        int t, scale;
+       Reloc rel;
 
        v = a->offset;
        t = a->type;
+       rel.siz = 0;
        if(a->index != D_NONE) {
                if(t < D_INDIR || t >= 2*D_INDIR) {
                        switch(t) {
@@ -542,7 +631,7 @@ asmand(Adr *a, int r)
                        case D_STATIC:
                        case D_EXTERN:
                                t = D_NONE;
-                               v = vaddr(a);
+                               v = vaddr(a, &rel);
                                break;
                        case D_AUTO:
                        case D_PARAM:
@@ -555,15 +644,14 @@ asmand(Adr *a, int r)
                if(t == D_NONE) {
                        *andptr++ = (0 << 6) | (4 << 0) | (r << 3);
                        asmidx(a->scale, a->index, t);
-                       put4(v);
-                       return;
+                       goto putrelv;
                }
-               if(v == 0 && t != D_BP) {
+               if(v == 0 && rel.siz == 0 && t != D_BP) {
                        *andptr++ = (0 << 6) | (4 << 0) | (r << 3);
                        asmidx(a->scale, a->index, t);
                        return;
                }
-               if(v >= -128 && v < 128) {
+               if(v >= -128 && v < 128 && rel.siz == 0) {
                        *andptr++ = (1 << 6) | (4 << 0) | (r << 3);
                        asmidx(a->scale, a->index, t);
                        *andptr++ = v;
@@ -571,8 +659,7 @@ asmand(Adr *a, int r)
                }
                *andptr++ = (2 << 6) | (4 << 0) | (r << 3);
                asmidx(a->scale, a->index, t);
-               put4(v);
-               return;
+               goto putrelv;
        }
        if(t >= D_AL && t <= D_F0+7) {
                if(v)
@@ -589,7 +676,7 @@ asmand(Adr *a, int r)
                case D_STATIC:
                case D_EXTERN:
                        t = D_NONE;
-                       v = vaddr(a);
+                       v = vaddr(a, &rel);
                        break;
                case D_AUTO:
                case D_PARAM:
@@ -602,16 +689,15 @@ asmand(Adr *a, int r)
 
        if(t == D_NONE || (D_CS <= t && t <= D_GS)) {
                *andptr++ = (0 << 6) | (5 << 0) | (r << 3);
-               put4(v);
-               return;
+               goto putrelv;
        }
        if(t == D_SP) {
-               if(v == 0) {
+               if(v == 0 && rel.siz == 0) {
                        *andptr++ = (0 << 6) | (4 << 0) | (r << 3);
                        asmidx(scale, D_NONE, t);
                        return;
                }
-               if(v >= -128 && v < 128) {
+               if(v >= -128 && v < 128 && rel.siz == 0) {
                        *andptr++ = (1 << 6) | (4 << 0) | (r << 3);
                        asmidx(scale, D_NONE, t);
                        *andptr++ = v;
@@ -619,24 +705,38 @@ asmand(Adr *a, int r)
                }
                *andptr++ = (2 << 6) | (4 << 0) | (r << 3);
                asmidx(scale, D_NONE, t);
-               put4(v);
-               return;
+               goto putrelv;
        }
        if(t >= D_AX && t <= D_DI) {
-               if(v == 0 && t != D_BP) {
+               if(v == 0 && rel.siz == 0 && t != D_BP) {
                        *andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
                        return;
                }
-               if(v >= -128 && v < 128) {
+               if(v >= -128 && v < 128 && rel.siz == 0) {
                        andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
                        andptr[1] = v;
                        andptr += 2;
                        return;
                }
                *andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
-               put4(v);
-               return;
+               goto putrelv;
        }
+       goto bad;
+
+putrelv:
+       if(rel.siz != 0) {
+               Reloc *r;
+               
+               if(rel.siz != 4) {
+                       diag("bad rel");
+                       goto bad;
+               }
+               r = addrel(cursym);
+               *r = rel;
+               r->off = curp->pc + andptr - and;
+       }
+       put4(v);
+       return;
 
 bad:
        diag("asmand: bad address %D", a);
@@ -827,6 +927,10 @@ doasm(Prog *p)
        uchar *t;
        int z, op, ft, tt;
        int32 v, pre;
+       Reloc rel, *r;
+       Adr *a;
+       
+       curp = p;       // TODO
 
        pre = prefixof(&p->from);
        if(pre)
@@ -872,7 +976,7 @@ found:
        case Pb:        /* botch */
                break;
        }
-       v = vaddr(&p->from);
+
        op = o->op[z];
        switch(t[2]) {
        default:
@@ -963,21 +1067,24 @@ found:
                break;
 
        case Zm_ibo:
-               v = vaddr(&p->to);
                *andptr++ = op;
                asmand(&p->from, o->op[z+1]);
-               *andptr++ = v;
+               *andptr++ = vaddr(&p->to, nil);
                break;
 
        case Zibo_m:
                *andptr++ = op;
                asmand(&p->to, o->op[z+1]);
-               *andptr++ = v;
+               *andptr++ = vaddr(&p->from, nil);
                break;
 
        case Z_ib:
-               v = vaddr(&p->to);
        case Zib_:
+               if(t[2] == Zib_)
+                       a = &p->from;
+               else
+                       a = &p->to;
+               v = vaddr(a, nil);
                if(HEADTYPE == 8 && p->as == AINT && v == 3) {
                        // native client disallows all INT instructions.
                        // translate INT $3 to HLT.
@@ -990,69 +1097,71 @@ found:
 
        case Zib_rp:
                *andptr++ = op + reg[p->to.type];
-               *andptr++ = v;
+               *andptr++ = vaddr(&p->from, nil);
                break;
 
        case Zil_rp:
                *andptr++ = op + reg[p->to.type];
                if(o->prefix == Pe) {
+                       v = vaddr(&p->from, nil);
                        *andptr++ = v;
                        *andptr++ = v>>8;
                }
                else
-                       put4(v);
+                       relput4(p, &p->from);
                break;
 
        case Zib_rr:
                *andptr++ = op;
                asmand(&p->to, reg[p->to.type]);
-               *andptr++ = v;
+               *andptr++ = vaddr(&p->from, nil);
                break;
 
        case Z_il:
-               v = vaddr(&p->to);
        case Zil_:
-               *andptr++ = op;
-               if(o->prefix == Pe) {
-                       *andptr++ = v;
-                       *andptr++ = v>>8;
-               }
+               if(t[2] == Zil_)
+                       a = &p->from;
                else
-                       put4(v);
-               break;
-
-       case Zm_ilo:
-               v = vaddr(&p->to);
+                       a = &p->to;
                *andptr++ = op;
-               asmand(&p->from, o->op[z+1]);
                if(o->prefix == Pe) {
+                       v = vaddr(a, nil);
                        *andptr++ = v;
                        *andptr++ = v>>8;
                }
                else
-                       put4(v);
+                       relput4(p, a);
                break;
 
+       case Zm_ilo:
        case Zilo_m:
                *andptr++ = op;
-               asmand(&p->to, o->op[z+1]);
+               if(t[2] == Zilo_m) {
+                       a = &p->from;
+                       asmand(&p->to, o->op[z+1]);
+               } else {
+                       a = &p->to;
+                       asmand(&p->from, o->op[z+1]);
+               }
                if(o->prefix == Pe) {
+                       v = vaddr(a, nil);
                        *andptr++ = v;
                        *andptr++ = v>>8;
                }
                else
-                       put4(v);
+                       relput4(p, a);
                break;
 
        case Zil_rr:
                *andptr++ = op;
                asmand(&p->to, reg[p->to.type]);
                if(o->prefix == Pe) {
+                       v = vaddr(&p->from, nil);
                        *andptr++ = v;
                        *andptr++ = v>>8;
                }
                else
-                       put4(v);
+                       relput4(p, &p->from);
                break;
 
        case Z_rp:
@@ -1067,92 +1176,128 @@ found:
                *andptr++ = op;
                asmand(&p->to, reg[p->to.type]);
                break;
-
-       case Zbr:
-               q = p->pcond;
-               if(q) {
-                       v = q->pc - p->pc - 2;
-                       if(q->pc == 0)
-                               v = 0;
-                       if(v >= -128 && v <= 127 && !p->bigjmp) {
-                               *andptr++ = op;
-                               *andptr++ = v;
-                       } else {
-                               p->bigjmp = 1;
-                               v -= 6-2;
-                               *andptr++ = 0x0f;
-                               *andptr++ = o->op[z+1];
-                               *andptr++ = v;
-                               *andptr++ = v>>8;
-                               *andptr++ = v>>16;
-                               *andptr++ = v>>24;
-                       }
-               }
-               break;
-
+       
        case Zcall:
                q = p->pcond;
-               if(q) {
-                       v = q->pc - p->pc - 5;
-                       *andptr++ = op;
-                       *andptr++ = v;
-                       *andptr++ = v>>8;
-                       *andptr++ = v>>16;
-                       *andptr++ = v>>24;
+               if(q == nil) {
+                       diag("call without target");
+                       errorexit();
+               }
+               if(q->as != ATEXT) {
+                       // Could handle this case by making D_PCREL
+                       // record the Prog* instead of the Sym*, but let's
+                       // wait until the need arises.
+                       diag("call of non-TEXT");
+                       errorexit();
                }
-               break;
-
-       case Zcallcon:
-               v = p->to.offset - p->pc - 5;
                *andptr++ = op;
-               *andptr++ = v;
-               *andptr++ = v>>8;
-               *andptr++ = v>>16;
-               *andptr++ = v>>24;
+               r = addrel(cursym);
+               r->off = p->pc + andptr - and;
+               r->type = D_PCREL;
+               r->siz = 4;
+               r->sym = q->from.sym;
+               put4(0);
                break;
 
+       case Zbr:
        case Zjmp:
                q = p->pcond;
-               if(q) {
-                       v = q->pc - p->pc - 2;
-                       if(q->pc == 0)
-                               v = 0;
-                       if(v >= -128 && v <= 127 && !p->bigjmp) {
+               if(q == nil) {
+                       diag("jmp/branch without target");
+                       errorexit();
+               }
+               if(q->as == ATEXT) {
+                       // jump out of function
+                       if(t[2] == Zbr) {
+                               diag("branch to ATEXT");
+                               errorexit();
+                       }
+                       *andptr++ = o->op[z+1];
+                       r = addrel(cursym);
+                       r->off = p->pc + andptr - and;
+                       r->sym = q->from.sym;
+                       r->type = D_PCREL;
+                       r->siz = 4;
+                       put4(0);
+                       break;
+               }
+               
+               // Assumes q is in this function.
+               // TODO: Check in input, preserve in brchain.
+               
+               // Fill in backward jump now.
+               if(p->back & 1) {
+                       v = q->pc - (p->pc + 2);
+                       if(v >= -128) {
                                *andptr++ = op;
                                *andptr++ = v;
                        } else {
-                               p->bigjmp = 1;
                                v -= 5-2;
+                               if(t[2] == Zbr) {
+                                       *andptr++ = 0x0f;
+                                       v--;
+                               }
                                *andptr++ = o->op[z+1];
                                *andptr++ = v;
                                *andptr++ = v>>8;
                                *andptr++ = v>>16;
                                *andptr++ = v>>24;
                        }
+                       break;
+               }
+
+               // Annotate target; will fill in later.
+               p->forwd = q->comefrom;
+               q->comefrom = p;
+               if(p->back & 2) { // short
+                       *andptr++ = op;
+                       *andptr++ = 0;
+               } else {
+                       if(t[2] == Zbr)
+                               *andptr++ = 0x0f;
+                       *andptr++ = o->op[z+1];
+                       *andptr++ = 0;
+                       *andptr++ = 0;
+                       *andptr++ = 0;
+                       *andptr++ = 0;
                }
                break;
 
+       case Zcallcon:
        case Zjmpcon:
-               v = p->to.offset - p->pc - 5;
-               *andptr++ = o->op[z+1];
-               *andptr++ = v;
-               *andptr++ = v>>8;
-               *andptr++ = v>>16;
-               *andptr++ = v>>24;
+               if(t[2] == Zcallcon)
+                       *andptr++ = op;
+               else
+                       *andptr++ = o->op[z+1];
+               r = addrel(cursym);
+               r->off = p->pc + andptr - and;
+               r->type = D_PCREL;
+               r->siz = 4;
+               r->add = p->to.offset;
+               put4(0);
                break;
 
        case Zloop:
                q = p->pcond;
-               if(q) {
-                       v = q->pc - p->pc - 2;
-                       if(v < -128 && v > 127)
-                               diag("loop too far: %P", p);
-                       *andptr++ = op;
-                       *andptr++ = v;
+               if(q == nil) {
+                       diag("loop without target");
+                       errorexit();
                }
+               v = q->pc - p->pc - 2;
+               if(v < -128 && v > 127)
+                       diag("loop too far: %P", p);
+               *andptr++ = op;
+               *andptr++ = v;
                break;
 
        case Zbyte:
+               v = vaddr(&p->from, &rel);
+               if(rel.siz != 0) {
+                       rel.siz = op;
+                       r = addrel(cursym);
+                       *r = rel;
+                       r->off = p->pc + andptr - and;
+               }
                *andptr++ = v;
                if(op > 1) {
                        *andptr++ = v>>8;
@@ -1319,6 +1464,8 @@ asmins(Prog *p)
        if(HEADTYPE == 8) {
                ulong npc;
                static Prog *prefix;
+               
+               // TODO: adjust relocations, like 6l does for rex prefix
 
                // native client
                // - pad indirect jump targets (aka ATEXT) to 32-byte boundary