]> Cypherpunks repositories - gostls13.git/commitdiff
6l: function at a time code layout
authorRuss Cox <rsc@golang.org>
Fri, 15 Oct 2010 19:18:47 +0000 (15:18 -0400)
committerRuss Cox <rsc@golang.org>
Fri, 15 Oct 2010 19:18:47 +0000 (15:18 -0400)
Also change the span-dependent jump algorithm
to use fewer iterations:

* resolve forward jumps at their targets (comefrom list)
* mark jumps as small or big and only do small->big
* record whether a jump failed to be encodable

These changes mean that a function with only small
jumps can be laid out in a single iteration, and the
vast majority of functions take just two iterations.
I was seeing a maximum of 5 iterations before; the
max now is 3 and there are fewer that get even that far.

R=ken2
CC=golang-dev
https://golang.org/cl/2537041

src/cmd/5l/5.out.h
src/cmd/6l/6.out.h
src/cmd/6l/asm.c
src/cmd/6l/l.h
src/cmd/6l/span.c
src/cmd/8l/8.out.h
src/cmd/ld/data.c
src/cmd/ld/lib.h

index a3b4f250242e5ab2a0af5a2511ecfecef0798150..3c7223d2889ca0b7583f8c4f97d5c2aee394cc26 100644 (file)
@@ -246,6 +246,7 @@ enum        as
 
 /* internal only */
 #define        D_SIZE          (D_NONE+40)
+#define        D_PCREL         (D_NONE+41)
 
 /*
  * this is the ranlib header
index 38117f398afd7c9237aa2044f7701ba836c5ef9b..709f82ccc1d5b5d53b1f569809b9463a83885ce3 100644 (file)
@@ -824,6 +824,7 @@ enum
        D_INDIR,        /* additive */
 
        D_SIZE = D_INDIR + D_INDIR,     /* 6l internal */
+       D_PCREL,
 
        T_TYPE          = 1<<0,
        T_INDEX         = 1<<1,
index 839ebdf3ded7898385e17b4c6f78163b2fb286aa..b9358a8a0a5f500c9bb47a684d3488a485c695f7 100644 (file)
@@ -344,10 +344,8 @@ phsh(ElfPhdr *ph, ElfShdr *sh)
 void
 asmb(void)
 {
-       Prog *p;
        int32 v, magic;
        int a, dynsym;
-       uchar *op1;
        vlong vl, va, startva, fo, w, symo, elfsymo, elfstro, elfsymsize, machlink;
        vlong symdatva = SYMDATVA;
        ElfEhdr *eh;
@@ -366,35 +364,8 @@ asmb(void)
        elfsymo = 0;
        seek(cout, HEADR, 0);
        pc = INITTEXT;
-
-       for(cursym = textp; cursym != nil; cursym = cursym->next) {
-               for(p = cursym->text; p != P; p = p->link) {
-                       if(p->pc != pc) {
-                               if(!debug['a'])
-                                       print("%P\n", curp);
-                               diag("phase error %llux sb %llux in %s", p->pc, pc, TNAME);
-                               pc = p->pc;
-                       }
-                       curp = p;
-                       asmins(p);
-                       a = (andptr - and);
-                       if(cbc < a)
-                               cflush();
-                       if(debug['a']) {
-                               Bprint(&bso, pcstr, pc);
-                               for(op1 = and; op1 < andptr; op1++)
-                                       Bprint(&bso, "%.2ux", *op1);
-                               for(; op1 < and+Maxand; op1++)
-                                       Bprint(&bso, "  ");
-                               Bprint(&bso, "%P\n", curp);
-                       }
-                       memmove(cbp, and, a);
-                       cbp += a;
-                       pc += a;
-                       cbc -= a;
-               }
-       }
-       cflush();
+       codeblk(pc, segtext.sect->len);
+       pc += segtext.sect->len;
 
        /* output read-only data in text segment */
        sect = segtext.sect->next;
index b9a283ad34f97010b497bd38a407e636c9a28999..e3f409e07832f893277aba38dd92615e827ab601 100644 (file)
@@ -94,8 +94,8 @@ struct        Prog
        Adr     from;
        Adr     to;
        Prog*   forwd;
+       Prog*   comefrom;
        Prog*   link;
-       Prog*   dlink;
        Prog*   pcond;  /* work on this */
        vlong   pc;
        int32   spadj;
index b29737584b3076568bd4f13bc65489ae570b675b..7ea0b636948e24e556a32d0a6a32a70b1cad486c 100644 (file)
 
 static int     rexflag;
 static int     asmode;
+static vlong   vaddr(Adr*, Reloc*);
+
+void
+span1(Sym *s)
+{
+       Prog *p, *q;
+       int32 c, v, loop;
+       uchar *bp;
+       int n, m, i;
+
+       cursym = s;
+
+       for(p = s->text; p != P; p = p->link) {
+               p->back = 2;    // use short branches first time through
+               if((q = p->pcond) != P && (q->back & 2))
+                       p->back |= 1;   // backward jump
+
+               if(p->as == AADJSP) {
+                       p->to.type = D_SP;
+                       v = -p->from.offset;
+                       p->from.offset = v;
+                       p->as = p->mode != 64? AADDL: AADDQ;
+                       if(v < 0) {
+                               p->as = p->mode != 64? ASUBL: ASUBQ;
+                               v = -v;
+                               p->from.offset = v;
+                       }
+                       if(v == 0)
+                               p->as = ANOP;
+               }
+       }
+       
+       n = 0;
+       do {
+               loop = 0;
+               memset(s->r, 0, s->nr*sizeof s->r[0]);
+               s->nr = 0;
+               s->np = 0;
+               c = 0;
+               for(p = s->text; p != P; p = p->link) {
+                       p->pc = c;
+
+                       // process forward jumps to p
+                       for(q = p->comefrom; q != P; q = q->forwd) {
+                               v = p->pc - (q->pc + q->mark);
+                               if(q->back & 2) {       // short
+                                       if(v > 127) {
+                                               loop++;
+                                               q->back ^= 2;
+                                       }
+                                       s->p[q->pc+1] = v;
+                               } else {
+                                       bp = s->p + q->pc + q->mark - 4;
+                                       *bp++ = v;
+                                       *bp++ = v>>8;
+                                       *bp++ = v>>16;
+                                       *bp++ = v>>24;
+                               }       
+                       }
+                       p->comefrom = P;
+
+                       asmins(p);
+                       p->pc = c;
+                       m = andptr-and;
+                       symgrow(s, p->pc+m);
+                       memmove(s->p+p->pc, and, m);
+                       p->mark = m;
+                       c += m;
+               }
+               if(++n > 20) {
+                       diag("span must be looping");
+                       errorexit();
+               }
+       } while(loop);
+       s->size = c;
+
+       if(debug['a'] > 1) {
+               print("span1 %s %lld (%d tries)\n %.6ux", s->name, s->size, n, 0);
+               for(i=0; i<s->np; i++) {
+                       print(" %.2ux", s->p[i]);
+                       if(i%16 == 15)
+                               print("\n  %.6ux", i+1);
+               }
+               if(i%16)
+                       print("\n");
+       
+               for(i=0; i<s->nr; i++) {
+                       Reloc *r;
+                       
+                       r = &s->r[i];
+                       print(" rel %#.4ux/%d %s%+lld\n", r->off, r->siz, r->sym->name, r->add);
+               }
+       }
+}
 
 void
 span(void)
 {
        Prog *p, *q;
        int32 v;
-       vlong c, idat, etext, rosize;
-       int m, n, again;
+       vlong c;
+       int n;
+       Sym *s;
        Section *sect, *rosect;
-       Sym *sym;
 
+       if(debug['v'])
+               Bprint(&bso, "%5.2f span\n", cputime());
+
+       segtext.rwx = 05;
+       segtext.vaddr = INITTEXT - HEADR;
+       
        xdefine("etext", STEXT, 0L);
        xdefine("rodata", SRODATA, 0L);
        xdefine("erodata", SRODATA, 0L);
 
-       idat = INITDAT;
+       // NOTE(rsc): If we get rid of the globals we should
+       // be able to parallelize these iterations.
        for(cursym = textp; cursym != nil; cursym = cursym->next) {
+               if(!cursym->reachable)
+                       continue;
+               
                for(p = cursym->text; p != P; p = p->link) {
                        n = 0;
                        if(p->to.type == D_BRANCH)
@@ -75,99 +179,47 @@ span(void)
                                        p->as = ANOP;
                        }
                }
+               span1(cursym);
        }
-       n = 0;
        
-       rosect = segtext.sect->next;
-       rosize = rosect->len;
-
-start:
-       if(debug['v'])
-               Bprint(&bso, "%5.2f span\n", cputime());
-       Bflush(&bso);
+       // Next, loop over symbols to assign actual PCs.
+       // Could parallelize here too, by assigning to text 
+       // and then letting threads copy down, but probably not worth it.
        c = INITTEXT;
+       sect = segtext.sect;
+       sect->vaddr = c;
        for(cursym = textp; cursym != nil; cursym = cursym->next) {
-               for(p = cursym->text; p != P; p = p->link) {
-                       if(p->to.type == D_BRANCH)
-                               if(p->back)
-                                       p->pc = c;
-                       asmins(p);
-                       p->pc = c;
-                       m = andptr-and;
-                       p->mark = m;
-                       c += m;
-               }
+               if(!cursym->reachable)
+                       continue;
+               cursym->value = c;
+               for(p = cursym->text; p != P; p = p->link)
+                       p->pc += c;
+               c += cursym->size;
        }
+       sect->len = c - sect->vaddr;
+       xdefine("etext", STEXT, c);
+       if(debug['v'])
+               Bprint(&bso, "etext = %llux\n", c);
 
-loop:
-       n++;
+       xdefine("rodata", SRODATA, c);
+       if(INITRND)
+               c = rnd(c, INITRND);
+       rosect = segtext.sect->next;
+       rosect->vaddr = c;
+       c += rosect->len;
+       xdefine("erodata", SRODATA, c);
+       textsize = c - INITTEXT;
        if(debug['v'])
-               Bprint(&bso, "%5.2f span %d\n", cputime(), n);
+               Bprint(&bso, "erodata = %llux", c);
        Bflush(&bso);
-       if(n > 50) {
-               print("span must be looping\n");
-               errorexit();
-       }
-       again = 0;
-       c = INITTEXT;
-       for(cursym = textp; cursym != nil; cursym = cursym->next) {
-               for(p = cursym->text; p != P; p = p->link) {
-                       if(p->to.type == D_BRANCH || p->back & 0100) {
-                               if(p->back)
-                                       p->pc = c;
-                               asmins(p);
-                               m = andptr-and;
-                               if(m != p->mark) {
-                                       p->mark = m;
-                                       again++;
-                               }
-                       }
-                       p->pc = c;
-                       c += p->mark;
-               }
-       }
-       if(again) {
-               textsize = c;
-               goto loop;
-       }
-       etext = c;
-
-       if(rosect) {
-               if(INITRND)
-                       c = rnd(c, INITRND);
-               if(rosect->vaddr != c){
-                       rosect->vaddr = c;
-                       goto start;
-               }
-               c += rosect->len;
-       }
 
-       if(INITRND) {
-               INITDAT = rnd(c, INITRND);
-               if(INITDAT != idat) {
-                       idat = INITDAT;
-                       goto start;
-               }
-       }
-       
-       xdefine("etext", STEXT, etext);
+       segtext.len = c - segtext.vaddr;
+       segtext.filelen = segtext.len;
 
-       if(debug['v'])
-               Bprint(&bso, "etext = %llux\n", c);
-       Bflush(&bso);
-       for(cursym = textp; cursym != nil; cursym = cursym->next)
-               cursym->value = cursym->text->pc;
-       textsize = c - INITTEXT;
-       
-       segtext.rwx = 05;
-       segtext.vaddr = INITTEXT - HEADR;
-       segtext.len = INITDAT - INITTEXT + HEADR;
-       segtext.filelen = textsize + HEADR;
+       if(INITRND)
+               c = rnd(c, INITRND);
+       INITDAT = c;
        
-       sect = segtext.sect;
-       sect->vaddr = INITTEXT;
-       sect->len = etext - sect->vaddr;
-
        // Adjust everything now that we know INITDAT.
        // This will get simpler when everything is relocatable
        // and we can run span before dodata.
@@ -180,15 +232,15 @@ loop:
        xdefine("edata", SBSS, INITDAT+segdata.filelen);
        xdefine("end", SBSS, INITDAT+segdata.len);
 
-       for(sym=datap; sym!=nil; sym=sym->next) {
-               switch(sym->type) {
+       for(s=datap; s!=nil; s=s->next) {
+               switch(s->type) {
                case SELFDATA:
                case SRODATA:
-                       sym->value += rosect->vaddr;
+                       s->value += rosect->vaddr;
                        break;
                case SDATA:
                case SBSS:
-                       sym->value += INITDAT;
+                       s->value += INITDAT;
                        break;
                }
        }
@@ -624,6 +676,23 @@ put4(int32 v)
        andptr += 4;
 }
 
+static void
+relput4(Prog *p, Adr *a)
+{
+       vlong v;
+       Reloc rel, *r;
+       
+       v = vaddr(a, &rel);
+       if(rel.siz != 0) {
+               if(rel.siz != 4)
+                       diag("bad reloc");
+               r = addrel(cursym);
+               *r = rel;
+               r->off = p->pc + andptr - and;
+       }
+       put4(v);
+}
+
 static void
 put8(vlong v)
 {
@@ -638,26 +707,50 @@ put8(vlong v)
        andptr += 8;
 }
 
-static vlong vaddr(Adr*);
+/*
+static void
+relput8(Prog *p, Adr *a)
+{
+       vlong v;
+       Reloc rel, *r;
+       
+       v = vaddr(a, &rel);
+       if(rel.siz != 0) {
+               r = addrel(cursym);
+               *r = rel;
+               r->siz = 8;
+               r->off = p->pc + andptr - and;
+       }
+       put8(v);
+}
+*/
 
 vlong
 symaddr(Sym *s)
 {
-       Adr a;
-
-       a.type = D_ADDR;
-       a.index = D_EXTERN;
-       a.offset = 0;
-       a.sym = s;
-       return vaddr(&a);
+       switch(s->type) {
+       case SFIXED:
+               return s->value;
+       
+       case SMACHO:
+               return INITDAT + segdata.filelen - dynptrsize + s->value;
+       
+       default:
+               if(!s->reachable)
+                       diag("unreachable symbol in symaddr - %s", s->name);
+               return s->value;
+       }
 }
 
 static vlong
-vaddr(Adr *a)
+vaddr(Adr *a, Reloc *r)
 {
        int t;
        vlong v;
        Sym *s;
+       
+       if(r != nil)
+               memset(r, 0, sizeof *r);
 
        t = a->type;
        v = a->offset;
@@ -667,19 +760,24 @@ vaddr(Adr *a)
        case D_STATIC:
        case D_EXTERN:
                s = a->sym;
-               if(s != nil) {
-                       switch(s->type) {
-                       case SFIXED:
-                               v += s->value;
-                               break;
-                       case SMACHO:
-                               v += INITDAT + segdata.filelen - dynptrsize + s->value;
-                               break;
-                       default:
-                               if(!s->reachable)
-                                       diag("unreachable symbol in vaddr - %s", s->name);
-                               v += s->value;
+               switch(s->type) {
+               case SFIXED:
+                       v += s->value;
+                       break;
+               default:
+                       if(!s->reachable)
+                               diag("unreachable symbol in vaddr - %s", s->name);
+                       if(r == nil) {
+                               diag("need reloc for %D", a);
+                               errorexit();
                        }
+                       r->type = D_ADDR;
+                       r->siz = 4;     // TODO: 8 for external symbols
+                       r->off = -1;    // caller must fill in
+                       r->sym = s;
+                       r->add = v;
+                       v = 0;
+                       break;
                }
        }
        return v;
@@ -690,10 +788,12 @@ asmandsz(Adr *a, int r, int rex, int m64)
 {
        int32 v;
        int t, scale;
+       Reloc rel;
 
        rex &= (0x40 | Rxr);
        v = a->offset;
        t = a->type;
+       rel.siz = 0;
        if(a->index != D_NONE) {
                if(t < D_INDIR) { 
                        switch(t) {
@@ -702,7 +802,7 @@ asmandsz(Adr *a, int r, int rex, int m64)
                        case D_STATIC:
                        case D_EXTERN:
                                t = D_NONE;
-                               v = vaddr(a);
+                               v = vaddr(a, &rel);
                                break;
                        case D_AUTO:
                        case D_PARAM:
@@ -715,15 +815,15 @@ asmandsz(Adr *a, int r, int rex, int m64)
                if(t == D_NONE) {
                        *andptr++ = (0 << 6) | (4 << 0) | (r << 3);
                        asmidx(a->scale, a->index, t);
-                       put4(v);
+                       goto putrelv;
                        return;
                }
-               if(v == 0 && t != D_BP && t != D_R13) {
+               if(v == 0 && rel.siz == 0 && t != D_BP && t != D_R13) {
                        *andptr++ = (0 << 6) | (4 << 0) | (r << 3);
                        asmidx(a->scale, a->index, t);
                        return;
                }
-               if(v >= -128 && v < 128) {
+               if(v >= -128 && v < 128 && rel.siz == 0) {
                        *andptr++ = (1 << 6) | (4 << 0) | (r << 3);
                        asmidx(a->scale, a->index, t);
                        *andptr++ = v;
@@ -731,8 +831,7 @@ asmandsz(Adr *a, int r, int rex, int m64)
                }
                *andptr++ = (2 << 6) | (4 << 0) | (r << 3);
                asmidx(a->scale, a->index, t);
-               put4(v);
-               return;
+               goto putrelv;
        }
        if(t >= D_AL && t <= D_X0+15) {
                if(v)
@@ -750,7 +849,7 @@ asmandsz(Adr *a, int r, int rex, int m64)
                case D_STATIC:
                case D_EXTERN:
                        t = D_NONE;
-                       v = vaddr(a);
+                       v = vaddr(a, &rel);
                        break;
                case D_AUTO:
                case D_PARAM:
@@ -765,14 +864,12 @@ asmandsz(Adr *a, int r, int rex, int m64)
        if(t == D_NONE || (D_CS <= t && t <= D_GS)) {
                if(asmode != 64){
                        *andptr++ = (0 << 6) | (5 << 0) | (r << 3);
-                       put4(v);
-                       return;
+                       goto putrelv;
                }
                /* temporary */
                *andptr++ = (0 <<  6) | (4 << 0) | (r << 3);    /* sib present */
                *andptr++ = (0 << 6) | (4 << 3) | (5 << 0);     /* DS:d32 */
-               put4(v);
-               return;
+               goto putrelv;
        }
        if(t == D_SP || t == D_R12) {
                if(v == 0) {
@@ -788,8 +885,7 @@ asmandsz(Adr *a, int r, int rex, int m64)
                }
                *andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
                asmidx(scale, D_NONE, t);
-               put4(v);
-               return;
+               goto putrelv;
        }
        if(t >= D_AX && t <= D_R15) {
                if(v == 0 && t != D_BP && t != D_R13) {
@@ -803,9 +899,24 @@ asmandsz(Adr *a, int r, int rex, int m64)
                        return;
                }
                *andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
-               put4(v);
-               return;
+               goto putrelv;
        }
+       goto bad;
+       
+putrelv:
+       if(rel.siz != 0) {
+               Reloc *r;
+
+               if(rel.siz != 4) {
+                       diag("bad rel");
+                       goto bad;
+               }
+               r = addrel(cursym);
+               *r = rel;
+               r->off = curp->pc + andptr - and;
+       }
+       put4(v);
+       return;
 
 bad:
        diag("asmand: bad address %D", a);
@@ -1040,6 +1151,10 @@ doasm(Prog *p)
        Movtab *mo;
        int z, op, ft, tt, xo, l, pre;
        vlong v;
+       Reloc rel, *r;
+       Adr *a;
+       
+       curp = p;       // TODO
 
        o = opindex[p->as];
        if(o == nil) {
@@ -1116,7 +1231,7 @@ found:
                        diag("asmins: illegal in %d-bit mode: %P", p->mode, p);
                break;
        }
-       v = vaddr(&p->from);
+
        op = o->op[z];
        if(op == 0x0f) {
                *andptr++ = op;
@@ -1222,64 +1337,74 @@ found:
                break;
 
        case Zm_ibo:
-               v = vaddr(&p->to);
                *andptr++ = op;
                asmando(&p->from, o->op[z+1]);
-               *andptr++ = v;
+               *andptr++ = vaddr(&p->to, nil);
                break;
 
        case Zibo_m:
                *andptr++ = op;
                asmando(&p->to, o->op[z+1]);
-               *andptr++ = v;
+               *andptr++ = vaddr(&p->from, nil);
                break;
 
        case Zibo_m_xm:
                z = mediaop(o, op, t[3], z);
                asmando(&p->to, o->op[z+1]);
-               *andptr++ = v;
+               *andptr++ = vaddr(&p->from, nil);
                break;
 
        case Z_ib:
-               v = vaddr(&p->to);
        case Zib_:
+               if(t[2] == Zib_)
+                       a = &p->from;
+               else
+                       a = &p->to;
                *andptr++ = op;
-               *andptr++ = v;
+               *andptr++ = vaddr(a, nil);
                break;
 
        case Zib_rp:
                rexflag |= regrex[p->to.type] & (Rxb|0x40);
                *andptr++ = op + reg[p->to.type];
-               *andptr++ = v;
+               *andptr++ = vaddr(&p->from, nil);
                break;
 
        case Zil_rp:
                rexflag |= regrex[p->to.type] & Rxb;
                *andptr++ = op + reg[p->to.type];
                if(o->prefix == Pe) {
+                       v = vaddr(&p->from, nil);
                        *andptr++ = v;
                        *andptr++ = v>>8;
                }
                else
-                       put4(v);
+                       relput4(p, &p->from);
                break;
 
        case Zo_iw:
                *andptr++ = op;
                if(p->from.type != D_NONE){
+                       v = vaddr(&p->from, nil);
                        *andptr++ = v;
                        *andptr++ = v>>8;
                }
                break;
 
        case Ziq_rp:
+               v = vaddr(&p->from, &rel);
                l = v>>32;
-               if(l == 0){
+               if(l == 0 && rel.siz != 8){
                        //p->mark |= 0100;
                        //print("zero: %llux %P\n", v, p);
                        rexflag &= ~(0x40|Rxw);
                        rexflag |= regrex[p->to.type] & Rxb;
                        *andptr++ = 0xb8 + reg[p->to.type];
+                       if(rel.type != 0) {
+                               r = addrel(cursym);
+                               *r = rel;
+                               r->off = p->pc + andptr - and;
+                       }
                        put4(v);
                }else if(l == -1 && (v&((uvlong)1<<31))!=0){    /* sign extend */
                        //p->mark |= 0100;
@@ -1291,6 +1416,11 @@ found:
                        //print("all: %llux %P\n", v, p);
                        rexflag |= regrex[p->to.type] & Rxb;
                        *andptr++ = op + reg[p->to.type];
+                       if(rel.type != 0) {
+                               r = addrel(cursym);
+                               *r = rel;
+                               r->off = p->pc + andptr - and;
+                       }
                        put8(v);
                }
                break;
@@ -1298,53 +1428,54 @@ found:
        case Zib_rr:
                *andptr++ = op;
                asmand(&p->to, &p->to);
-               *andptr++ = v;
+               *andptr++ = vaddr(&p->from, nil);
                break;
 
        case Z_il:
-               v = vaddr(&p->to);
        case Zil_:
-               *andptr++ = op;
-               if(o->prefix == Pe) {
-                       *andptr++ = v;
-                       *andptr++ = v>>8;
-               }
+               if(t[2] == Zil_)
+                       a = &p->from;
                else
-                       put4(v);
-               break;
-
-       case Zm_ilo:
-               v = vaddr(&p->to);
+                       a = &p->to;
                *andptr++ = op;
-               asmando(&p->from, o->op[z+1]);
                if(o->prefix == Pe) {
+                       v = vaddr(a, nil);
                        *andptr++ = v;
                        *andptr++ = v>>8;
                }
                else
-                       put4(v);
+                       relput4(p, a);
                break;
 
+       case Zm_ilo:
        case Zilo_m:
                *andptr++ = op;
-               asmando(&p->to, o->op[z+1]);
+               if(t[2] == Zilo_m) {
+                       a = &p->from;
+                       asmando(&p->to, o->op[z+1]);
+               } else {
+                       a = &p->to;
+                       asmando(&p->from, o->op[z+1]);
+               }
                if(o->prefix == Pe) {
+                       v = vaddr(a, nil);
                        *andptr++ = v;
                        *andptr++ = v>>8;
                }
                else
-                       put4(v);
+                       relput4(p, a);
                break;
 
        case Zil_rr:
                *andptr++ = op;
                asmand(&p->to, &p->to);
                if(o->prefix == Pe) {
+                       v = vaddr(&p->from, nil);
                        *andptr++ = v;
                        *andptr++ = v>>8;
                }
                else
-                       put4(v);
+                       relput4(p, &p->from);
                break;
 
        case Z_rp:
@@ -1362,67 +1493,132 @@ found:
                asmand(&p->to, &p->to);
                break;
 
+       case Zcall:
+               q = p->pcond;
+               if(q == nil) {
+                       diag("call without target");
+                       errorexit();
+               }
+               if(q->as != ATEXT) {
+                       // Could handle this case by making D_PCREL
+                       // record the Prog* instead of the Sym*, but let's
+                       // wait until the need arises.
+                       diag("call of non-TEXT");
+                       errorexit();
+               }
+               *andptr++ = op;
+               r = addrel(cursym);
+               r->off = p->pc + andptr - and;
+               r->sym = q->from.sym;
+               r->type = D_PCREL;
+               r->siz = 4;
+               put4(0);
+               break;
+
        case Zbr:
+       case Zjmp:
+               // TODO: jump across functions needs reloc
                q = p->pcond;
-               if(q) {
-                       v = q->pc - p->pc - 2;
-                       if(v >= -128 && v <= 127) {
+               if(q == nil) {
+                       diag("jmp/branch without target");
+                       errorexit();
+               }
+               if(q->as == ATEXT) {
+                       if(t[2] == Zbr) {
+                               diag("branch to ATEXT");
+                               errorexit();
+                       }
+                       *andptr++ = o->op[z+1];
+                       r = addrel(cursym);
+                       r->off = p->pc + andptr - and;
+                       r->sym = q->from.sym;
+                       r->type = D_PCREL;
+                       r->siz = 4;
+                       put4(0);
+                       break;
+               }
+               // Assumes q is in this function.
+               // TODO: Check in input, preserve in brchain.
+
+               // Fill in backward jump now.
+               if(p->back & 1) {
+                       v = q->pc - (p->pc + 2);
+                       if(v >= -128) {
                                *andptr++ = op;
                                *andptr++ = v;
                        } else {
-                               v -= 6-2;
-                               *andptr++ = 0x0f;
+                               v -= 5-2;
+                               if(t[2] == Zbr) {
+                                       *andptr++ = 0x0f;
+                                       v--;
+                               }
                                *andptr++ = o->op[z+1];
                                *andptr++ = v;
                                *andptr++ = v>>8;
                                *andptr++ = v>>16;
                                *andptr++ = v>>24;
                        }
+                       break;
+               }
+               
+               // Annotate target; will fill in later.
+               p->forwd = q->comefrom;
+               q->comefrom = p;
+               if(p->back & 2) { // short
+                       *andptr++ = op;
+                       *andptr++ = 0;
+               } else {
+                       if(t[2] == Zbr)
+                               *andptr++ = 0x0f;
+                       *andptr++ = o->op[z+1];
+                       *andptr++ = 0;
+                       *andptr++ = 0;
+                       *andptr++ = 0;
+                       *andptr++ = 0;
                }
                break;
-
-       case Zcall:
-               q = p->pcond;
-               if(q) {
-                       v = q->pc - p->pc - 5;
+                               
+/*
+               v = q->pc - p->pc - 2;
+               if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
                        *andptr++ = op;
                        *andptr++ = v;
+               } else {
+                       v -= 5-2;
+                       if(t[2] == Zbr) {
+                               *andptr++ = 0x0f;
+                               v--;
+                       }
+                       *andptr++ = o->op[z+1];
+                       *andptr++ = v;
                        *andptr++ = v>>8;
                        *andptr++ = v>>16;
                        *andptr++ = v>>24;
                }
-               break;
-
-       case Zjmp:
-               q = p->pcond;
-               if(q) {
-                       v = q->pc - p->pc - 2;
-                       if(v >= -128 && v <= 127) {
-                               *andptr++ = op;
-                               *andptr++ = v;
-                       } else {
-                               v -= 5-2;
-                               *andptr++ = o->op[z+1];
-                               *andptr++ = v;
-                               *andptr++ = v>>8;
-                               *andptr++ = v>>16;
-                               *andptr++ = v>>24;
-                       }
-               }
+*/
                break;
 
        case Zloop:
                q = p->pcond;
-               if(q) {
-                       v = q->pc - p->pc - 2;
-                       if(v < -128 && v > 127)
-                               diag("loop too far: %P", p);
-                       *andptr++ = op;
-                       *andptr++ = v;
+               if(q == nil) {
+                       diag("loop without target");
+                       errorexit();
                }
+               v = q->pc - p->pc - 2;
+               if(v < -128 && v > 127)
+                       diag("loop too far: %P", p);
+               *andptr++ = op;
+               *andptr++ = v;
                break;
 
        case Zbyte:
+               v = vaddr(&p->from, &rel);
+               if(rel.siz != 0) {
+                       rel.siz = op;
+                       r = addrel(cursym);
+                       *r = rel;
+                       r->off = p->pc + andptr - and;
+               }
                *andptr++ = v;
                if(op > 1) {
                        *andptr++ = v>>8;
@@ -1595,6 +1791,7 @@ void
 asmins(Prog *p)
 {
        int n, np, c;
+       Reloc *r;
 
        rexflag = 0;
        andptr = and;
@@ -1604,7 +1801,7 @@ asmins(Prog *p)
                /*
                 * as befits the whole approach of the architecture,
                 * the rex prefix must appear before the first opcode byte
-                * (and thus after any 66/67/f2/f3 prefix bytes, but
+                * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
                 * before the 0f opcode escape!), or it might be ignored.
                 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
                 */
@@ -1616,6 +1813,11 @@ asmins(Prog *p)
                        if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26)
                                break;
                }
+               for(r=cursym->r+cursym->nr; r-- > cursym->r; ) {
+                       if(r->off < p->pc)
+                               break;
+                       r->off++;
+               }
                memmove(and+np+1, and+np, n-np);
                and[np] = 0x40 | rexflag;
                andptr++;
index 9f63a23673502d3d3253629ca01c9228b4e45fb2..4057fb27aeb232e61d36192919271a615fa27199 100644 (file)
@@ -499,6 +499,7 @@ enum
 
        D_CONST2 = D_INDIR+D_INDIR,
        D_SIZE, /* 8l internal */
+       D_PCREL,
 
        T_TYPE          = 1<<0,
        T_INDEX         = 1<<1,
index 353277a74df6c9336dff7da25de77e1e79b4bbff..7e1282969cffbb38a3979ff64d9f92b7ce49a625 100644 (file)
@@ -156,6 +156,9 @@ relocsym(Sym *s)
                case D_ADDR:
                        o = symaddr(r->sym);
                        break;
+               case D_PCREL:
+                       o = symaddr(r->sym) - (s->value + r->off + r->siz);
+                       break;
                case D_SIZE:
                        o = r->sym->size;
                        break;
@@ -190,11 +193,8 @@ reloc(void)
 
        for(s=textp; s!=S; s=s->next)
                relocsym(s);
-       for(s=datap; s!=S; s=s->next) {
-               if(!s->reachable)
-                       diag("unerachable? %s", s->name);
+       for(s=datap; s!=S; s=s->next)
                relocsym(s);
-       }
 }
 
 void
@@ -341,6 +341,70 @@ blk(Sym *allsym, int32 addr, int32 size)
        cflush();
 }
                        
+void
+codeblk(int32 addr, int32 size)
+{
+       Sym *sym;
+       int32 eaddr, i, n, epc;
+       Prog *p;
+       uchar *q;
+
+       if(debug['a'])
+               Bprint(&bso, "codeblk [%#x,%#x) at offset %#llx\n", addr, addr+size, seek(cout, 0, 1));
+
+       blk(textp, addr, size);
+
+       /* again for printing */
+       if(!debug['a'])
+               return;
+
+       for(sym = textp; sym != nil; sym = sym->next) {
+               if(!sym->reachable)
+                       continue;
+               if(sym->value >= addr)
+                       break;
+       }
+
+       eaddr = addr + size;
+       for(; sym != nil; sym = sym->next) {
+               if(!sym->reachable)
+                       continue;
+               if(sym->value >= eaddr)
+                       break;
+
+               if(addr < sym->value) {
+                       Bprint(&bso, "%-20s %.8llux|", "_", addr);
+                       for(; addr < sym->value; addr++)
+                               Bprint(&bso, " %.2ux", 0);
+                       Bprint(&bso, "\n");
+               }
+               p = sym->text;
+               Bprint(&bso, "%-20s %.8llux| %P\n", sym->name, addr, p);
+               for(p = p->link; p != P; p = p->link) {
+                       if(p->link != P)
+                               epc = p->link->pc;
+                       else
+                               epc = sym->value + sym->size;
+                       Bprint(&bso, "%.6ux\t", p->pc);
+                       q = sym->p + p->pc - sym->value;
+                       n = epc - p->pc;
+                       for(i=0; i<n; i++)
+                               Bprint(&bso, "%.2ux", *q++);
+                       for(; i < 10; i++)
+                               Bprint(&bso, "  ");
+                       Bprint(&bso, " | %P\n", p);
+                       addr += n;
+               }
+       }
+
+       if(addr < eaddr) {
+               Bprint(&bso, "%-20s %.8llux|", "_", addr);
+               for(; addr < eaddr; addr++)
+                       Bprint(&bso, " %.2ux", 0);
+       }
+       Bflush(&bso);
+}
+                       
 void
 datblk(int32 addr, int32 size)
 {
@@ -348,6 +412,9 @@ datblk(int32 addr, int32 size)
        int32 eaddr;
        uchar *p, *ep;
 
+       if(debug['a'])
+               Bprint(&bso, "datblk [%#x,%#x) at offset %#llx\n", addr, addr+size, seek(cout, 0, 1));
+
        blk(datap, addr, size);
 
        /* again for printing */
@@ -363,10 +430,8 @@ datblk(int32 addr, int32 size)
                if(sym->value >= eaddr)
                        break;
                if(addr < sym->value) {
-                       Bprint(&bso, "%-20s %.8ux|", "(pre-pad)", addr);
-                       for(; addr < sym->value; addr++)
-                               Bprint(&bso, " %.2ux", 0);
-                       Bprint(&bso, "\n");
+                       Bprint(&bso, "%-20s %.8ux| 00 ...\n", "(pre-pad)", addr);
+                       addr = sym->value;
                }
                Bprint(&bso, "%-20s %.8ux|", sym->name, addr);
                p = sym->p;
@@ -379,11 +444,9 @@ datblk(int32 addr, int32 size)
                Bprint(&bso, "\n");
        }
 
-       if(addr < eaddr) {
-               Bprint(&bso, "%-20s %.8ux|", "(post-pad)", addr);
-               for(; addr < eaddr; addr++)
-                       Bprint(&bso, " %.2ux", 0);
-       }
+       if(addr < eaddr)
+               Bprint(&bso, "%-20s %.8ux| 00 ...\n", "(post-pad)", addr);
+       Bprint(&bso, "%-20s %.8ux|\n", "", eaddr);
 }
 
 void
index f9557e38350ca968ceff8f9b7ce28a05f4aaa34b..5d09bd2c983dbb7bfdefd4c965fe922d41cb9435 100644 (file)
@@ -132,6 +132,7 @@ char*       expandpkg(char*, char*);
 void   deadcode(void);
 void   ewrite(int, void*, int);
 Reloc* addrel(Sym*);
+void   codeblk(int32, int32);
 void   datblk(int32, int32);
 Sym*   datsort(Sym*);
 void   reloc(void);