#include "../ld/dwarf.h"
+// TODO(austin): ABI v1 uses /usr/lib/ld.so.1
char linuxdynld[] = "/lib64/ld64.so.1";
char freebsddynld[] = "XXX";
char openbsddynld[] = "XXX";
int nelfsym = 1;
+static void gencallstub(int abicase, LSym *stub, LSym *targ);
+static void addpltsym(Link*, LSym*);
+static LSym* ensureglinkresolver(void);
+
void
gentext(void)
{
+ LSym *s, *stub, **pprevtextp;
+ Reloc *r;
+ char *n;
+ uint32 o1;
+ uchar *cast;
+ int i;
+
+ // The ppc64 ABI PLT has similar concepts to other
+ // architectures, but is laid out quite differently. When we
+ // see an R_PPC64_REL24 relocation to a dynamic symbol
+ // (indicating that the call needs to go through the PLT), we
+ // generate up to three stubs and reserve a PLT slot.
+ //
+ // 1) The call site will be bl x; nop (where the relocation
+ // applies to the bl). We rewrite this to bl x_stub; ld
+ // r2,24(r1). The ld is necessary because x_stub will save
+ // r2 (the TOC pointer) at 24(r1) (the "TOC save slot").
+ //
+ // 2) We reserve space for a pointer in the .plt section (once
+ // per referenced dynamic function). .plt is a data
+ // section filled solely by the dynamic linker (more like
+ // .plt.got on other architectures). Initially, the
+ // dynamic linker will fill each slot with a pointer to the
+ // corresponding x@plt entry point.
+ //
+ // 3) We generate the "call stub" x_stub (once per dynamic
+ // function/object file pair). This saves the TOC in the
+ // TOC save slot, reads the function pointer from x's .plt
+ // slot and calls it like any other global entry point
+ // (including setting r12 to the function address).
+ //
+ // 4) We generate the "symbol resolver stub" x@plt (once per
+ // dynamic function). This is solely a branch to the glink
+ // resolver stub.
+ //
+ // 5) We generate the glink resolver stub (only once). This
+ // computes which symbol resolver stub we came through and
+ // invokes the dynamic resolver via a pointer provided by
+ // the dynamic linker. This will patch up the .plt slot to
+ // point directly at the function so future calls go
+ // straight from the call stub to the real function, and
+ // then call the function.
+
+ // NOTE: It's possible we could make ppc64 closer to other
+ // architectures: ppc64's .plt is like .plt.got on other
+ // platforms and ppc64's .glink is like .plt on other
+ // platforms.
+
+ // Find all R_PPC64_REL24 relocations that reference dynamic
+ // imports. Reserve PLT entries for these symbols and
+ // generate call stubs. The call stubs need to live in .text,
+ // which is why we need to do this pass this early.
+ //
+ // This assumes "case 1" from the ABI, where the caller needs
+ // us to save and restore the TOC pointer.
+ pprevtextp = &ctxt->textp;
+ for(s=*pprevtextp; s!=S; pprevtextp=&s->next, s=*pprevtextp) {
+ for(r=s->r; r<s->r+s->nr; r++) {
+ if(!(r->type == 256 + R_PPC64_REL24 &&
+ r->sym->type == SDYNIMPORT))
+ continue;
+
+ // Reserve PLT entry and generate symbol
+ // resolver
+ addpltsym(ctxt, r->sym);
+
+ // Generate call stub
+ n = smprint("%s.%s", s->name, r->sym->name);
+ stub = linklookup(ctxt, n, 0);
+ free(n);
+ stub->reachable |= s->reachable;
+ if(stub->size == 0) {
+ // Need outer to resolve .TOC.
+ stub->outer = s;
+
+ // Link in to textp before s (we could
+ // do it after, but would have to skip
+ // the subsymbols)
+ *pprevtextp = stub;
+ stub->next = s;
+ pprevtextp = &stub->next;
+
+ gencallstub(1, stub, r->sym);
+ }
+
+ // Update the relocation to use the call stub
+ r->sym = stub;
+
+ // Restore TOC after bl. The compiler put a
+ // nop here for us to overwrite.
+ o1 = 0xe8410018; // ld r2,24(r1)
+ cast = (uchar*)&o1;
+ for(i=0; i<4; i++)
+ s->p[r->off+4+i] = cast[inuxi4[i]];
+ }
+ }
+}
+
+// Construct a call stub in stub that calls symbol targ via its PLT
+// entry.
+static void
+gencallstub(int abicase, LSym *stub, LSym *targ)
+{
+ LSym *plt;
+ Reloc *r;
+
+ if(abicase != 1)
+ // If we see R_PPC64_TOCSAVE or R_PPC64_REL24_NOTOC
+ // relocations, we'll need to implement cases 2 and 3.
+ sysfatal("gencallstub only implements case 1 calls");
+
+ plt = linklookup(ctxt, ".plt", 0);
+
+ stub->type = STEXT;
+
+ // Save TOC pointer in TOC save slot
+ adduint32(ctxt, stub, 0xf8410018); // std r2,24(r1)
+
+ // Load the function pointer from the PLT.
+ r = addrel(stub);
+ r->off = stub->size;
+ r->sym = plt;
+ r->add = targ->plt;
+ r->siz = 2;
+ if(ctxt->arch->endian == BigEndian)
+ r->off += r->siz;
+ r->type = R_POWER_TOC;
+ r->variant = RV_POWER_HA;
+ adduint32(ctxt, stub, 0x3d820000); // addis r12,r2,targ@plt@toc@ha
+ r = addrel(stub);
+ r->off = stub->size;
+ r->sym = plt;
+ r->add = targ->plt;
+ r->siz = 2;
+ if(ctxt->arch->endian == BigEndian)
+ r->off += r->siz;
+ r->type = R_POWER_TOC;
+ r->variant = RV_POWER_LO;
+ adduint32(ctxt, stub, 0xe98c0000); // ld r12,targ@plt@toc@l(r12)
+
+ // Jump to the loaded pointer
+ adduint32(ctxt, stub, 0x7d8903a6); // mtctr r12
+ adduint32(ctxt, stub, 0x4e800420); // bctr
}
void
adddynrela(LSym *rel, LSym *s, Reloc *r)
{
- // TODO(minux)
USED(rel); USED(s); USED(r);
+ sysfatal("adddynrela not implemented");
}
void
adddynrel(LSym *s, Reloc *r)
{
- LSym *targ;
-
- // TODO(minux)
+ LSym *targ, *rela;
targ = r->sym;
ctxt->cursym = s;
+
+ switch(r->type) {
+ default:
+ if(r->type >= 256) {
+ diag("unexpected relocation type %d", r->type);
+ return;
+ }
+ break;
+
+ // Handle relocations found in ELF object files.
+ case 256 + R_PPC64_REL24:
+ r->type = R_CALLPOWER;
+ // This is a local call, so the caller isn't setting
+ // up r12 and r2 is the same for the caller and
+ // callee. Hence, we need to go to the local entry
+ // point. (If we don't do this, the callee will try
+ // to use r12 to compute r2.)
+ r->add += r->sym->localentry * 4;
+ if(targ->type == SDYNIMPORT)
+ // Should have been handled in elfsetupplt
+ diag("unexpected R_PPC64_REL24 for dyn import");
+ return;
+
+ case 256 + R_PPC64_ADDR64:
+ r->type = R_ADDR;
+ if(targ->type == SDYNIMPORT) {
+ // These happen in .toc sections
+ adddynsym(ctxt, targ);
+
+ rela = linklookup(ctxt, ".rela", 0);
+ addaddrplus(ctxt, rela, s, r->off);
+ adduint64(ctxt, rela, ELF64_R_INFO(targ->dynid, R_PPC64_ADDR64));
+ adduint64(ctxt, rela, r->add);
+ r->type = 256; // ignore during relocsym
+ }
+ return;
+
+ case 256 + R_PPC64_TOC16:
+ r->type = R_POWER_TOC;
+ r->variant = RV_POWER_LO | RV_CHECK_OVERFLOW;
+ return;
+
+ case 256 + R_PPC64_TOC16_LO:
+ r->type = R_POWER_TOC;
+ r->variant = RV_POWER_LO;
+ return;
+
+ case 256 + R_PPC64_TOC16_HA:
+ r->type = R_POWER_TOC;
+ r->variant = RV_POWER_HA | RV_CHECK_OVERFLOW;
+ return;
+
+ case 256 + R_PPC64_TOC16_HI:
+ r->type = R_POWER_TOC;
+ r->variant = RV_POWER_HI | RV_CHECK_OVERFLOW;
+ return;
+
+ case 256 + R_PPC64_TOC16_DS:
+ r->type = R_POWER_TOC;
+ r->variant = RV_POWER_DS | RV_CHECK_OVERFLOW;
+ return;
+
+ case 256 + R_PPC64_TOC16_LO_DS:
+ r->type = R_POWER_TOC;
+ r->variant = RV_POWER_DS;
+ return;
+
+ case 256 + R_PPC64_REL16_LO:
+ r->type = R_PCREL;
+ r->variant = RV_POWER_LO;
+ r->add += 2; // Compensate for relocation size of 2
+ return;
+
+ case 256 + R_PPC64_REL16_HI:
+ r->type = R_PCREL;
+ r->variant = RV_POWER_HI | RV_CHECK_OVERFLOW;
+ r->add += 2;
+ return;
+
+ case 256 + R_PPC64_REL16_HA:
+ r->type = R_PCREL;
+ r->variant = RV_POWER_HA | RV_CHECK_OVERFLOW;
+ r->add += 2;
+ return;
+ }
+
+ // Handle references to ELF symbols from our own object files.
+ if(targ->type != SDYNIMPORT)
+ return;
+
+ // TODO(austin): Translate our relocations to ELF
+
diag("unsupported relocation for dynamic symbol %s (type=%d stype=%d)", targ->name, r->type, targ->type);
}
void
elfsetupplt(void)
{
- // TODO(minux)
- return;
+ LSym *plt;
+
+ plt = linklookup(ctxt, ".plt", 0);
+ if(plt->size == 0) {
+ // The dynamic linker stores the address of the
+ // dynamic resolver and the DSO identifier in the two
+ // doublewords at the beginning of the .plt section
+ // before the PLT array. Reserve space for these.
+ plt->size = 16;
+ }
}
int
return -1;
}
+// Return the value of .TOC. for symbol s
+static vlong
+symtoc(LSym *s)
+{
+ LSym *toc;
+
+ if(s->outer != nil)
+ toc = linkrlookup(ctxt, ".TOC.", s->outer->version);
+ else
+ toc = linkrlookup(ctxt, ".TOC.", s->version);
+
+ if(toc == nil) {
+ diag("TOC-relative relocation in object without .TOC.");
+ return 0;
+ }
+ return toc->value;
+}
int
archreloc(Reloc *r, LSym *s, vlong *val)
{
uint32 o1, o2;
- int32 t;
+ vlong t;
if(linkmode == LinkExternal) {
// TODO(minux): translate R_ADDRPOWER and R_CALLPOWER into standard ELF relocations.
t = symaddr(r->sym) + r->add - (s->value + r->off);
if(t & 3)
ctxt->diag("relocation for %s+%d is not aligned: %lld", r->sym->name, r->off, t);
- if(t << 6 >> 6 != t)
+ if((int32)(t << 6) >> 6 != t)
// TODO(austin) This can happen if text > 32M.
// Add a call trampoline to .text in that case.
ctxt->diag("relocation for %s+%d is too big: %lld", r->sym->name, r->off, t);
*val = (o1 & 0xfc000003U) | (t & ~0xfc000003U);
return 0;
+ case R_POWER_TOC: // S + A - .TOC.
+ *val = symaddr(r->sym) + r->add - symtoc(s);
+ return 0;
}
return -1;
}
vlong
archrelocvariant(Reloc *r, LSym *s, vlong t)
{
- USED(r);
- USED(s);
- sysfatal("unexpected relocation variant");
+ uint32 o1;
+ switch(r->variant & RV_TYPE_MASK) {
+ default:
+ diag("unexpected relocation variant %d", r->variant);
+
+ case RV_NONE:
+ return t;
+
+ case RV_POWER_LO:
+ if(r->variant & RV_CHECK_OVERFLOW) {
+ // Whether to check for signed or unsigned
+ // overflow depends on the instruction
+ if(ctxt->arch->endian == BigEndian)
+ o1 = be32(s->p + r->off - 2);
+ else
+ o1 = le32(s->p + r->off);
+ switch(o1 >> 26) {
+ case 24: // ori
+ case 26: // xori
+ case 28: // andi
+ if((t >> 16) != 0)
+ goto overflow;
+ break;
+ default:
+ if((int16)t != t)
+ goto overflow;
+ break;
+ }
+ }
+ return (int16)t;
+
+ case RV_POWER_HA:
+ t += 0x8000;
+ // Fallthrough
+ case RV_POWER_HI:
+ t >>= 16;
+ if(r->variant & RV_CHECK_OVERFLOW) {
+ // Whether to check for signed or unsigned
+ // overflow depends on the instruction
+ if(ctxt->arch->endian == BigEndian)
+ o1 = be32(s->p + r->off - 2);
+ else
+ o1 = le32(s->p + r->off);
+ switch(o1 >> 26) {
+ case 25: // oris
+ case 27: // xoris
+ case 29: // andis
+ if((t >> 16) != 0)
+ goto overflow;
+ break;
+ default:
+ if((int16)t != t)
+ goto overflow;
+ break;
+ }
+ }
+ return (int16)t;
+
+ case RV_POWER_DS:
+ if(ctxt->arch->endian == BigEndian)
+ o1 = be16(s->p + r->off);
+ else
+ o1 = le16(s->p + r->off);
+ if(t & 3)
+ diag("relocation for %s+%d is not aligned: %lld", r->sym->name, r->off, t);
+ if((r->variant & RV_CHECK_OVERFLOW) && (int16)t != t)
+ goto overflow;
+ return (o1 & 0x3) | (vlong)(int16)t;
+ }
+
+overflow:
+ diag("relocation for %s+%d is too big: %lld", r->sym->name, r->off, t);
return t;
}
+static void
+addpltsym(Link *ctxt, LSym *s)
+{
+ if(s->plt >= 0)
+ return;
+
+ adddynsym(ctxt, s);
+
+ if(iself) {
+ LSym *plt, *rela, *glink;
+ Reloc *r;
+
+ plt = linklookup(ctxt, ".plt", 0);
+ rela = linklookup(ctxt, ".rela.plt", 0);
+ if(plt->size == 0)
+ elfsetupplt();
+
+ // Create the glink resolver if necessary
+ glink = ensureglinkresolver();
+
+ // Write symbol resolver stub (just a branch to the
+ // glink resolver stub)
+ r = addrel(glink);
+ r->sym = glink;
+ r->off = glink->size;
+ r->siz = 4;
+ r->type = R_CALLPOWER;
+ adduint32(ctxt, glink, 0x48000000); // b .glink
+
+ // In the ppc64 ABI, the dynamic linker is responsible
+ // for writing the entire PLT. We just need to
+ // reserve 8 bytes for each PLT entry and generate a
+ // JMP_SLOT dynamic relocation for it.
+ //
+ // TODO(austin): ABI v1 is different
+ s->plt = plt->size;
+ plt->size += 8;
+
+ addaddrplus(ctxt, rela, plt, s->plt);
+ adduint64(ctxt, rela, ELF64_R_INFO(s->dynid, R_PPC64_JMP_SLOT));
+ adduint64(ctxt, rela, 0);
+ } else {
+ diag("addpltsym: unsupported binary format");
+ }
+}
+
+// Generate the glink resolver stub if necessary and return the .glink section
+static LSym*
+ensureglinkresolver(void)
+{
+ LSym *glink, *s;
+ Reloc *r;
+
+ glink = linklookup(ctxt, ".glink", 0);
+ if(glink->size != 0)
+ return glink;
+
+ // This is essentially the resolver from the ppc64 ELF ABI.
+ // At entry, r12 holds the address of the symbol resolver stub
+ // for the target routine and the argument registers hold the
+ // arguments for the target routine.
+ //
+ // This stub is PIC, so first get the PC of label 1 into r11.
+ // Other things will be relative to this.
+ adduint32(ctxt, glink, 0x7c0802a6); // mflr r0
+ adduint32(ctxt, glink, 0x429f0005); // bcl 20,31,1f
+ adduint32(ctxt, glink, 0x7d6802a6); // 1: mflr r11
+ adduint32(ctxt, glink, 0x7c0803a6); // mtlf r0
+
+ // Compute the .plt array index from the entry point address.
+ // Because this is PIC, everything is relative to label 1b (in
+ // r11):
+ // r0 = ((r12 - r11) - (res_0 - r11)) / 4 = (r12 - res_0) / 4
+ adduint32(ctxt, glink, 0x3800ffd0); // li r0,-(res_0-1b)=-48
+ adduint32(ctxt, glink, 0x7c006214); // add r0,r0,r12
+ adduint32(ctxt, glink, 0x7c0b0050); // sub r0,r0,r11
+ adduint32(ctxt, glink, 0x7800f082); // srdi r0,r0,2
+
+ // r11 = address of the first byte of the PLT
+ r = addrel(glink);
+ r->off = glink->size;
+ r->sym = linklookup(ctxt, ".plt", 0);
+ r->siz = 8;
+ r->type = R_ADDRPOWER;
+ // addis r11,0,.plt@ha; addi r11,r11,.plt@l
+ r->add = (0x3d600000ull << 32) | 0x396b0000;
+ glink->size += 8;
+
+ // Load r12 = dynamic resolver address and r11 = DSO
+ // identifier from the first two doublewords of the PLT.
+ adduint32(ctxt, glink, 0xe98b0000); // ld r12,0(r11)
+ adduint32(ctxt, glink, 0xe96b0008); // ld r11,8(r11)
+
+ // Jump to the dynamic resolver
+ adduint32(ctxt, glink, 0x7d8903a6); // mtctr r12
+ adduint32(ctxt, glink, 0x4e800420); // bctr
+
+ // The symbol resolvers must immediately follow.
+ // res_0:
+
+ // Add DT_PPC64_GLINK .dynamic entry, which points to 32 bytes
+ // before the first symbol resolver stub.
+ s = linklookup(ctxt, ".dynamic", 0);
+ elfwritedynentsymplus(s, DT_PPC64_GLINK, glink, glink->size - 32);
+
+ return glink;
+}
+
void
adddynsym(Link *ctxt, LSym *s)
{
- USED(ctxt); USED(s);
- // TODO(minux)
- return;
+ LSym *d;
+ int t;
+ char *name;
+
+ if(s->dynid >= 0)
+ return;
+
+ if(iself) {
+ s->dynid = nelfsym++;
+
+ d = linklookup(ctxt, ".dynsym", 0);
+
+ name = s->extname;
+ adduint32(ctxt, d, addstring(linklookup(ctxt, ".dynstr", 0), name));
+
+ /* type */
+ t = STB_GLOBAL << 4;
+ if(s->cgoexport && (s->type&SMASK) == STEXT)
+ t |= STT_FUNC;
+ else
+ t |= STT_OBJECT;
+ adduint8(ctxt, d, t);
+
+ /* reserved */
+ adduint8(ctxt, d, 0);
+
+ /* section where symbol is defined */
+ if(s->type == SDYNIMPORT)
+ adduint16(ctxt, d, SHN_UNDEF);
+ else
+ adduint16(ctxt, d, 1);
+
+ /* value */
+ if(s->type == SDYNIMPORT)
+ adduint64(ctxt, d, 0);
+ else
+ addaddr(ctxt, d, s);
+
+ /* size of object */
+ adduint64(ctxt, d, s->size);
+ } else {
+ diag("adddynsym: unsupported binary format");
+ }
}
void
void
elfwritedynentsym(LSym *s, int tag, LSym *t)
+{
+ elfwritedynentsymplus(s, tag, t, 0);
+}
+
+void
+elfwritedynentsymplus(LSym *s, int tag, LSym *t, vlong add)
{
if(elf64)
adduint64(ctxt, s, tag);
else
adduint32(ctxt, s, tag);
- addaddr(ctxt, s, t);
+ addaddrplus(ctxt, s, t, add);
}
void
addstring(shstrtab, ".interp");
addstring(shstrtab, ".hash");
addstring(shstrtab, ".got");
+ if(thechar == '9')
+ addstring(shstrtab, ".glink");
addstring(shstrtab, ".got.plt");
addstring(shstrtab, ".dynamic");
addstring(shstrtab, ".dynsym");
/* global offset table */
s = linklookup(ctxt, ".got", 0);
s->reachable = 1;
- s->type = SELFSECT; // writable
+ s->type = SELFGOT; // writable
+
+ /* ppc64 glink resolver */
+ if(thechar == '9') {
+ s = linklookup(ctxt, ".glink", 0);
+ s->reachable = 1;
+ s->type = SELFRXSECT;
+ }
/* hash */
s = linklookup(ctxt, ".hash", 0);
s = linklookup(ctxt, ".plt", 0);
s->reachable = 1;
- s->type = SELFRXSECT;
+ if(thechar == '9')
+ // In the ppc64 ABI, .plt is a data section
+ // written by the dynamic linker.
+ s->type = SELFSECT;
+ else
+ s->type = SELFRXSECT;
elfsetupplt();
}
if(rpath)
elfwritedynent(s, DT_RUNPATH, addstring(dynstr, rpath));
-
- elfwritedynentsym(s, DT_PLTGOT, linklookup(ctxt, ".got.plt", 0));
+
+ if(thechar == '9')
+ elfwritedynentsym(s, DT_PLTGOT, linklookup(ctxt, ".plt", 0));
+ else
+ elfwritedynentsym(s, DT_PLTGOT, linklookup(ctxt, ".got.plt", 0));
+
+ if(thechar == '9')
+ elfwritedynent(s, DT_PPC64_OPT, 0);
// Solaris dynamic linker can't handle an empty .rela.plt if
// DT_JMPREL is emitted so we have to defer generation of DT_PLTREL,
switch(eh->machine) {
case EM_X86_64:
+ case EM_PPC64:
sh = elfshname(".rela.plt");
sh->type = SHT_RELA;
sh->flags = SHF_ALLOC;
break;
}
+ if(eh->machine == EM_PPC64) {
+ sh = elfshname(".glink");
+ sh->type = SHT_PROGBITS;
+ sh->flags = SHF_ALLOC+SHF_EXECINSTR;
+ sh->addralign = 4;
+ shsym(sh, linklookup(ctxt, ".glink", 0));
+ }
+
sh = elfshname(".plt");
sh->type = SHT_PROGBITS;
sh->flags = SHF_ALLOC+SHF_EXECINSTR;
if(eh->machine == EM_X86_64)
sh->entsize = 16;
- else
+ else if(eh->machine == EM_PPC64) {
+ // On ppc64, this is just a table of addresses
+ // filled by the dynamic linker
+ sh->type = SHT_NOBITS;
+ sh->flags = SHF_ALLOC+SHF_WRITE;
+ sh->entsize = 8;
+ } else
sh->entsize = 4;
- sh->addralign = 4;
+ sh->addralign = sh->entsize;
shsym(sh, linklookup(ctxt, ".plt", 0));
- sh = elfshname(".got");
- sh->type = SHT_PROGBITS;
- sh->flags = SHF_ALLOC+SHF_WRITE;
- sh->entsize = RegSize;
- sh->addralign = RegSize;
- shsym(sh, linklookup(ctxt, ".got", 0));
+ // On ppc64, .got comes from the input files, so don't
+ // create it here, and .got.plt is not used.
+ if(eh->machine != EM_PPC64) {
+ sh = elfshname(".got");
+ sh->type = SHT_PROGBITS;
+ sh->flags = SHF_ALLOC+SHF_WRITE;
+ sh->entsize = RegSize;
+ sh->addralign = RegSize;
+ shsym(sh, linklookup(ctxt, ".got", 0));
- sh = elfshname(".got.plt");
- sh->type = SHT_PROGBITS;
- sh->flags = SHF_ALLOC+SHF_WRITE;
- sh->entsize = RegSize;
- sh->addralign = RegSize;
- shsym(sh, linklookup(ctxt, ".got.plt", 0));
+ sh = elfshname(".got.plt");
+ sh->type = SHT_PROGBITS;
+ sh->flags = SHF_ALLOC+SHF_WRITE;
+ sh->entsize = RegSize;
+ sh->addralign = RegSize;
+ shsym(sh, linklookup(ctxt, ".got.plt", 0));
+ }
sh = elfshname(".hash");
sh->type = SHT_HASH;