From 5e1d0fcbed3060b00c33e48ae6fdbaa92965c287 Mon Sep 17 00:00:00 2001 From: Michael Hudson-Doyle Date: Tue, 8 Sep 2015 15:21:58 +1200 Subject: [PATCH] cmd/internal/obj, cmd/link: handle the fact that a few store/loads on ppc64 are DS form Change-Id: I4fe1af48ec1cd8a23e2f7f2a0257dc989ff7aced Reviewed-on: https://go-review.googlesource.com/14235 Reviewed-by: Russ Cox --- src/cmd/internal/obj/link.go | 12 ++++ src/cmd/internal/obj/ppc64/asm9.go | 66 ++++++++++++++++------ src/cmd/link/internal/ppc64/asm.go | 91 +++++++++++++++++------------- 3 files changed, 113 insertions(+), 56 deletions(-) diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index ef63a7047b..367dc1247f 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -404,6 +404,11 @@ type Reloc struct { // Reloc.type const ( R_ADDR = 1 + iota + // R_ADDRPOWER relocates a pair of "D-form" instructions (instructions with 16-bit + // immediates in the low half of the instruction word), usually addis followed by + // another add or a load, inserting the "high adjusted" 16 bits of the address of + // the referenced symbol into the immediate field of the first instruction and the + // low 16 bits into that of the second instruction. R_ADDRPOWER R_ADDRARM64 R_SIZE @@ -459,6 +464,13 @@ const ( // thread pointer (R13) and inserts this value into the low 16 bits of an // instruction word. R_POWER_TLS_LE + + // R_ADDRPOWER_DS is similar to R_ADDRPOWER above, but assumes the second + // instruction is a "DS-form" instruction, which has an immediate field occupying + // bits [15:2] of the instruction word. Bits [15:2] of the address of the + // relocated symbol are inserted into this field; it is an error if the last two + // bits of the address are not 0. 
+ R_ADDRPOWER_DS ) type Auto struct { diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 7ade888be7..3e3a020a7c 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -1377,14 +1377,57 @@ func oclass(a *obj.Addr) int { return int(a.Class) - 1 } -// add R_ADDRPOWER relocation to symbol s with addend d -func addaddrreloc(ctxt *obj.Link, s *obj.LSym, d int64) { +const ( + D_FORM = iota + DS_FORM +) + +// opform returns the form (D_FORM or DS_FORM) of an instruction. Used to decide on +// which relocation to use with a load or store and only supports the needed +// instructions. +func opform(ctxt *obj.Link, insn int32) int { + switch uint32(insn) { + default: + ctxt.Diag("bad insn in opform: %x", insn) + case OPVCC(58, 0, 0, 0), // ld + OPVCC(58, 0, 0, 0) | 1<<1, // lwa + OPVCC(62, 0, 0, 0): // std + return DS_FORM + case OP_ADDI, // addi + OPVCC(32, 0, 0, 0), // lwz + OPVCC(42, 0, 0, 0), // lha + OPVCC(40, 0, 0, 0), // lhz + OPVCC(34, 0, 0, 0), // lbz + OPVCC(50, 0, 0, 0), // lfd + OPVCC(48, 0, 0, 0), // lfs + OPVCC(36, 0, 0, 0), // stw + OPVCC(44, 0, 0, 0), // sth + OPVCC(38, 0, 0, 0), // stb + OPVCC(54, 0, 0, 0), // stfd + OPVCC(52, 0, 0, 0): // stfs + return D_FORM + } + return 0 +} + +// Encode instructions and create relocation for accessing s+d according to the +// instruction op with source or destination (as appropriate) register reg. 
+func symbolAccess(ctxt *obj.Link, s *obj.LSym, d int64, reg int16, op int32) (o1, o2 uint32) { + form := opform(ctxt, op) + o1 = AOP_IRR(OP_ADDIS, REGTMP, REGZERO, 0) + o2 = AOP_IRR(uint32(op), uint32(reg), REGTMP, 0) rel := obj.Addrel(ctxt.Cursym) rel.Off = int32(ctxt.Pc) rel.Siz = 8 rel.Sym = s rel.Add = d - rel.Type = obj.R_ADDRPOWER + switch form { + case D_FORM: + rel.Type = obj.R_ADDRPOWER + case DS_FORM: + rel.Type = obj.R_ADDRPOWER_DS + } + return } /* @@ -1810,9 +1853,7 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) { o1 = loadu32(int(p.To.Reg), d) o2 = LOP_IRR(OP_ORI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(int32(d))) } else { - o1 = AOP_IRR(OP_ADDIS, REGTMP, REGZERO, 0) - o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), REGTMP, 0) - addaddrreloc(ctxt, p.From.Sym, d) + o1, o2 = symbolAccess(ctxt, p.From.Sym, d, p.To.Reg, OP_ADDI) } //if(dlm) reloc(&p->from, p->pc, 0); @@ -2377,26 +2418,19 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) { /* relocation operations */ case 74: v := vregoff(ctxt, &p.To) - - o1 = AOP_IRR(OP_ADDIS, REGTMP, REGZERO, 0) - o2 = AOP_IRR(uint32(opstore(ctxt, int(p.As))), uint32(p.From.Reg), REGTMP, 0) - addaddrreloc(ctxt, p.To.Sym, v) + o1, o2 = symbolAccess(ctxt, p.To.Sym, v, p.From.Reg, opstore(ctxt, int(p.As))) //if(dlm) reloc(&p->to, p->pc, 1); case 75: v := vregoff(ctxt, &p.From) - o1 = AOP_IRR(OP_ADDIS, REGTMP, REGZERO, 0) - o2 = AOP_IRR(uint32(opload(ctxt, int(p.As))), uint32(p.To.Reg), REGTMP, 0) - addaddrreloc(ctxt, p.From.Sym, v) + o1, o2 = symbolAccess(ctxt, p.From.Sym, v, p.To.Reg, opload(ctxt, int(p.As))) //if(dlm) reloc(&p->from, p->pc, 1); case 76: v := vregoff(ctxt, &p.From) - o1 = AOP_IRR(OP_ADDIS, REGTMP, REGZERO, 0) - o2 = AOP_IRR(uint32(opload(ctxt, int(p.As))), uint32(p.To.Reg), REGTMP, 0) - addaddrreloc(ctxt, p.From.Sym, v) + o1, o2 = symbolAccess(ctxt, p.From.Sym, v, p.To.Reg, opload(ctxt, int(p.As))) o3 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0) //if(dlm) 
reloc(&p->from, p->pc, 1); diff --git a/src/cmd/link/internal/ppc64/asm.go b/src/cmd/link/internal/ppc64/asm.go index b8ab534461..e6dbec23eb 100644 --- a/src/cmd/link/internal/ppc64/asm.go +++ b/src/cmd/link/internal/ppc64/asm.go @@ -330,6 +330,55 @@ func symtoc(s *ld.LSym) int64 { return toc.Value } +func archrelocaddr(r *ld.Reloc, s *ld.LSym, val *int64) int { + var o1, o2 uint32 + if ld.Ctxt.Arch.ByteOrder == binary.BigEndian { + o1 = uint32(*val >> 32) + o2 = uint32(*val) + } else { + o1 = uint32(*val) + o2 = uint32(*val >> 32) + } + + // We are spreading a 31-bit address across two instructions, putting the + // high (adjusted) part in the low 16 bits of the first instruction and the + // low part in the low 16 bits of the second instruction, or, in the DS case, + // bits 15-2 (inclusive) of the address into bits 15-2 of the second + // instruction (it is an error in this case if the low 2 bits of the address + // are non-zero). + + t := ld.Symaddr(r.Sym) + r.Add + if t < 0 || t >= 1<<31 { + ld.Ctxt.Diag("relocation for %s is too big (>=2G): %d", s.Name, ld.Symaddr(r.Sym)+r.Add) + } + if t&0x8000 != 0 { + t += 0x10000 + } + + switch r.Type { + case obj.R_ADDRPOWER: + o1 |= (uint32(t) >> 16) & 0xffff + o2 |= uint32(t) & 0xffff + + case obj.R_ADDRPOWER_DS: + o1 |= (uint32(t) >> 16) & 0xffff + if t&3 != 0 { + ld.Ctxt.Diag("bad DS reloc for %s: %d", s.Name, ld.Symaddr(r.Sym)+r.Add) + } + o2 |= uint32(t) & 0xfffc + + default: + return -1 + } + + if ld.Ctxt.Arch.ByteOrder == binary.BigEndian { + *val = int64(o1)<<32 | int64(o2) + } else { + *val = int64(o2)<<32 | int64(o1) + } + return 0 +} + func archreloc(r *ld.Reloc, s *ld.LSym, val *int64) int { if ld.Linkmode == ld.LinkExternal { // TODO(minux): translate R_ADDRPOWER and R_CALLPOWER into standard ELF relocations. 
@@ -347,46 +396,8 @@ func archreloc(r *ld.Reloc, s *ld.LSym, val *int64) int { *val = ld.Symaddr(r.Sym) + r.Add - ld.Symaddr(ld.Linklookup(ld.Ctxt, ".got", 0)) return 0 - case obj.R_ADDRPOWER: - // We are spreading a 31-bit address across two instructions, - // putting the high (adjusted) part in the low 16 bits of the - // first instruction and the low part in the low 16 bits of the - // second instruction. - t := ld.Symaddr(r.Sym) + r.Add - if t < 0 || t >= 1<<31 { - ld.Ctxt.Diag("relocation for %s is too big (>=2G): %d", s.Name, ld.Symaddr(r.Sym)) - } - var o1, o2 uint32 - if ld.Ctxt.Arch.ByteOrder == binary.BigEndian { - o1 = uint32(*val >> 32) - o2 = uint32(*val) - } else { - o1 = uint32(*val) - o2 = uint32(*val >> 32) - } - if t&0x8000 != 0 { - t += 0x10000 - } - // There is an almost-bug here. When R_ADDRPOWER is relocating a - // load, the two instructions are addi and then a load. addi and - // almost all loads are "D-form" instructions, which have a - // 16-bit immediate in the lower 16-bits of the instruction - // word. But the load doubleword instruction is a "DS-form" - // instruction: the immediate only occupies bits 16-29 of the - // instruction and is implicity padded with zeros on the - // right. The reason the belows isn't a bug is because we only - // ever use immediates that have zeros on in their lower bits - // with ld, and we combine the immediate with | so bits 30 and - // 31 are preserved. - o1 |= (uint32(t) >> 16) & 0xffff - o2 |= uint32(t) & 0xffff - - if ld.Ctxt.Arch.ByteOrder == binary.BigEndian { - *val = int64(o1)<<32 | int64(o2) - } else { - *val = int64(o2)<<32 | int64(o1) - } - return 0 + case obj.R_ADDRPOWER, obj.R_ADDRPOWER_DS: + return archrelocaddr(r, s, val) case obj.R_CALLPOWER: // Bits 6 through 29 = (S + A - P) >> 2 -- 2.48.1