From: Michael Hudson-Doyle Date: Fri, 16 Oct 2015 08:15:18 +0000 (+1300) Subject: cmd/internal/obj, runtime: implement IE model TLS on ppc64le X-Git-Tag: go1.6beta1~486 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=a35c85c0cc518b89e0318bf8b6c2c584c61090bb;p=gostls13.git cmd/internal/obj, runtime: implement IE model TLS on ppc64le This requires changing the tls access code to match the patterns documented in the ABI documentation or the system linker will "optimize" it into ridiculousness. With this change, -buildmode=pie works, although as it is tested in testshared, the tests are not run yet. Change-Id: I1efa6687af0a5b8db3385b10f6542a49056b2eb3 Reviewed-on: https://go-review.googlesource.com/15971 Reviewed-by: Russ Cox --- diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index ab1de2447b..a7e0c2f863 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -480,6 +480,19 @@ const ( // instruction word. R_POWER_TLS_LE + // R_POWER_TLS_IE is used to implement the "initial exec" model for tls access. It + // relocates a D-form, DS-form instruction sequence like R_ADDRPOWER_DS. It + // inserts to the offset of GOT slot for the thread-local symbol from the TOC (the + // GOT slot is filled by the dynamic linker with the offset of the thread-local + // symbol from the thread pointer (R13)). + R_POWER_TLS_IE + + // R_POWER_TLS marks an X-form instruction such as "MOVD 0(R13)(R31*1), g" as + // accessing a particular thread-local symbol. It does not affect code generation + // but is used by the system linker when relaxing "initial exec" model code to + // "local exec" model code. + R_POWER_TLS + // R_ADDRPOWER_DS is similar to R_ADDRPOWER above, but assumes the second // instruction is a "DS-form" instruction, which has an immediate field occupying // bits [15:2] of the instruction word. Bits [15:2] of the address of the diff --git a/src/cmd/internal/obj/ppc64/a.out.go b/src/cmd/internal/obj/ppc64/a.out.go index 3fd4685769..d28e1e895e 100644 --- a/src/cmd/internal/obj/ppc64/a.out.go +++ b/src/cmd/internal/obj/ppc64/a.out.go @@ -222,6 +222,7 @@ const ( C_GOK C_ADDR C_TLS_LE + C_TLS_IE C_TEXTSIZE C_NCLASS /* must be the last */ diff --git a/src/cmd/internal/obj/ppc64/anames9.go b/src/cmd/internal/obj/ppc64/anames9.go index 62125a4e52..1b5d564dfe 100644 --- a/src/cmd/internal/obj/ppc64/anames9.go +++ b/src/cmd/internal/obj/ppc64/anames9.go @@ -36,6 +36,7 @@ var cnames9 = []string{ "GOK", "ADDR", "TLS_LE", + "TLS_IE", "TEXTSIZE", "NCLASS", } diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index c226ed540d..25b9aef830 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -246,6 +246,7 @@ var optab = []Optab{ {AMOVB, C_ADDR, C_NONE, C_NONE, C_REG, 76, 12, 0}, {AMOVD, C_TLS_LE, C_NONE, C_NONE, C_REG, 79, 4, 0}, + {AMOVD, C_TLS_IE, C_NONE, C_NONE, C_REG, 80, 8, 0}, /* load constant */ {AMOVD, C_SECON, C_NONE, C_NONE, C_REG, 3, 4, REGSB}, @@ -587,7 +588,11 @@ func aclass(ctxt *obj.Link, a *obj.Addr) int { ctxt.Instoffset = a.Offset if a.Sym != nil { // use relocation if a.Sym.Type == obj.STLSBSS { - return C_TLS_LE + if ctxt.Flag_shared != 0 { + return C_TLS_IE + } else { + return C_TLS_LE + } } return C_ADDR } @@ -1652,6 +1657,18 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) { if v != 0 { ctxt.Diag("illegal indexed instruction\n%v", p) } + if ctxt.Flag_shared != 0 && r == REG_R13 { + rel := obj.Addrel(ctxt.Cursym) + rel.Off = int32(ctxt.Pc) + rel.Siz = 4 + // This (and the matching part in the load case + // below) are the only places in the ppc64 toolchain + // that knows the name of the tls variable. Possibly + // we could add some assembly syntax so that the name + // of the variable does not have to be assumed. + rel.Sym = obj.Linklookup(ctxt, "runtime.tls_g", 0) + rel.Type = obj.R_POWER_TLS + } o1 = AOP_RRR(uint32(opstorex(ctxt, int(p.As))), uint32(p.From.Reg), uint32(p.To.Index), uint32(r)) } else { if int32(int16(v)) != v { @@ -1671,6 +1688,13 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) { if v != 0 { ctxt.Diag("illegal indexed instruction\n%v", p) } + if ctxt.Flag_shared != 0 && r == REG_R13 { + rel := obj.Addrel(ctxt.Cursym) + rel.Off = int32(ctxt.Pc) + rel.Siz = 4 + rel.Sym = obj.Linklookup(ctxt, "runtime.tls_g", 0) + rel.Type = obj.R_POWER_TLS + } o1 = AOP_RRR(uint32(oploadx(ctxt, int(p.As))), uint32(p.To.Reg), uint32(p.From.Index), uint32(r)) } else { if int32(int16(v)) != v { @@ -2467,6 +2491,18 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) { rel.Sym = p.From.Sym rel.Type = obj.R_POWER_TLS_LE + case 80: + if p.From.Offset != 0 { + ctxt.Diag("invalid offset against tls var %v", p) + } + o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0) + o2 = AOP_IRR(uint32(opload(ctxt, AMOVD)), uint32(p.To.Reg), uint32(p.To.Reg), 0) + rel := obj.Addrel(ctxt.Cursym) + rel.Off = int32(ctxt.Pc) + rel.Siz = 8 + rel.Sym = p.From.Sym + rel.Type = obj.R_POWER_TLS_IE + } out[0] = o1 diff --git a/src/cmd/link/internal/ld/elf.go b/src/cmd/link/internal/ld/elf.go index 51552e3c15..505cbcdbb1 100644 --- a/src/cmd/link/internal/ld/elf.go +++ b/src/cmd/link/internal/ld/elf.go @@ -564,23 +564,28 @@ const ( R_PPC_EMB_BIT_FLD = 115 R_PPC_EMB_RELSDA = 116 - R_PPC64_ADDR32 = R_PPC_ADDR32 - R_PPC64_ADDR16_LO = R_PPC_ADDR16_LO - R_PPC64_ADDR16_HA = R_PPC_ADDR16_HA - R_PPC64_REL24 = R_PPC_REL24 - R_PPC64_JMP_SLOT = R_PPC_JMP_SLOT - R_PPC64_TPREL16 = R_PPC_TPREL16 - R_PPC64_ADDR64 = 38 - R_PPC64_TOC16 = 47 - R_PPC64_TOC16_LO = 48 - R_PPC64_TOC16_HI = 49 - R_PPC64_TOC16_HA = 50 - R_PPC64_ADDR16_LO_DS = 57 - R_PPC64_TOC16_DS = 63 - R_PPC64_TOC16_LO_DS = 64 - R_PPC64_REL16_LO = 250 - R_PPC64_REL16_HI = 251 - R_PPC64_REL16_HA = 252 + R_PPC64_ADDR32 = R_PPC_ADDR32 + R_PPC64_ADDR16_LO = R_PPC_ADDR16_LO + R_PPC64_ADDR16_HA = R_PPC_ADDR16_HA + R_PPC64_REL24 = R_PPC_REL24 + R_PPC64_GOT16_HA = R_PPC_GOT16_HA + R_PPC64_JMP_SLOT = R_PPC_JMP_SLOT + R_PPC64_TPREL16 = R_PPC_TPREL16 + R_PPC64_ADDR64 = 38 + R_PPC64_TOC16 = 47 + R_PPC64_TOC16_LO = 48 + R_PPC64_TOC16_HI = 49 + R_PPC64_TOC16_HA = 50 + R_PPC64_ADDR16_LO_DS = 57 + R_PPC64_GOT16_LO_DS = 59 + R_PPC64_TOC16_DS = 63 + R_PPC64_TOC16_LO_DS = 64 + R_PPC64_TLS = 67 + R_PPC64_GOT_TPREL16_LO_DS = 88 + R_PPC64_GOT_TPREL16_HA = 90 + R_PPC64_REL16_LO = 250 + R_PPC64_REL16_HI = 251 + R_PPC64_REL16_HA = 252 R_SPARC_NONE = 0 R_SPARC_8 = 1 diff --git a/src/cmd/link/internal/ppc64/asm.go b/src/cmd/link/internal/ppc64/asm.go index 97efe7d354..706160d0fb 100644 --- a/src/cmd/link/internal/ppc64/asm.go +++ b/src/cmd/link/internal/ppc64/asm.go @@ -310,9 +310,18 @@ func elfreloc1(r *ld.Reloc, sectoff int64) int { return -1 } + case obj.R_POWER_TLS: + ld.Thearch.Vput(ld.R_PPC64_TLS | uint64(elfsym)<<32) + case obj.R_POWER_TLS_LE: ld.Thearch.Vput(ld.R_PPC64_TPREL16 | uint64(elfsym)<<32) + case obj.R_POWER_TLS_IE: + ld.Thearch.Vput(ld.R_PPC64_GOT_TPREL16_HA | uint64(elfsym)<<32) + ld.Thearch.Vput(uint64(r.Xadd)) + ld.Thearch.Vput(uint64(sectoff + 4)) + ld.Thearch.Vput(ld.R_PPC64_GOT_TPREL16_LO_DS | uint64(elfsym)<<32) + case obj.R_ADDRPOWER: ld.Thearch.Vput(ld.R_PPC64_ADDR16_HA | uint64(elfsym)<<32) ld.Thearch.Vput(uint64(r.Xadd)) @@ -444,7 +453,7 @@ func archreloc(r *ld.Reloc, s *ld.LSym, val *int64) int { default: return -1 - case obj.R_POWER_TLS_LE: + case obj.R_POWER_TLS, obj.R_POWER_TLS_LE, obj.R_POWER_TLS_IE: r.Done = 0 // check Outer is nil, Type is TLSBSS? r.Xadd = r.Add diff --git a/src/runtime/tls_ppc64x.s b/src/runtime/tls_ppc64x.s index 1b030fd36a..c79c97dae3 100644 --- a/src/runtime/tls_ppc64x.s +++ b/src/runtime/tls_ppc64x.s @@ -27,8 +27,7 @@ TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0-0 CMP R31, $0 BEQ nocgo MOVD runtime·tls_g(SB), R31 - ADD R13, R31 - MOVD g, 0(R31) + MOVD g, 0(R13)(R31*1) nocgo: RET @@ -44,8 +43,7 @@ nocgo: // NOTE: _cgo_topofstack assumes this only clobbers g (R30), and R31. TEXT runtime·load_g(SB),NOSPLIT|NOFRAME,$0-0 MOVD runtime·tls_g(SB), R31 - ADD R13, R31 - MOVD 0(R31), g + MOVD 0(R13)(R31*1), g RET GLOBL runtime·tls_g+0(SB), TLSBSS, $8