Cypherpunks repositories - gostls13.git/commitdiff
liblink: introduce TLS register on 386 and amd64
author Russ Cox <rsc@golang.org>
Tue, 15 Apr 2014 17:45:39 +0000 (13:45 -0400)
committer Russ Cox <rsc@golang.org>
Tue, 15 Apr 2014 17:45:39 +0000 (13:45 -0400)
When I did the original 386 ports on Linux and OS X, I chose to
define GS-relative expressions like 4(GS) as relative to the actual
thread-local storage base, which was usually GS but might not be
(it might be FS, or it might be a different constant offset from GS or FS).

The original scope was limited but since then the rewrites have
gotten out of control. Sometimes GS is rewritten, sometimes FS.
Some ports do other rewrites to enable shared libraries and
other linking. At no point in the code is it clear whether you are
looking at the real GS/FS or some synthesized thing that will be
rewritten. The code manipulating all these is duplicated in many
places.

The first step to fixing issue 7719 is to make the code intelligible
again.

This CL adds an explicit TLS pseudo-register to the 386 and amd64.
As a register, TLS refers to the thread-local storage base, and it
can only be loaded into another register:

        MOVQ TLS, AX

An offset from the thread-local storage base is written off(reg)(TLS*1).
Semantically it is off(reg), but the (TLS*1) annotation marks this as
indexing from the loaded TLS base. This emits a relocation so that
if the linker needs to adjust the offset, it can. For example:

        MOVQ TLS, AX
        MOVQ 8(AX)(TLS*1), CX // load m into CX

On systems that support direct access to the TLS memory, this
pair of instructions can be reduced to a direct TLS memory reference:

        MOVQ 8(TLS), CX // load m into CX

The 2-instruction and 1-instruction forms correspond roughly to
ELF TLS initial exec mode and ELF TLS local exec mode, respectively.

Liblink applies this rewrite on systems that support the 1-instruction form.
The decision is made using only the operating system (and probably
the -shared flag, eventually), not the link mode. If some link modes
on a particular operating system require the 2-instruction form,
then all builds for that operating system will use the 2-instruction
form, so that the link mode decision can be delayed to link time.

Obviously it is late to be making changes like this, but I despair
of correcting issue 7719 and issue 7164 without it. To make sure
I am not changing existing behavior, I built a "hello world" program
for every GOOS/GOARCH combination we have and then worked
to make sure that the rewrite generates exactly the same binaries,
byte for byte. There are a handful of TODOs in the code marking
kludges to get the byte-for-byte property, but at least now I can
explain exactly how each binary is handled.

The targets I tested this way are:

        darwin-386
        darwin-amd64
        dragonfly-386
        dragonfly-amd64
        freebsd-386
        freebsd-amd64
        freebsd-arm
        linux-386
        linux-amd64
        linux-arm
        nacl-386
        nacl-amd64p32
        netbsd-386
        netbsd-amd64
        openbsd-386
        openbsd-amd64
        plan9-386
        plan9-amd64
        solaris-amd64
        windows-386
        windows-amd64

There were four exceptions to the byte-for-byte goal:

windows-386 and windows-amd64 have a time stamp
at bytes 137 and 138 of the header.

darwin-386 and plan9-386 have five or six modified
bytes in the middle of the Go symbol table, caused by
editing comments in runtime/sys_{darwin,plan9}_386.s.

Fixes #7164.

LGTM=iant
R=iant, aram, minux.ma, dave
CC=golang-codereviews
https://golang.org/cl/87920043

26 files changed:
include/link.h
src/cmd/6a/lex.c
src/cmd/6c/txt.c
src/cmd/6l/6.out.h
src/cmd/6l/obj.c
src/cmd/8a/lex.c
src/cmd/8c/txt.c
src/cmd/8l/8.out.h
src/cmd/8l/obj.c
src/cmd/dist/buildruntime.c
src/cmd/ld/data.c
src/cmd/ld/pobj.c
src/liblink/asm6.c
src/liblink/asm8.c
src/liblink/list6.c
src/liblink/list8.c
src/liblink/obj6.c
src/liblink/obj8.c
src/liblink/objfile.c
src/liblink/sym.c
src/pkg/runtime/runtime.h
src/pkg/runtime/sys_darwin_386.s
src/pkg/runtime/sys_linux_386.s
src/pkg/runtime/sys_nacl_amd64p32.s
src/pkg/runtime/sys_plan9_386.s
src/pkg/runtime/sys_plan9_amd64.s

index 92b8b73b6b4cfb805878e397d0e66aa5ae8b70e0..9a6fca2ab004f931586ae640395e3437bb81e82c 100644 (file)
@@ -232,6 +232,8 @@ enum
        R_CONST,
        R_PCREL,
        R_TLS,
+       R_TLS_LE, // TLS local exec offset from TLS segment register
+       R_TLS_IE, // TLS initial exec offset from TLS base pointer
        R_GOTOFF,
        R_PLT0,
        R_PLT1,
@@ -340,7 +342,6 @@ struct      Link
        char*   thestring; // full name of architecture ("arm", "amd64", ..)
        int32   goarm; // for arm only, GOARM setting
        int     headtype;
-       int     linkmode;
 
        LinkArch*       arch;
        int32   (*ignore)(char*);       // do not emit names satisfying this function
index e24fbc9dc4b9175ab00eea7bc71e2df7de221654..2a1c4b8e1fb6b61b0d89f25859a23ac3b02df070 100644 (file)
@@ -334,6 +334,7 @@ struct
        "TR5",          LBREG,  D_TR+5,
        "TR6",          LBREG,  D_TR+6,
        "TR7",          LBREG,  D_TR+7,
+       "TLS",          LSREG,  D_TLS,
 
        "AAA",          LTYPE0, AAAA,
        "AAD",          LTYPE0, AAAD,
index f308aff08c1aa932b6bb350791009e638b157687..4d07436c3f59e8a3b525afaf057a119faccf9b7c 100644 (file)
@@ -513,7 +513,7 @@ naddr(Node *n, Addr *a)
                break;
 
        case OEXREG:
-               a->type = D_INDIR + D_GS;
+               a->type = D_INDIR + D_TLS;
                a->offset = n->reg - 1;
                break;
 
index a8e11a10d27c9366503fc24a3bd5136477392edb..1e2a1488f2c43cb609187a1ba40c5e03123f8f04 100644 (file)
@@ -850,20 +850,18 @@ enum
        D_DR            = 95,
        D_TR            = 103,
 
-       D_NONE          = 111,
-
-       D_BRANCH        = 112,
-       D_EXTERN        = 113,
-       D_STATIC        = 114,
-       D_AUTO          = 115,
-       D_PARAM         = 116,
-       D_CONST         = 117,
-       D_FCONST        = 118,
-       D_SCONST        = 119,
-       D_ADDR          = 120,
-
-       D_FILE,
-       D_FILE1,
+       D_TLS           = 111,
+       D_NONE          = 112,
+
+       D_BRANCH        = 113,
+       D_EXTERN        = 114,
+       D_STATIC        = 115,
+       D_AUTO          = 116,
+       D_PARAM         = 117,
+       D_CONST         = 118,
+       D_FCONST        = 119,
+       D_SCONST        = 120,
+       D_ADDR          = 121,
 
        D_INDIR,        /* additive */
 
index 0d872eaebd8c065011f6c81466c842d26d625790..3b8e8f4d7a1125817da485f1adc04ef1ad99af82 100644 (file)
@@ -79,7 +79,6 @@ archinit(void)
        case Hsolaris:
                break;
        }
-       ctxt->linkmode = linkmode;
 
        switch(HEADTYPE) {
        default:
index 6c55b314357cd04b98208970771a7d93e98e57b9..49a105da6acb325268bca27ca71af8317c7f62f7 100644 (file)
@@ -241,6 +241,7 @@ struct
        "ES",           LSREG,  D_ES,
        "FS",           LSREG,  D_FS,
        "GS",           LSREG,  D_GS,
+       "TLS",          LSREG,  D_TLS,
 
        "GDTR",         LBREG,  D_GDTR,
        "IDTR",         LBREG,  D_IDTR,
index 1b6c2e6d960cb8622e3c1a179335ecb358f4440d..25082de05da29e2f2d263fe7e6ae37e9df78d0d7 100644 (file)
@@ -464,7 +464,7 @@ naddr(Node *n, Addr *a)
                break;
 
        case OEXREG:
-               a->type = D_INDIR + D_GS;
+               a->type = D_INDIR + D_TLS;
                a->offset = n->reg - 1;
                break;
 
index 0dcd74a61d35a99c64686975ac2e775062c4b951..8e642d390340ce7a1fdcc5466f1f17009939d2c4 100644 (file)
@@ -636,21 +636,19 @@ enum
        D_X5,
        D_X6,
        D_X7,
-
-       D_NONE          = 67,
-
-       D_BRANCH        = 68,
-       D_EXTERN        = 69,
-       D_STATIC        = 70,
-       D_AUTO          = 71,
-       D_PARAM         = 72,
-       D_CONST         = 73,
-       D_FCONST        = 74,
-       D_SCONST        = 75,
-       D_ADDR          = 76,
-
-       D_FILE,
-       D_FILE1,
+       
+       D_TLS           = 67,
+       D_NONE          = 68,
+
+       D_BRANCH        = 69,
+       D_EXTERN        = 70,
+       D_STATIC        = 71,
+       D_AUTO          = 72,
+       D_PARAM         = 73,
+       D_CONST         = 74,
+       D_FCONST        = 75,
+       D_SCONST        = 76,
+       D_ADDR          = 77,
 
        D_INDIR,        /* additive */
 
index ddbd96aa03693f44dfa559c2cb0afb171946500f..1b65c5eb9e21f45673c6523973d71eadc37ae62e 100644 (file)
@@ -69,7 +69,6 @@ archinit(void)
        case Hopenbsd:
                break;
        }
-       ctxt->linkmode = linkmode;
 
        switch(HEADTYPE) {
        default:
index e2d46cdac4a8c8b2603db4040a455bb503461749..ba5993b2fcd8413257b9114f86951528f03b0fce 100644 (file)
@@ -127,99 +127,22 @@ static struct {
        char *goos;
        char *hdr;
 } zasmhdr[] = {
-       {"386", "windows",
-               "#define        get_tls(r)      MOVL 0x14(FS), r\n"
-               "#define        g(r)    0(r)\n"
-               "#define        m(r)    4(r)\n"
-       },
-       {"386", "plan9",
-               "// Plan 9 does not have per-process segment descriptors with\n"
-               "// which to do thread-local storage. Instead, we will use a\n"
-               "// fixed offset from the per-process TOS struct address for\n"
-               "// the local storage. Since the process ID is contained in the\n"
-               "// TOS struct, we specify an offset for that here as well.\n"
-               "#define        get_tls(r)      MOVL _tos(SB), r \n"
-               "#define        g(r)    -8(r)\n"
-               "#define        m(r)    -4(r)\n"
-               "#define        procid(r)       48(r)\n"
-       },
-       {"386", "linux",
-               "// On Linux systems, what we call 0(GS) and 4(GS) for g and m\n"
-               "// turn into %gs:-8 and %gs:-4 (using gcc syntax to denote\n"
-               "// what the machine sees as opposed to 8l input).\n"
-               "// 8l rewrites 0(GS) and 4(GS) into these.\n"
-               "//\n"
-               "// On Linux Xen, it is not allowed to use %gs:-8 and %gs:-4\n"
-               "// directly.  Instead, we have to store %gs:0 into a temporary\n"
-               "// register and then use -8(%reg) and -4(%reg).  This kind\n"
-               "// of addressing is correct even when not running Xen.\n"
-               "//\n"
-               "// 8l can rewrite MOVL 0(GS), CX into the appropriate pair\n"
-               "// of mov instructions, using CX as the intermediate register\n"
-               "// (safe because CX is about to be written to anyway).\n"
-               "// But 8l cannot handle other instructions, like storing into 0(GS),\n"
-               "// which is where these macros come into play.\n"
-               "// get_tls sets up the temporary and then g and r use it.\n"
-               "//\n"
-               "// Another wrinkle is that get_tls needs to read from %gs:0,\n"
-               "// but in 8l input it's called 8(GS), because 8l is going to\n"
-               "// subtract 8 from all the offsets, as described above.\n"
-               "//\n"
-               "// The final wrinkle is that when generating an ELF .o file for\n"
-               "// external linking mode, we need to be able to relocate the\n"
-               "// -8(r) and -4(r) instructions. Tag them with an extra (GS*1)\n"
-               "// that is ignored by the linker except for that identification.\n"
-               "#define        get_tls(r)      MOVL 8(GS), r\n"
-               "#define        g(r)    -8(r)(GS*1)\n"
-               "#define        m(r)    -4(r)(GS*1)\n"
-       },
-       {"386", "nacl",
-               // Same as Linux above.
-               "#define        get_tls(r)      MOVL 8(GS), r\n"
-               "#define        g(r)    -8(r)(GS*1)\n"
-               "#define        m(r)    -4(r)(GS*1)\n"
-       },
        {"386", "",
-               "#define        get_tls(r)\n"
-               "#define        g(r)    0(GS)\n"
-               "#define        m(r)    4(GS)\n"
-       },
-
-       {"amd64p32", "nacl",
-               "#define get_tls(r)\n"
-               "#define g(r) 0(GS)\n"
-               "#define m(r) 4(GS)\n"
-       },
-       {"amd64", "windows",
-               "#define        get_tls(r) MOVQ 0x28(GS), r\n"
-               "#define        g(r) 0(r)\n"
-               "#define        m(r) 8(r)\n"
-       },
-       {"amd64", "plan9",
-               "#define        get_tls(r)\n"
-               "#define        g(r) 0(GS)\n"
-               "#define        m(r) 8(GS)\n"
-               "#define        procid(r) 16(GS)\n"
+               "#define        get_tls(r)      MOVL TLS, r\n"
+               "#define        g(r)    0(r)(TLS*1)\n"
+               "#define        m(r)    4(r)(TLS*1)\n"
        },
-       {"amd64", "solaris",
-               "#define        get_tls(r) MOVQ 0(FS), r\n"
-               "#define        g(r) -16(r)(FS*1)\n"
-               "#define        m(r) -8(r)(FS*1)\n"
-       },
-       // The TLS accessors here are defined here to use initial exec model.
-       // If the linker is not outputting a shared library, it will reduce
-       // the TLS accessors to the local exec model, effectively removing
-       // get_tls().
-       {"amd64", "linux",
-               "#define        get_tls(r) MOVQ runtime·tlsgm(SB), r\n"
-               "#define        g(r) 0(r)(GS*1)\n"
-               "#define        m(r) 8(r)(GS*1)\n"
+       {"amd64p32", "",
+               "#define        get_tls(r)      MOVL TLS, r\n"
+               "#define        g(r)    0(r)(TLS*1)\n"
+               "#define        m(r)    4(r)(TLS*1)\n"
        },
        {"amd64", "",
-               "#define get_tls(r)\n"
-               "#define g(r) 0(GS)\n"
-               "#define m(r) 8(GS)\n"
+               "#define        get_tls(r)      MOVQ TLS, r\n"
+               "#define        g(r)    0(r)(TLS*1)\n"
+               "#define        m(r)    8(r)(TLS*1)\n"
        },      
+
        {"arm", "",
        "#define        LR      R14\n"
        },
index f4fcc68812bf8f3ca5bfc4ec43fd2f3adaf3d2ec..c822f5bd53a73faedd4e6a4c554cd8977c6b2769 100644 (file)
@@ -183,6 +183,17 @@ relocsym(LSym *s)
                        if(thechar != '6')
                                o = r->add;
                        break;
+               case R_TLS_LE:
+                       o = ctxt->tlsoffset + r->add;
+                       break;
+               case R_TLS_IE:
+                       if(iself || ctxt->headtype == Hplan9)
+                               o = ctxt->tlsoffset + r->add;
+                       else if(ctxt->headtype == Hwindows)
+                               o = r->add;
+                       else
+                               sysfatal("unexpected R_TLS_IE relocation for %s", headstr(ctxt->headtype));
+                       break;
                case R_ADDR:
                        if(linkmode == LinkExternal && r->sym->type != SCONST) {
                                r->done = 0;
@@ -262,6 +273,10 @@ relocsym(LSym *s)
                default:
                        ctxt->cursym = s;
                        diag("bad reloc size %#ux for %s", siz, r->sym->name);
+               case 1:
+                       // TODO(rsc): Remove.
+                       s->p[off] = (int8)o;
+                       break;
                case 4:
                        if(r->type == R_PCREL) {
                                if(o != (int32)o)
@@ -312,6 +327,8 @@ dynrelocsym(LSym *s)
                        return;
                for(r=s->r; r<s->r+s->nr; r++) {
                        targ = r->sym;
+                       if(targ == nil)
+                               continue;
                        if(!targ->reachable)
                                diag("internal inconsistency: dynamic symbol %s is not reachable.", targ->name);
                        if(r->sym->plt == -2 && r->sym->got != -2) { // make dynimport JMP table for PE object files.
index 6bf2449f0c4a4098a2c1db66189461f675a99770..8276fb7066c6aaff57e5a7036d55b49b5b7eefc7 100644 (file)
@@ -143,7 +143,6 @@ main(int argc, char *argv[])
                headstring = headstr(HEADTYPE);
 
        archinit();
-       ctxt->linkmode = linkmode;
        ctxt->debugfloat = debug['F'];
 
        if(debug['v'])
index 213b1b55d780462174541f5a4e88732d830e4f90..104a08e21ed492eade47aa8be27e0522539504e3 100644 (file)
@@ -114,6 +114,7 @@ enum
        Ytr0,   Ytr1,   Ytr2,   Ytr3,   Ytr4,   Ytr5,   Ytr6,   Ytr7,   Yrl32,  Yrl64,
        Ymr, Ymm,
        Yxr, Yxm,
+       Ytls,
        Ymax,
 
        Zxxx            = 0,
@@ -1871,7 +1872,7 @@ instinit(void)
 }
 
 static int
-prefixof(Addr *a)
+prefixof(Link *ctxt, Addr *a)
 {
        switch(a->type) {
        case D_INDIR+D_CS:
@@ -1884,6 +1885,27 @@ prefixof(Addr *a)
                return 0x64;
        case D_INDIR+D_GS:
                return 0x65;
+       case D_INDIR+D_TLS:
+               // NOTE: Systems listed here should be only systems that
+               // support direct TLS references like 8(TLS) implemented as
+               // direct references from FS or GS. Systems that require
+               // the initial-exec model, where you load the TLS base into
+               // a register and then index from that register, do not reach
+               // this code and should not be listed.
+               switch(ctxt->headtype) {
+               default:
+                       sysfatal("unknown TLS base register for %s", headstr(ctxt->headtype));
+               case Hdragonfly:
+               case Hfreebsd:
+               case Hlinux:
+               case Hnetbsd:
+               case Hopenbsd:
+               case Hplan9:
+               case Hsolaris:
+                       return 0x64; // FS
+               case Hdarwin:
+                       return 0x65; // GS
+               }
        }
        switch(a->index) {
        case D_CS:
@@ -2033,6 +2055,7 @@ oclass(Link *ctxt, Addr *a)
        case D_ES:      return  Yes;
        case D_FS:      return  Yfs;
        case D_GS:      return  Ygs;
+       case D_TLS:     return  Ytls;
 
        case D_GDTR:    return  Ygdtr;
        case D_IDTR:    return  Yidtr;
@@ -2278,6 +2301,19 @@ vaddr(Link *ctxt, Addr *a, Reloc *r)
                                r->type = R_PCREL;
                } else
                        r->type = R_ADDR;
+               break;
+       
+       case D_INDIR+D_TLS:
+               if(r == nil) {
+                       ctxt->diag("need reloc for %D", a);
+                       sysfatal("reloc");
+               }
+               r->type = R_TLS_LE;
+               r->siz = 4;
+               r->off = -1;    // caller must fill in
+               r->add = v;
+               v = 0;
+               break;
        }
        return v;
 }
@@ -2294,7 +2330,7 @@ asmandsz(Link *ctxt, Addr *a, int r, int rex, int m64)
        v = a->offset;
        t = a->type;
        rel.siz = 0;
-       if(a->index != D_NONE && a->index != D_FS && a->index != D_GS) {
+       if(a->index != D_NONE && a->index != D_TLS) {
                if(t < D_INDIR) { 
                        switch(t) {
                        default:
@@ -2360,9 +2396,11 @@ asmandsz(Link *ctxt, Addr *a, int r, int rex, int m64)
                scale = 1;
        } else
                t -= D_INDIR;
+       if(t == D_TLS)
+               v = vaddr(ctxt, a, &rel);
 
        ctxt->rexflag |= (regrex[t] & Rxb) | rex;
-       if(t == D_NONE || (D_CS <= t && t <= D_GS)) {
+       if(t == D_NONE || (D_CS <= t && t <= D_GS) || t == D_TLS) {
                if((ctxt->flag_shared || ctxt->headtype == Hnacl) && t == D_NONE && (a->type == D_STATIC || a->type == D_EXTERN) || ctxt->asmode != 64) {
                        *ctxt->andptr++ = (0 << 6) | (5 << 0) | (r << 3);
                        goto putrelv;
@@ -2389,17 +2427,38 @@ asmandsz(Link *ctxt, Addr *a, int r, int rex, int m64)
                goto putrelv;
        }
        if(t >= D_AX && t <= D_R15) {
-               if(v == 0 && t != D_BP && t != D_R13) {
+               // TODO: Remove Hwindows condition.
+               if(v == 0 && t != D_BP && t != D_R13 && (a->index != D_TLS || (ctxt->headtype == Hwindows && a->scale == 2))) {
                        *ctxt->andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
                        return;
                }
-               if(v >= -128 && v < 128) {
+               if(v >= -128 && v < 128 && (a->index != D_TLS || a->scale != 1)) {
                        ctxt->andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
+                       if(a->index == D_TLS) {
+                               Reloc *r;
+                               memset(&rel, 0, sizeof rel);
+                               rel.type = R_TLS_IE;
+                               rel.siz = 1;
+                               rel.sym = nil;
+                               rel.add = v;
+                               r = addrel(ctxt->cursym);
+                               *r = rel;
+                               r->off = ctxt->curp->pc + ctxt->andptr + 1 - ctxt->and;
+                               v = 0;
+                       }
                        ctxt->andptr[1] = v;
                        ctxt->andptr += 2;
                        return;
                }
                *ctxt->andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
+               if(a->index == D_TLS) {
+                       memset(&rel, 0, sizeof rel);
+                       rel.type = R_TLS_IE;
+                       rel.siz = 4;
+                       rel.sym = nil;
+                       rel.add = v;
+                       v = 0;
+               }
                goto putrelv;
        }
        goto bad;
@@ -2574,6 +2633,10 @@ static Movtab    ymovtab[] =
        {ASHRQ, Ycol,   Yml,    6,      Pw,0xac,0xad,0},
        {ASHLW, Ycol,   Yml,    6,      Pe,0xa4,0xa5,0},
        {ASHRW, Ycol,   Yml,    6,      Pe,0xac,0xad,0},
+
+/* load TLS base */
+       {AMOVQ, Ytls,   Yrl,    7,      0,0,0,0},
+
        0
 };
 
@@ -2664,10 +2727,10 @@ doasm(Link *ctxt, Prog *p)
                return;
        }
        
-       pre = prefixof(&p->from);
+       pre = prefixof(ctxt, &p->from);
        if(pre)
                *ctxt->andptr++ = pre;
-       pre = prefixof(&p->to);
+       pre = prefixof(ctxt, &p->to);
        if(pre)
                *ctxt->andptr++ = pre;
 
@@ -3296,6 +3359,43 @@ mfound:
                        break;
                }
                break;
+       
+       case 7: /* mov tls, r */
+               // NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
+               // where you load the TLS base register into a register and then index off that
+               // register to access the actual TLS variables. Systems that allow direct TLS access
+               // are handled in prefixof above and should not be listed here.
+               switch(ctxt->headtype) {
+               default:
+                       sysfatal("unknown TLS base location for %s", headstr(ctxt->headtype));
+
+               case Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
+                       // TLS base is 0(FS).
+                       pp.from = p->from;
+                       pp.from.type = D_INDIR+D_NONE;
+                       pp.from.offset = 0;
+                       pp.from.index = D_NONE;
+                       pp.from.scale = 0;
+                       ctxt->rexflag |= Pw;
+                       *ctxt->andptr++ = 0x64; // FS
+                       *ctxt->andptr++ = 0x8B;
+                       asmand(ctxt, &pp.from, &p->to);
+                       break;
+               
+               case Hwindows:
+                       // Windows TLS base is always 0x28(GS).
+                       pp.from = p->from;
+                       pp.from.type = D_INDIR+D_GS;
+                       pp.from.offset = 0x28;
+                       pp.from.index = D_NONE;
+                       pp.from.scale = 0;
+                       ctxt->rexflag |= Pw;
+                       *ctxt->andptr++ = 0x65; // GS
+                       *ctxt->andptr++ = 0x8B;
+                       asmand(ctxt, &pp.from, &p->to);
+                       break;
+               }
+               break;
        }
 }
 
index 2bf6707e1eb7eb421a8bd359408b2602f09a7f6f..943db80f2f9fd1ef261efc923687e2bd8c5b918a 100644 (file)
@@ -78,6 +78,7 @@ enum
        Ym,
        Ybr,
        Ycol,
+       Ytls,
 
        Ycs,    Yss,    Yds,    Yes,    Yfs,    Ygs,
        Ygdtr,  Yidtr,  Yldtr,  Ymsw,   Ytask,
@@ -1441,7 +1442,7 @@ instinit(void)
 }
 
 static int
-prefixof(Addr *a)
+prefixof(Link *ctxt, Addr *a)
 {
        switch(a->type) {
        case D_INDIR+D_CS:
@@ -1454,6 +1455,23 @@ prefixof(Addr *a)
                return 0x64;
        case D_INDIR+D_GS:
                return 0x65;
+       case D_INDIR+D_TLS:
+               // NOTE: Systems listed here should be only systems that
+               // support direct TLS references like 8(TLS) implemented as
+               // direct references from FS or GS. Systems that require
+               // the initial-exec model, where you load the TLS base into
+               // a register and then index from that register, do not reach
+               // this code and should not be listed.
+               switch(ctxt->headtype) {
+               default:
+                       sysfatal("unknown TLS base register for %s", headstr(ctxt->headtype));
+               case Hdarwin:
+               case Hdragonfly:
+               case Hfreebsd:
+               case Hnetbsd:
+               case Hopenbsd:
+                       return 0x65; // GS
+               }
        }
        return 0;
 }
@@ -1543,6 +1561,7 @@ oclass(Addr *a)
        case D_ES:      return  Yes;
        case D_FS:      return  Yfs;
        case D_GS:      return  Ygs;
+       case D_TLS:     return  Ytls;
 
        case D_GDTR:    return  Ygdtr;
        case D_IDTR:    return  Yidtr;
@@ -1724,6 +1743,19 @@ vaddr(Link *ctxt, Addr *a, Reloc *r)
                        r->add = v;
                        v = 0;
                }
+               break;
+       
+       case D_INDIR+D_TLS:
+               if(r == nil) {
+                       ctxt->diag("need reloc for %D", a);
+                       sysfatal("bad code");
+               }
+               r->type = R_TLS_LE;
+               r->siz = 4;
+               r->off = -1; // caller must fill in
+               r->add = v;
+               v = 0;
+               break;
        }
        return v;
 }
@@ -1738,7 +1770,7 @@ asmand(Link *ctxt, Addr *a, int r)
        v = a->offset;
        t = a->type;
        rel.siz = 0;
-       if(a->index != D_NONE && a->index != D_FS && a->index != D_GS) {
+       if(a->index != D_NONE && a->index != D_TLS) {
                if(t < D_INDIR || t >= 2*D_INDIR) {
                        switch(t) {
                        default:
@@ -1801,8 +1833,10 @@ asmand(Link *ctxt, Addr *a, int r)
                scale = 1;
        } else
                t -= D_INDIR;
+       if(t == D_TLS)
+               v = vaddr(ctxt, a, &rel);
 
-       if(t == D_NONE || (D_CS <= t && t <= D_GS)) {
+       if(t == D_NONE || (D_CS <= t && t <= D_GS) || t == D_TLS) {
                *ctxt->andptr++ = (0 << 6) | (5 << 0) | (r << 3);
                goto putrelv;
        }
@@ -1823,17 +1857,43 @@ asmand(Link *ctxt, Addr *a, int r)
                goto putrelv;
        }
        if(t >= D_AX && t <= D_DI) {
-               if(v == 0 && rel.siz == 0 && t != D_BP) {
+               // TODO(rsc): Remove the Hwindows test.
+               // As written it produces the same byte-identical output as the code it replaced.
+               if(v == 0 && rel.siz == 0 && t != D_BP && (a->index != D_TLS || ctxt->headtype == Hwindows)) {
                        *ctxt->andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
                        return;
                }
-               if(v >= -128 && v < 128 && rel.siz == 0 && a->index != D_FS && a->index != D_GS) {
+               // TODO(rsc): Change a->index tests to check D_TLS.
+               // Then remove the if statement inside the body.
+               // As written the code is clearly incorrect for external linking,
+               // but as written it produces the same byte-identical output as the code it replaced.
+               if(v >= -128 && v < 128 && rel.siz == 0 && (a->index != D_TLS || ctxt->headtype == Hwindows || a->scale != 1))  {
                        ctxt->andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
+                       if(a->index == D_TLS) {
+                               Reloc *r;
+                               memset(&rel, 0, sizeof rel);
+                               rel.type = R_TLS_IE;
+                               rel.siz = 1;
+                               rel.sym = nil;
+                               rel.add = v;
+                               r = addrel(ctxt->cursym);
+                               *r = rel;
+                               r->off = ctxt->curp->pc + ctxt->andptr + 1 - ctxt->and;
+                               v = 0;
+                       }
                        ctxt->andptr[1] = v;
                        ctxt->andptr += 2;
                        return;
                }
                *ctxt->andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
+               if(a->index == D_TLS) {
+                       memset(&rel, 0, sizeof rel);
+                       rel.type = R_TLS_IE;
+                       rel.siz = 4;
+                       rel.sym = nil;
+                       rel.add = v;
+                       v = 0;
+               }
                goto putrelv;
        }
        goto bad;
@@ -1961,6 +2021,10 @@ static uchar     ymovtab[] =
 /* extra imul */
        AIMULW, Yml,    Yrl,    7,      Pq,0xaf,0,0,
        AIMULL, Yml,    Yrl,    7,      Pm,0xaf,0,0,
+
+/* load TLS base pointer */
+       AMOVL,  Ytls,   Yrl,    8,      0,0,0,0,
+
        0
 };
 
@@ -2108,10 +2172,10 @@ doasm(Link *ctxt, Prog *p)
        
        ctxt->curp = p; // TODO
 
-       pre = prefixof(&p->from);
+       pre = prefixof(ctxt, &p->from);
        if(pre)
                *ctxt->andptr++ = pre;
-       pre = prefixof(&p->to);
+       pre = prefixof(ctxt, &p->to);
        if(pre)
                *ctxt->andptr++ = pre;
 
@@ -2628,6 +2692,54 @@ mfound:
                *ctxt->andptr++ = t[5];
                asmand(ctxt, &p->from, reg[p->to.type]);
                break;
+       
+       case 8: /* mov tls, r */
+               // NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
+               // where you load the TLS base register into a register and then index off that
+               // register to access the actual TLS variables. Systems that allow direct TLS access
+               // are handled in prefixof above and should not be listed here.
+               switch(ctxt->headtype) {
+               default:
+                       sysfatal("unknown TLS base location for %s", headstr(ctxt->headtype));
+
+               case Hlinux:
+               case Hnacl:
+                       // ELF TLS base is 0(GS).
+                       pp.from = p->from;
+                       pp.from.type = D_INDIR+D_GS;
+                       pp.from.offset = 0;
+                       pp.from.index = D_NONE;
+                       pp.from.scale = 0;
+                       *ctxt->andptr++ = 0x65; // GS
+                       *ctxt->andptr++ = 0x8B;
+                       asmand(ctxt, &pp.from, reg[p->to.type]);
+                       break;
+               
+               case Hplan9:
+                       if(ctxt->plan9tos == nil)
+                               ctxt->plan9tos = linklookup(ctxt, "_tos", 0);
+                       memset(&pp.from, 0, sizeof pp.from);
+                       pp.from.type = D_EXTERN;
+                       pp.from.sym = ctxt->plan9tos;
+                       pp.from.offset = 0;
+                       pp.from.index = D_NONE;
+                       *ctxt->andptr++ = 0x8B;
+                       asmand(ctxt, &pp.from, reg[p->to.type]);
+                       break;
+
+               case Hwindows:
+                       // Windows TLS base is always 0x14(FS).
+                       pp.from = p->from;
+                       pp.from.type = D_INDIR+D_FS;
+                       pp.from.offset = 0x14;
+                       pp.from.index = D_NONE;
+                       pp.from.scale = 0;
+                       *ctxt->andptr++ = 0x64; // FS
+                       *ctxt->andptr++ = 0x8B;
+                       asmand(ctxt, &pp.from, reg[p->to.type]);
+                       break;
+               }
+               break;
        }
 }
 
index eaf52f2e730a27163346221dd8fde86d8434608c..fe708d87748490c58c930fa2073aa84af0937ace 100644 (file)
@@ -341,6 +341,7 @@ char*       regstr[] =
        "TR6",
        "TR7",
 
+       "TLS",  /* [D_TLS] */
        "NONE", /* [D_NONE] */
 };
 
index c000ce25b06addfe20c73c14760949b3e4b10a09..78669241630e78e7af3f5c91bcbb533026550fac 100644 (file)
@@ -289,6 +289,7 @@ char*       regstr[] =
        "X6",
        "X7",
 
+       "TLS",  /* [D_TLS] */
        "NONE", /* [D_NONE] */
 };
 
index b4329e886204bc5f68403b96378358d85d45aab1..fbb96c5e9ea554ff79df69c32d17fc807affc581 100644 (file)
@@ -99,6 +99,17 @@ settextflag(Prog *p, int f)
 
 static void nacladdr(Link*, Prog*, Addr*);
 
+static int
+canuselocaltls(Link *ctxt)
+{
+       switch(ctxt->headtype) {
+//     case Hlinux:
+       case Hwindows:
+               return 0;
+       }
+       return 1;
+}
+
 static void
 progedit(Link *ctxt, Prog *p)
 {
@@ -106,105 +117,98 @@ progedit(Link *ctxt, Prog *p)
        LSym *s;
        Prog *q;
 
-       if(ctxt->headtype == Hnacl) {
-               nacladdr(ctxt, p, &p->from);
-               nacladdr(ctxt, p, &p->to);
-       }
-
-       if(p->from.type == D_INDIR+D_GS || p->from.index == D_GS)
-               p->from.offset += ctxt->tlsoffset;
-       if(p->to.type == D_INDIR+D_GS || p->to.index == D_GS)
-               p->to.offset += ctxt->tlsoffset;
-
-       if(ctxt->gmsym == nil)
-               ctxt->gmsym = linklookup(ctxt, "runtime.tlsgm", 0);
-
-       if(ctxt->headtype == Hwindows) { 
-               // Windows
-               // Convert
-               //   op   n(GS), reg
-               // to
-               //   MOVL 0x28(GS), reg
-               //   op   n(reg), reg
-               // The purpose of this patch is to fix some accesses
-               // to extern register variables (TLS) on Windows, as
-               // a different method is used to access them.
-               if(p->from.type == D_INDIR+D_GS
-               && p->to.type >= D_AX && p->to.type <= D_DI 
-               && p->from.offset <= 8) {
-                       q = appendp(ctxt, p);
-                       q->from = p->from;
-                       q->from.type = D_INDIR + p->to.type;
-                       q->to = p->to;
-                       q->as = p->as;
-                       p->as = AMOVQ;
-                       p->from.type = D_INDIR+D_GS;
-                       p->from.offset = 0x28;
-               }
-       }
-       if(ctxt->headtype == Hlinux || ctxt->headtype == Hfreebsd
-       || ctxt->headtype == Hopenbsd || ctxt->headtype == Hnetbsd
-       || ctxt->headtype == Hplan9 || ctxt->headtype == Hdragonfly
-       || ctxt->headtype == Hsolaris) {
-               // ELF uses FS instead of GS.
-               if(p->from.type == D_INDIR+D_GS)
-                       p->from.type = D_INDIR+D_FS;
-               if(p->to.type == D_INDIR+D_GS)
-                       p->to.type = D_INDIR+D_FS;
-               if(p->from.index == D_GS)
-                       p->from.index = D_FS;
-               if(p->to.index == D_GS)
-                       p->to.index = D_FS;
-       }
-       if(!ctxt->flag_shared) {
-               // Convert g() or m() accesses of the form
-               //   op n(reg)(GS*1), reg
-               // to
-               //   op n(GS*1), reg
-               if(p->from.index == D_FS || p->from.index == D_GS) {
-                       p->from.type = D_INDIR + p->from.index;
+       // Thread-local storage references use the TLS pseudo-register.
+       // As a register, TLS refers to the thread-local storage base, and it
+       // can only be loaded into another register:
+       //
+       //         MOVQ TLS, AX
+       //
+       // An offset from the thread-local storage base is written off(reg)(TLS*1).
+       // Semantically it is off(reg), but the (TLS*1) annotation marks this as
+       // indexing from the loaded TLS base. This emits a relocation so that
+       // if the linker needs to adjust the offset, it can. For example:
+       //
+       //         MOVQ TLS, AX
+       //         MOVQ 8(AX)(TLS*1), CX // load m into CX
+       // 
+       // On systems that support direct access to the TLS memory, this
+       // pair of instructions can be reduced to a direct TLS memory reference:
+       // 
+       //         MOVQ 8(TLS), CX // load m into CX
+       //
+       // The 2-instruction and 1-instruction forms correspond roughly to
+       // ELF TLS initial exec mode and ELF TLS local exec mode, respectively.
+       // 
+       // We apply this rewrite on systems that support the 1-instruction form.
+       // The decision is made using only the operating system (and probably
+       // the -shared flag, eventually), not the link mode. If some link modes
+       // on a particular operating system require the 2-instruction form,
+       // then all builds for that operating system will use the 2-instruction
+       // form, so that the link mode decision can be delayed to link time.
+       //
+       // In this way, all supported systems use identical instructions to
+       // access TLS, and they are rewritten appropriately first here in
+       // liblink and then finally using relocations in the linker.
+
+       if(canuselocaltls(ctxt)) {
+               // Reduce TLS initial exec model to TLS local exec model.
+               // Sequences like
+               //      MOVQ TLS, BX
+               //      ... off(BX)(TLS*1) ...
+               // become
+               //      NOP
+               //      ... off(TLS) ...
+               //
+               // TODO(rsc): Remove the Hsolaris special case. It exists only to
+               // guarantee we are producing byte-identical binaries as before this code.
+               // But it should be unnecessary.
+               if((p->as == AMOVQ || p->as == AMOVL) && p->from.type == D_TLS && D_AX <= p->to.type && p->to.type <= D_R15 && ctxt->headtype != Hsolaris)
+                       nopout(p);
+               if(p->from.index == D_TLS && D_INDIR+D_AX <= p->from.type && p->from.type <= D_INDIR+D_R15) {
+                       p->from.type = D_INDIR+D_TLS;
+                       p->from.scale = 0;
                        p->from.index = D_NONE;
                }
-               // Convert g() or m() accesses of the form
-               //   op reg, n(reg)(GS*1)
-               // to
-               //   op reg, n(GS*1)
-               if(p->to.index == D_FS || p->to.index == D_GS) {
-                       p->to.type = D_INDIR + p->to.index;
+               if(p->to.index == D_TLS && D_INDIR+D_AX <= p->to.type && p->to.type <= D_INDIR+D_R15) {
+                       p->to.type = D_INDIR+D_TLS;
+                       p->to.scale = 0;
                        p->to.index = D_NONE;
                }
-               // Convert get_tls access of the form
-               //   op runtime.tlsgm(SB), reg
-               // to
-               //   NOP
-               if(ctxt->gmsym != nil && p->from.sym == ctxt->gmsym) {
-                       p->as = ANOP;
-                       p->from.type = D_NONE;
-                       p->to.type = D_NONE;
-                       p->from.sym = nil;
-                       p->to.sym = nil;
-               }
        } else {
-               // Convert TLS reads of the form
-               //   op n(GS), reg
-               // to
-               //   MOVQ $runtime.tlsgm(SB), reg
-               //   op n(reg)(GS*1), reg
-               if((p->from.type == D_INDIR+D_FS || p->from.type == D_INDIR + D_GS) && p->to.type >= D_AX && p->to.type <= D_DI) {
+               // As a courtesy to the C compilers, rewrite TLS local exec load as TLS initial exec load.
+               // The instruction
+               //      MOVQ off(TLS), BX
+               // becomes the sequence
+               //      MOVQ TLS, BX
+               //      MOVQ off(BX)(TLS*1), BX
+               // This allows the C compilers to emit references to m and g using the direct off(TLS) form.
+               if((p->as == AMOVQ || p->as == AMOVL) && p->from.type == D_INDIR+D_TLS && D_AX <= p->to.type && p->to.type <= D_R15) {
                        q = appendp(ctxt, p);
-                       q->to = p->to;
                        q->as = p->as;
-                       q->from.type = D_INDIR+p->to.type;
-                       q->from.index = p->from.type - D_INDIR;
-                       q->from.scale = 1;
-                       q->from.offset = p->from.offset;
-                       p->as = AMOVQ;
-                       p->from.type = D_EXTERN;
-                       p->from.sym = ctxt->gmsym;
+                       q->from = p->from;
+                       q->from.type = D_INDIR + p->to.type;
+                       q->from.index = D_TLS;
+                       q->from.scale = 2; // TODO: use 1
+                       q->to = p->to;
+                       p->from.type = D_TLS;
+                       p->from.index = D_NONE;
                        p->from.offset = 0;
                }
        }
 
+       // TODO: Remove.
+       if(ctxt->headtype == Hwindows || ctxt->headtype == Hplan9) {
+               if(p->from.scale == 1 && p->from.index == D_TLS)
+                       p->from.scale = 2;
+               if(p->to.scale == 1 && p->to.index == D_TLS)
+                       p->to.scale = 2;
+       }
+
+       if(ctxt->headtype == Hnacl) {
+               nacladdr(ctxt, p, &p->from);
+               nacladdr(ctxt, p, &p->to);
+       }
+
        // Maintain information about code generation mode.
        if(ctxt->mode == 0)
                ctxt->mode = 64;
@@ -315,9 +319,9 @@ nacladdr(Link *ctxt, Prog *p, Addr *a)
                ctxt->diag("invalid address: %P", p);
                return;
        }
-       if(a->type == D_INDIR+D_GS)
+       if(a->type == D_INDIR+D_TLS)
                a->type = D_INDIR+D_BP;
-       else if(a->type == D_GS)
+       else if(a->type == D_TLS)
                a->type = D_BP;
        if(D_INDIR <= a->type && a->type <= D_INDIR+D_INDIR) {
                switch(a->type) {
@@ -632,48 +636,24 @@ indir_cx(Link *ctxt, Addr *a)
 // Returns last new instruction.
 static Prog*
 load_g_cx(Link *ctxt, Prog *p)
-{
-       if(ctxt->flag_shared) {
-               // Load TLS offset with MOVQ $runtime.tlsgm(SB), CX
-               p->as = AMOVQ;
-               p->from.type = D_EXTERN;
-               p->from.sym = ctxt->gmsym;
-               p->to.type = D_CX;
-               p = appendp(ctxt, p);
-       }
+{      
+       Prog *next;
+
        p->as = AMOVQ;
-       if(ctxt->headtype == Hlinux || ctxt->headtype == Hfreebsd
-       || ctxt->headtype == Hopenbsd || ctxt->headtype == Hnetbsd
-       || ctxt->headtype == Hplan9 || ctxt->headtype == Hdragonfly
-       || ctxt->headtype == Hsolaris)
-               // ELF uses FS
-               p->from.type = D_INDIR+D_FS;
-       else if(ctxt->headtype == Hnacl) {
+       if(ctxt->arch->ptrsize == 4)
                p->as = AMOVL;
-               p->from.type = D_INDIR+D_BP;
-       } else
-               p->from.type = D_INDIR+D_GS;
-       if(ctxt->flag_shared) {
-               // Add TLS offset stored in CX
-               p->from.index = p->from.type - D_INDIR;
-               indir_cx(ctxt, &p->from);
-       }
-       p->from.offset = ctxt->tlsoffset+0;
+       p->from.type = D_INDIR+D_TLS;
+       p->from.offset = 0;
        p->to.type = D_CX;
-       if(ctxt->headtype == Hwindows) {
-               // movq %gs:0x28, %rcx
-               // movq (%rcx), %rcx
-               p->as = AMOVQ;
-               p->from.type = D_INDIR+D_GS;
-               p->from.offset = 0x28;
-               p->to.type = D_CX;
+       
+       next = p->link;
+       progedit(ctxt, p);
+       while(p->link != next)
+               p = p->link;
+       
+       if(p->from.index == D_TLS)
+               p->from.scale = 2;
 
-               p = appendp(ctxt, p);
-               p->as = AMOVQ;
-               indir_cx(ctxt, &p->from);
-               p->from.offset = 0;
-               p->to.type = D_CX;
-       }
        return p;
 }
 
index d36db84705e0cd255a4eff226621047b88fd6d44..72934c1499872380c192ce88644ee155930c4448 100644 (file)
@@ -91,80 +91,80 @@ settextflag(Prog *p, int f)
        p->from.scale = f;
 }
 
+static int
+canuselocaltls(Link *ctxt)
+{
+       switch(ctxt->headtype) {
+       case Hlinux:
+       case Hnacl:
+       case Hplan9:
+       case Hwindows:
+               return 0;
+       }
+       return 1;
+}
+
 static void
 progedit(Link *ctxt, Prog *p)
 {
-       Prog *q;
        char literal[64];
        LSym *s;
-
-       if(p->from.type == D_INDIR+D_GS)
-               p->from.offset += ctxt->tlsoffset;
-       if(p->to.type == D_INDIR+D_GS)
-               p->to.offset += ctxt->tlsoffset;
-
-       if(ctxt->headtype == Hwindows) {
-               // Convert
-               //   op   n(GS), reg
-               // to
-               //   MOVL 0x14(FS), reg
-               //   op   n(reg), reg
-               // The purpose of this patch is to fix some accesses
-               // to extern register variables (TLS) on Windows, as
-               // a different method is used to access them.
-               if(p->from.type == D_INDIR+D_GS
-               && p->to.type >= D_AX && p->to.type <= D_DI) {
-                       q = appendp(ctxt, p);
-                       q->from = p->from;
-                       q->from.type = D_INDIR + p->to.type;
-                       q->to = p->to;
-                       q->as = p->as;
-                       p->as = AMOVL;
-                       p->from.type = D_INDIR+D_FS;
-                       p->from.offset = 0x14;
+       Prog *q;
+       
+       // See obj6.c for discussion of TLS.
+       if(canuselocaltls(ctxt)) {
+               // Reduce TLS initial exec model to TLS local exec model.
+               // Sequences like
+               //      MOVL TLS, BX
+               //      ... off(BX)(TLS*1) ...
+               // become
+               //      NOP
+               //      ... off(TLS) ...
+               if(p->as == AMOVL && p->from.type == D_TLS && D_AX <= p->to.type && p->to.type <= D_DI) {
+                       p->as = ANOP;
+                       p->from.type = D_NONE;
+                       p->to.type = D_NONE;
                }
-       }
-       if(ctxt->headtype == Hlinux || ctxt->headtype == Hnacl) {
-               // Running binaries under Xen requires using
-               //      MOVL 0(GS), reg
-               // and then off(reg) instead of saying off(GS) directly
-               // when the offset is negative.
-               // In external mode we just produce a reloc.
-               if(p->from.type == D_INDIR+D_GS && p->from.offset < 0
-               && p->to.type >= D_AX && p->to.type <= D_DI) {
-                       if(ctxt->linkmode != LinkExternal) {
-                               q = appendp(ctxt, p);
-                               q->from = p->from;
-                               q->from.type = D_INDIR + p->to.type;
-                               q->to = p->to;
-                               q->as = p->as;
-                               p->as = AMOVL;
-                               p->from.type = D_INDIR+D_GS;
-                               p->from.offset = 0;
-                       } else {
-                               // Add signals to relocate.
-                               p->from.index = D_GS;
-                               p->from.scale = 1;
-                       }
+               if(p->from.index == D_TLS && D_INDIR+D_AX <= p->from.type && p->from.type <= D_INDIR+D_DI) {
+                       p->from.type = D_INDIR+D_TLS;
+                       p->from.scale = 0;
+                       p->from.index = D_NONE;
                }
-       }
-       if(ctxt->headtype == Hplan9) {
-               if(p->from.type == D_INDIR+D_GS
-               && p->to.type >= D_AX && p->to.type <= D_DI) {
-                       if(ctxt->plan9tos == nil)
-                               ctxt->plan9tos = linklookup(ctxt, "_tos", 0);
+               if(p->to.index == D_TLS && D_INDIR+D_AX <= p->to.type && p->to.type <= D_INDIR+D_DI) {
+                       p->to.type = D_INDIR+D_TLS;
+                       p->to.scale = 0;
+                       p->to.index = D_NONE;
+               }
+       } else {
+               // As a courtesy to the C compilers, rewrite TLS local exec load as TLS initial exec load.
+               // The instruction
+               //      MOVL off(TLS), BX
+               // becomes the sequence
+               //      MOVL TLS, BX
+               //      MOVL off(BX)(TLS*1), BX
+               // This allows the C compilers to emit references to m and g using the direct off(TLS) form.
+               if(p->as == AMOVL && p->from.type == D_INDIR+D_TLS && D_AX <= p->to.type && p->to.type <= D_DI) {
                        q = appendp(ctxt, p);
+                       q->as = p->as;
                        q->from = p->from;
                        q->from.type = D_INDIR + p->to.type;
+                       q->from.index = D_TLS;
+                       q->from.scale = 2; // TODO: use 1
                        q->to = p->to;
-                       q->as = p->as;
-                       p->as = AMOVL;
-                       p->from.type = D_EXTERN;
-                       p->from.sym = ctxt->plan9tos;
+                       p->from.type = D_TLS;
+                       p->from.index = D_NONE;
                        p->from.offset = 0;
                }
        }
 
+       // TODO: Remove.
+       if(ctxt->headtype == Hplan9) {
+               if(p->from.scale == 1 && p->from.index == D_TLS)
+                       p->from.scale = 2;
+               if(p->to.scale == 1 && p->to.index == D_TLS)
+                       p->to.scale = 2;
+       }
+
        // Rewrite CALL/JMP/RET to symbol as D_BRANCH.
        switch(p->as) {
        case ACALL:
@@ -435,62 +435,21 @@ addstacksplit(Link *ctxt, LSym *cursym)
 static Prog*
 load_g_cx(Link *ctxt, Prog *p)
 {
-       switch(ctxt->headtype) {
-       case Hwindows:
-               p->as = AMOVL;
-               p->from.type = D_INDIR+D_FS;
-               p->from.offset = 0x14;
-               p->to.type = D_CX;
+       Prog *next;
 
-               p = appendp(ctxt, p);
-               p->as = AMOVL;
-               p->from.type = D_INDIR+D_CX;
-               p->from.offset = 0;
-               p->to.type = D_CX;
-               break;
+       p->as = AMOVL;
+       p->from.type = D_INDIR+D_TLS;
+       p->from.offset = 0;
+       p->to.type = D_CX;
+
+       next = p->link;
+       progedit(ctxt, p);
+       while(p->link != next)
+               p = p->link;
        
-       case Hlinux:
-       case Hnacl:
-               if(ctxt->linkmode != LinkExternal) {
-                       p->as = AMOVL;
-                       p->from.type = D_INDIR+D_GS;
-                       p->from.offset = 0;
-                       p->to.type = D_CX;
+       if(p->from.index == D_TLS)
+               p->from.scale = 2;
 
-                       p = appendp(ctxt, p);
-                       p->as = AMOVL;
-                       p->from.type = D_INDIR+D_CX;
-                       p->from.offset = ctxt->tlsoffset + 0;
-                       p->to.type = D_CX;
-               } else {
-                       p->as = AMOVL;
-                       p->from.type = D_INDIR+D_GS;
-                       p->from.offset = ctxt->tlsoffset + 0;
-                       p->to.type = D_CX;
-                       p->from.index = D_GS;
-                       p->from.scale = 1;
-               }
-               break;
-       
-       case Hplan9:
-               p->as = AMOVL;
-               p->from.type = D_EXTERN;
-               p->from.sym = ctxt->plan9tos;
-               p->to.type = D_CX;
-               
-               p = appendp(ctxt, p);
-               p->as = AMOVL;
-               p->from.type = D_INDIR+D_CX;
-               p->from.offset = ctxt->tlsoffset + 0;
-               p->to.type = D_CX;                              
-               break;
-       
-       default:
-               p->as = AMOVL;
-               p->from.type = D_INDIR+D_GS;
-               p->from.offset = ctxt->tlsoffset + 0;
-               p->to.type = D_CX;
-       }
        return p;
 }
 
index 2b11add3b674c16262738a9590c8062dd7d485b3..c7700cc25ce45e1536fe6a64c15492d7b42e900c 100644 (file)
@@ -274,6 +274,7 @@ writesym(Link *ctxt, Biobuf *b, LSym *s)
        Pcln *pc;
        Prog *p;
        Auto *a;
+       char *name;
 
        if(ctxt->debugasm) {
                Bprint(ctxt->bso, "%s ", s->name);
@@ -308,7 +309,10 @@ writesym(Link *ctxt, Biobuf *b, LSym *s)
                }
                for(i=0; i<s->nr; i++) {
                        r = &s->r[i];
-                       Bprint(ctxt->bso, "\trel %d+%d t=%d %s+%lld\n", (int)r->off, r->siz, r->type, r->sym->name, (vlong)r->add);
+                       name = "";
+                       if(r->sym != nil)
+                               name = r->sym->name;
+                       Bprint(ctxt->bso, "\trel %d+%d t=%d %s+%lld\n", (int)r->off, r->siz, r->type, name, (vlong)r->add);
                }
        }
 
index 3990f7200ef823db3267283ddb597b5f642ba1d2..29fc036bcb7cfc37d6f3a293ca53850239cc915d 100644 (file)
@@ -118,6 +118,7 @@ linknew(LinkArch *arch)
                sysfatal("unknown goos %s", getgoos());
        
        // Record thread-local storage offset.
+       // TODO(rsc): Move tlsoffset back into the linker.
        switch(ctxt->headtype) {
        default:
                sysfatal("unknown thread-local storage offset for %s", headstr(ctxt->headtype));
index 27efc8a31c13f2871c23fc40f0079a420f3b0448..864b681f4a1d6c86d5d9adcdbaba0123262ce9ab 100644 (file)
@@ -99,10 +99,10 @@ typedef     struct  DebugVars       DebugVars;
  *
  * "extern register" is a special storage class implemented by 6c, 8c, etc.
  * On the ARM, it is an actual register; elsewhere it is a slot in thread-
- * local storage indexed by a segment register. See zasmhdr in
+ * local storage indexed by a pseudo-register TLS. See zasmhdr in
  * src/cmd/dist/buildruntime.c for details, and be aware that the linker may
  * make further OS-specific changes to the compiler's output. For example,
- * 6l/linux rewrites 0(GS) as -16(FS).
+ * 6l/linux rewrites 0(TLS) as -16(FS).
  *
  * Every C file linked into a Go program must include runtime.h so that the
  * C compiler (6c, 8c, etc.) knows to avoid other uses of these dedicated
index c2a259e5b1339a4663a8cb2de650914f3658799f..bfaaa00a7ecc33eb755a7417f44749d5ad7af020 100644 (file)
@@ -457,8 +457,7 @@ TEXT runtime·setldt(SB),NOSPLIT,$32
         * we use its pthread_create and let it set up %gs
         * for us.  When we do that, the private storage
         * we get is not at 0(GS) but at 0x468(GS).
-        * To insulate the rest of the tool chain from this ugliness,
-        * 8l rewrites 0(GS) into 0x468(GS) for us.
+        * 8l rewrites 0(TLS) into 0x468(GS) for us.
         * To accommodate that rewrite, we translate the
         * address and limit here so that 0x468(GS) maps to 0(address).
         *
index cdd729957c51c138d371d258a7085089fc80ce87..b7896f1786bfc07857a0c9e5adb107dcdc448bff 100644 (file)
@@ -383,7 +383,7 @@ TEXT runtime·setldt(SB),NOSPLIT,$32
         * for us.  When we do that, the private storage
         * we get is not at 0(GS), 4(GS), but -8(GS), -4(GS).
         * To insulate the rest of the tool chain from this
-        * ugliness, 8l rewrites 0(GS) into -8(GS) for us.
+        * ugliness, 8l rewrites 0(TLS) into -8(GS) for us.
         * To accommodate that rewrite, we translate
         * the address here and bump the limit to 0xffffffff (no limit)
         * so that -8(GS) maps to 0(address).
index 377e1653f01311cc854fe49fa8700cfa6d1108a8..43c172372113f61082c3c3d5a61e82afa83a482d 100644 (file)
@@ -13,7 +13,7 @@
        MOVL $(0x10000 + ((code)<<5)), AX; JMP AX
 
 TEXT runtime·settls(SB),NOSPLIT,$0
-       MOVL    DI, GS // really BP
+       MOVL    DI, TLS // really BP
        RET
 
 TEXT runtime·exit(SB),NOSPLIT,$0
@@ -173,7 +173,7 @@ TEXT runtime·nacl_thread_create(SB),NOSPLIT,$0
 TEXT runtime·mstart_nacl(SB),NOSPLIT,$0
        NACL_SYSCALL(SYS_tls_get)
        SUBL    $8, AX
-       MOVL    AX, GS
+       MOVL    AX, TLS
        JMP runtime·mstart(SB)
 
 TEXT runtime·nacl_nanosleep(SB),NOSPLIT,$0
@@ -254,12 +254,12 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$80
        // restore TLS register at time of execution,
        // in case it's been smashed.
        // the TLS register is really BP, but for consistency
-       // with non-NaCl systems it is referred to here as GS.
+       // with non-NaCl systems it is referred to here as TLS.
        // NOTE: Cannot use SYS_tls_get here (like we do in mstart_nacl),
        // because the main thread never calls tls_set.
        LEAL ctxt+0(FP), AX
        MOVL (16*4+5*8)(AX), AX
-       MOVL    AX, GS
+       MOVL    AX, TLS
 
        // check that m exists
        get_tls(CX)
@@ -305,7 +305,7 @@ sigtramp_ret:
        MOVQ    16(SI), DX
        MOVQ    24(SI), BX
        MOVL    32(SI), SP      // MOVL for SP sandboxing
-       // 40(SI) is saved BP aka GS, already restored above
+       // 40(SI) is saved BP aka TLS, already restored above
        // 48(SI) is saved SI, never to be seen again
        MOVQ    56(SI), DI
        MOVQ    64(SI), R8
index 2513af9cba038df0d372b0d414bda8bfa8b9b0b5..143cd2e49839fe7b4018bdf49edaee792dc9b892 100644 (file)
@@ -100,8 +100,9 @@ TEXT runtime·rfork(SB),NOSPLIT,$0
        MOVL    DX, g(AX)
        MOVL    BX, m(AX)
 
-       // Initialize AX from TOS struct.
-       MOVL    procid(AX), AX
+       // Initialize procid from TOS struct.
+       // TODO: Be explicit and insert a new MOVL _tos(SB), AX here.
+       MOVL    48(AX), AX // procid
        MOVL    AX, m_procid(BX)        // save pid as m->procid
        
        CALL    runtime·stackcheck(SB) // smashes AX, CX
index d6702e865f121ccf6c55fc7e2a257e9a08f06e5c..e60459cb8efaab63d980fb0c84a45f8d420d637b 100644 (file)
@@ -136,7 +136,7 @@ TEXT runtime·rfork(SB),NOSPLIT,$0
        MOVQ    BX, m(AX)
 
        // Initialize AX from pid in TLS.
-       MOVQ    procid(AX), AX
+       MOVQ    0(FS), AX
        MOVQ    AX, m_procid(BX)        // save pid as m->procid
        
        CALL    runtime·stackcheck(SB) // smashes AX, CX