Cypherpunks repositories - gostls13.git/commitdiff
cmd/asm,cmd/internal/obj/riscv,cmd/link: improve TLS handling on riscv64
author Joel Sing <joel@sing.id.au>
Thu, 15 Sep 2022 16:29:12 +0000 (02:29 +1000)
committer Joel Sing <joel@sing.id.au>
Thu, 3 Aug 2023 16:15:14 +0000 (16:15 +0000)
The existing Thread Local Storage (TLS) implementation for riscv64 uses
initial-exec (IE) mode; however, a MOV of a TLS symbol currently loads the
thread pointer offset and not the actual address or memory location.

Rework TLS on riscv64 to generate the full instruction sequence needed to
load from or store to a TLS symbol. Additionally, provide support for both
initial-exec (IE) and local-exec (LE) TLS - in many cases we can use LE,
which is slightly more efficient and easier to support in the linker.

Change-Id: I1b43f8888b3b6b10354bbb79d604771e64d92645
Reviewed-on: https://go-review.googlesource.com/c/go/+/431103
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: M Zhuo <mzh@golangcn.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Joel Sing <joel@sing.id.au>

src/cmd/asm/internal/asm/endtoend_test.go
src/cmd/asm/internal/asm/testdata/riscv64.s
src/cmd/internal/obj/riscv/obj.go
src/cmd/internal/objabi/reloctype.go
src/cmd/internal/objabi/reloctype_string.go
src/cmd/link/internal/riscv64/asm.go
src/runtime/tls_riscv64.s

index 778d17dbd2d1a9360cf9629a60005dd31221b9c1..1ec9ebd5b512d4231359822c735ebae140ba3909 100644 (file)
@@ -68,6 +68,11 @@ Diff:
                        continue
                }
 
+               // Ignore GLOBL.
+               if strings.HasPrefix(line, "GLOBL ") {
+                       continue
+               }
+
                // The general form of a test input line is:
                //      // comment
                //      INST args [// printed form] [// hex encoding]
index 53538320f0dd558c6b402f3edff04339ed4939a4..9899ec9e7b63a3636de37a099576294b5848f1b8 100644 (file)
@@ -354,6 +354,14 @@ start:
        MOVD    F0, 4(X5)                               // 27b20200
        MOVD    F0, F1                                  // d3000022
 
+       // TLS load with local-exec (LUI + ADDIW + ADD of TP + load)
+       MOV     tls(SB), X5                             // b70f00009b8f0f00b38f4f0083b20f00
+       MOVB    tls(SB), X5                             // b70f00009b8f0f00b38f4f0083820f00
+
+       // TLS store with local-exec (LUI + ADDIW + ADD of TP + store)
+       MOV     X5, tls(SB)                             // b70f00009b8f0f00b38f4f0023b05f00
+       MOVB    X5, tls(SB)                             // b70f00009b8f0f00b38f4f0023805f00
+
        // NOT pseudo-instruction
        NOT     X5                                      // 93c2f2ff
        NOT     X5, X6                                  // 13c3f2ff
@@ -407,3 +415,5 @@ start:
        FLTD    F0, F1, X5                              // d39200a2
        FLED    F0, F1, X5                              // d38200a2
        FEQD    F0, F1, X5                              // d3a200a2
+
+GLOBL tls(SB), TLSBSS, $8
index 43fa7351bf990e8cb2c0732e8cb89ea75806f0af..2e55fac8123140ad2bdefc5ac6b31041f55d1b28 100644 (file)
@@ -1827,6 +1827,53 @@ func instructionsForStore(p *obj.Prog, as obj.As, rd int16) []*instruction {
        return []*instruction{insLUI, insADD, ins}
 }
 
+func instructionsForTLS(p *obj.Prog, ins *instruction) []*instruction {
+       insAddTP := &instruction{as: AADD, rd: REG_TMP, rs1: REG_TMP, rs2: REG_TP}
+
+       var inss []*instruction
+       if p.Ctxt.Flag_shared {
+               // TLS initial-exec mode - load TLS offset from GOT, add the thread pointer
+               // register, then load from or store to the resulting memory location.
+               insAUIPC := &instruction{as: AAUIPC, rd: REG_TMP}
+               insLoadTLSOffset := &instruction{as: ALD, rd: REG_TMP, rs1: REG_TMP}
+               inss = []*instruction{insAUIPC, insLoadTLSOffset, insAddTP, ins}
+       } else {
+               // TLS local-exec mode - load upper TLS offset, add the lower TLS offset,
+               // add the thread pointer register, then load from or store to the resulting
+               // memory location. Note that this differs from the suggested three
+               // instruction sequence, as the Go linker does not currently have an
+               // easy way to handle relocation across 12 bytes of machine code.
+               insLUI := &instruction{as: ALUI, rd: REG_TMP}
+               insADDIW := &instruction{as: AADDIW, rd: REG_TMP, rs1: REG_TMP}
+               inss = []*instruction{insLUI, insADDIW, insAddTP, ins}
+       }
+       return inss
+}
+
+func instructionsForTLSLoad(p *obj.Prog) []*instruction {
+       if p.From.Sym.Type != objabi.STLSBSS {
+               p.Ctxt.Diag("%v: %v is not a TLS symbol", p, p.From.Sym)
+               return nil
+       }
+
+       ins := instructionForProg(p)
+       ins.as, ins.rs1, ins.rs2, ins.imm = movToLoad(p.As), REG_TMP, obj.REG_NONE, 0
+
+       return instructionsForTLS(p, ins)
+}
+
+func instructionsForTLSStore(p *obj.Prog) []*instruction {
+       if p.To.Sym.Type != objabi.STLSBSS {
+               p.Ctxt.Diag("%v: %v is not a TLS symbol", p, p.To.Sym)
+               return nil
+       }
+
+       ins := instructionForProg(p)
+       ins.as, ins.rd, ins.rs1, ins.rs2, ins.imm = movToStore(p.As), REG_TMP, uint32(p.From.Reg), obj.REG_NONE, 0
+
+       return instructionsForTLS(p, ins)
+}
+
 // instructionsForMOV returns the machine instructions for an *obj.Prog that
 // uses a MOV pseudo-instruction.
 func instructionsForMOV(p *obj.Prog) []*instruction {
@@ -1939,6 +1986,10 @@ func instructionsForMOV(p *obj.Prog) []*instruction {
                        inss = instructionsForLoad(p, movToLoad(p.As), addrToReg(p.From))
 
                case obj.NAME_EXTERN, obj.NAME_STATIC:
+                       if p.From.Sym.Type == objabi.STLSBSS {
+                               return instructionsForTLSLoad(p)
+                       }
+
                        // Note that the values for $off_hi and $off_lo are currently
                        // zero and will be assigned during relocation.
                        //
@@ -1966,6 +2017,10 @@ func instructionsForMOV(p *obj.Prog) []*instruction {
                        inss = instructionsForStore(p, movToStore(p.As), addrToReg(p.To))
 
                case obj.NAME_EXTERN, obj.NAME_STATIC:
+                       if p.To.Sym.Type == objabi.STLSBSS {
+                               return instructionsForTLSStore(p)
+                       }
+
                        // Note that the values for $off_hi and $off_lo are currently
                        // zero and will be assigned during relocation.
                        //
@@ -2244,10 +2299,10 @@ func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
                                break
                        }
                        if addr.Sym.Type == objabi.STLSBSS {
-                               if rt == objabi.R_RISCV_PCREL_ITYPE {
-                                       rt = objabi.R_RISCV_TLS_IE_ITYPE
-                               } else if rt == objabi.R_RISCV_PCREL_STYPE {
-                                       rt = objabi.R_RISCV_TLS_IE_STYPE
+                               if ctxt.Flag_shared {
+                                       rt = objabi.R_RISCV_TLS_IE
+                               } else {
+                                       rt = objabi.R_RISCV_TLS_LE
                                }
                        }
 
index 996c300d955cc19e7d3cd6928dc71592c34a90ef..3eaa5824e67fdf181b97f9f9243268acbaeb4ba2 100644 (file)
@@ -269,21 +269,21 @@ const (
        // only used by the linker and are not emitted by the compiler or assembler.
        R_RISCV_CALL_TRAMP
 
-       // R_RISCV_PCREL_ITYPE resolves a 32-bit PC-relative address using an
+       // R_RISCV_PCREL_ITYPE resolves a 32 bit PC-relative address using an
        // AUIPC + I-type instruction pair.
        R_RISCV_PCREL_ITYPE
 
-       // R_RISCV_PCREL_STYPE resolves a 32-bit PC-relative address using an
+       // R_RISCV_PCREL_STYPE resolves a 32 bit PC-relative address using an
        // AUIPC + S-type instruction pair.
        R_RISCV_PCREL_STYPE
 
-       // R_RISCV_TLS_IE_ITYPE resolves a 32-bit TLS initial-exec TOC offset
-       // address using an AUIPC + I-type instruction pair.
-       R_RISCV_TLS_IE_ITYPE
+       // R_RISCV_TLS_IE resolves a 32 bit TLS initial-exec address using an
+       // AUIPC + I-type instruction pair.
+       R_RISCV_TLS_IE
 
-       // R_RISCV_TLS_IE_STYPE resolves a 32-bit TLS initial-exec TOC offset
-       // address using an AUIPC + S-type instruction pair.
-       R_RISCV_TLS_IE_STYPE
+       // R_RISCV_TLS_LE resolves a 32 bit TLS local-exec address using an
+       // LUI + I-type instruction sequence.
+       R_RISCV_TLS_LE
 
        // R_PCRELDBL relocates s390x 2-byte aligned PC-relative addresses.
        // TODO(mundaym): remove once variants can be serialized - see issue 14218.
index c7441efa28ec65e32cda14fa3583a8edd15d7c10..bc8fb6b73c5884a137ab8cf556704468403a6cfa 100644 (file)
@@ -71,8 +71,8 @@ func _() {
        _ = x[R_RISCV_CALL_TRAMP-61]
        _ = x[R_RISCV_PCREL_ITYPE-62]
        _ = x[R_RISCV_PCREL_STYPE-63]
-       _ = x[R_RISCV_TLS_IE_ITYPE-64]
-       _ = x[R_RISCV_TLS_IE_STYPE-65]
+       _ = x[R_RISCV_TLS_IE-64]
+       _ = x[R_RISCV_TLS_LE-65]
        _ = x[R_PCRELDBL-66]
        _ = x[R_ADDRLOONG64-67]
        _ = x[R_ADDRLOONG64U-68]
@@ -91,9 +91,9 @@ func _() {
        _ = x[R_INITORDER-81]
 }
 
-const _RelocType_name = "R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_USEIFACER_USEIFACEMETHODR_USEGENERICIFACEMETHODR_METHODOFFR_KEEPR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_PCREL_LDST8R_ARM64_PCREL_LDST16R_ARM64_PCREL_LDST32R_ARM64_PCREL_LDST64R_ARM64_LDST8R_ARM64_LDST16R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_POWER_TLS_IE_PCREL34R_POWER_TLS_LE_TPREL34R_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_GOT_PCREL34R_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_ADDRPOWER_D34R_ADDRPOWER_PCREL34R_RISCV_CALLR_RISCV_CALL_TRAMPR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_RISCV_TLS_IE_ITYPER_RISCV_TLS_IE_STYPER_PCRELDBLR_ADDRLOONG64R_ADDRLOONG64UR_ADDRLOONG64TLSR_ADDRLOONG64TLSUR_CALLLOONG64R_LOONG64_TLS_IE_PCREL_HIR_LOONG64_TLS_IE_LOR_JMPLOONG64R_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREFR_PEIMAGEOFFR_INITORDER"
+const _RelocType_name = "R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_USEIFACER_USEIFACEMETHODR_USEGENERICIFACEMETHODR_METHODOFFR_KEEPR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_PCREL_LDST8R_ARM64_PCREL_LDST16R_ARM64_PCREL_LDST32R_ARM64_PCREL_LDST64R_ARM64_LDST8R_ARM64_LDST16R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_POWER_TLS_IE_PCREL34R_POWER_TLS_LE_TPREL34R_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_GOT_PCREL34R_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_ADDRPOWER_D34R_ADDRPOWER_PCREL34R_RISCV_CALLR_RISCV_CALL_TRAMPR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_RISCV_TLS_IER_RISCV_TLS_LER_PCRELDBLR_ADDRLOONG64R_ADDRLOONG64UR_ADDRLOONG64TLSR_ADDRLOONG64TLSUR_CALLLOONG64R_LOONG64_TLS_IE_PCREL_HIR_LOONG64_TLS_IE_LOR_JMPLOONG64R_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREFR_PEIMAGEOFFR_INITORDER"
 
-var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 53, 59, 68, 79, 88, 99, 109, 116, 123, 131, 139, 147, 153, 159, 165, 175, 184, 194, 210, 233, 244, 250, 261, 271, 280, 293, 307, 321, 335, 351, 362, 375, 394, 414, 434, 454, 467, 481, 495, 509, 524, 538, 552, 563, 585, 607, 621, 636, 659, 676, 694, 715, 730, 749, 761, 779, 798, 817, 837, 857, 867, 880, 894, 910, 927, 940, 965, 984, 996, 1007, 1020, 1031, 1043, 1053, 1065, 1076}
+var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 53, 59, 68, 79, 88, 99, 109, 116, 123, 131, 139, 147, 153, 159, 165, 175, 184, 194, 210, 233, 244, 250, 261, 271, 280, 293, 307, 321, 335, 351, 362, 375, 394, 414, 434, 454, 467, 481, 495, 509, 524, 538, 552, 563, 585, 607, 621, 636, 659, 676, 694, 715, 730, 749, 761, 779, 798, 817, 831, 845, 855, 868, 882, 898, 915, 928, 953, 972, 984, 995, 1008, 1019, 1031, 1041, 1053, 1064}
 
 func (i RelocType) String() string {
        i -= 1
index 6b5c0cbe5a3a71306c53862a0ece3da4fe193918..f3186398eb1f3f5651633a3a20f16b4f91c52366 100644 (file)
@@ -39,7 +39,7 @@ func genSymsLate(ctxt *ld.Link, ldr *loader.Loader) {
                for ri := 0; ri < relocs.Count(); ri++ {
                        r := relocs.At(ri)
                        if r.Type() != objabi.R_RISCV_PCREL_ITYPE && r.Type() != objabi.R_RISCV_PCREL_STYPE &&
-                               r.Type() != objabi.R_RISCV_TLS_IE_ITYPE && r.Type() != objabi.R_RISCV_TLS_IE_STYPE {
+                               r.Type() != objabi.R_RISCV_TLS_IE {
                                continue
                        }
                        if r.Off() == 0 && ldr.SymType(s) == sym.STEXT {
@@ -101,7 +101,7 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym,
                out.Write64(uint64(elf.R_RISCV_JAL) | uint64(elfsym)<<32)
                out.Write64(uint64(r.Xadd))
 
-       case objabi.R_RISCV_PCREL_ITYPE, objabi.R_RISCV_PCREL_STYPE, objabi.R_RISCV_TLS_IE_ITYPE, objabi.R_RISCV_TLS_IE_STYPE:
+       case objabi.R_RISCV_PCREL_ITYPE, objabi.R_RISCV_PCREL_STYPE, objabi.R_RISCV_TLS_IE:
                // Find the text symbol for the AUIPC instruction targeted
                // by this relocation.
                relocs := ldr.Relocs(s)
@@ -127,10 +127,8 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym,
                        hiRel, loRel = elf.R_RISCV_PCREL_HI20, elf.R_RISCV_PCREL_LO12_I
                case objabi.R_RISCV_PCREL_STYPE:
                        hiRel, loRel = elf.R_RISCV_PCREL_HI20, elf.R_RISCV_PCREL_LO12_S
-               case objabi.R_RISCV_TLS_IE_ITYPE:
+               case objabi.R_RISCV_TLS_IE:
                        hiRel, loRel = elf.R_RISCV_TLS_GOT_HI20, elf.R_RISCV_PCREL_LO12_I
-               case objabi.R_RISCV_TLS_IE_STYPE:
-                       hiRel, loRel = elf.R_RISCV_TLS_GOT_HI20, elf.R_RISCV_PCREL_LO12_S
                }
                out.Write64(uint64(sectoff))
                out.Write64(uint64(hiRel) | uint64(elfsym)<<32)
@@ -139,6 +137,14 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym,
                out.Write64(uint64(loRel) | uint64(hi20ElfSym)<<32)
                out.Write64(uint64(0))
 
+       case objabi.R_RISCV_TLS_LE:
+               out.Write64(uint64(sectoff))
+               out.Write64(uint64(elf.R_RISCV_TPREL_HI20) | uint64(elfsym)<<32)
+               out.Write64(uint64(r.Xadd))
+               out.Write64(uint64(sectoff + 4))
+               out.Write64(uint64(elf.R_RISCV_TPREL_LO12_I) | uint64(elfsym)<<32)
+               out.Write64(uint64(r.Xadd))
+
        default:
                return false
        }
@@ -189,7 +195,7 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade
                case objabi.R_RISCV_CALL, objabi.R_RISCV_CALL_TRAMP:
                        return val, 1, true
 
-               case objabi.R_RISCV_PCREL_ITYPE, objabi.R_RISCV_PCREL_STYPE, objabi.R_RISCV_TLS_IE_ITYPE, objabi.R_RISCV_TLS_IE_STYPE:
+               case objabi.R_RISCV_PCREL_ITYPE, objabi.R_RISCV_PCREL_STYPE, objabi.R_RISCV_TLS_IE, objabi.R_RISCV_TLS_LE:
                        return val, 2, true
                }
 
@@ -211,7 +217,7 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade
 
                return val, 0, true
 
-       case objabi.R_RISCV_TLS_IE_ITYPE, objabi.R_RISCV_TLS_IE_STYPE:
+       case objabi.R_RISCV_TLS_IE, objabi.R_RISCV_TLS_LE:
                // TLS relocations are not currently handled for internal linking.
                // For now, TLS is only used when cgo is in use and cgo currently
                // requires external linking. However, we need to accept these
@@ -273,7 +279,7 @@ func extreloc(target *ld.Target, ldr *loader.Loader, r loader.Reloc, s loader.Sy
        case objabi.R_RISCV_CALL, objabi.R_RISCV_CALL_TRAMP:
                return ld.ExtrelocSimple(ldr, r), true
 
-       case objabi.R_RISCV_PCREL_ITYPE, objabi.R_RISCV_PCREL_STYPE, objabi.R_RISCV_TLS_IE_ITYPE, objabi.R_RISCV_TLS_IE_STYPE:
+       case objabi.R_RISCV_PCREL_ITYPE, objabi.R_RISCV_PCREL_STYPE, objabi.R_RISCV_TLS_IE, objabi.R_RISCV_TLS_LE:
                return ld.ExtrelocViaOuterSym(ldr, r, s), true
        }
        return loader.ExtReloc{}, false
index 397919aeba23635de78464a453de036d84a39ea9..a0a58ea4a0a2548e12921fa2eebdcd1d1f141129 100644 (file)
 // NOTE: mcall() assumes this clobbers only X31 (REG_TMP).
 TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0-0
        MOVB    runtime·iscgo(SB), X31
-       BEQ     X0, X31, nocgo
-
-       MOV     runtime·tls_g(SB), X31
-       ADD     TP, X31         // add offset to thread pointer (X4)
-       MOV     g, (X31)
+       BEQZ    X31, nocgo
 
+       MOV     g, runtime·tls_g(SB)
 nocgo:
        RET
 
 TEXT runtime·load_g(SB),NOSPLIT|NOFRAME,$0-0
-       MOV     runtime·tls_g(SB), X31
-       ADD     TP, X31         // add offset to thread pointer (X4)
-       MOV     (X31), g
+       MOV     runtime·tls_g(SB), g
        RET
 
 GLOBL runtime·tls_g(SB), TLSBSS, $8