]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/obj/loong64, cmd/link/internal: switch to LoongArch ELF psABI v2 relocs
authorWANG Xuerui <git@xen0n.name>
Sat, 3 Dec 2022 13:16:49 +0000 (21:16 +0800)
committerGopher Robot <gobot@golang.org>
Mon, 10 Apr 2023 15:50:11 +0000 (15:50 +0000)
The LoongArch ELF psABI v2 [1] relocs are vastly simplified from the v1
which involved a stack machine for computing the reloc values, but the
details of PC-relative addressing are changed as well. Specifically, the
`pcaddu12i` instruction is substituted with the `pcalau12i`, which is
like arm64's `adrp` -- meaning the lower bits of a symbol's address now
have to be absolute and not PC-relative.

However, apart from the little bit of added complexity, the obvious
advantage is that only 1 reloc needs to be emitted for every kind of
external reloc we care about. This can mean substantial space savings
(each RELA reloc occupies 24 bytes), and no open-coded stack ops has to
remain any more.

While at it, update the preset value for the output ELF's flags to
indicate the psABI update.

Fixes #58784

[1]: https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html

Change-Id: I5c13bc710eaf58293a32e930dd33feff2ef14c28
Reviewed-on: https://go-review.googlesource.com/c/go/+/455017
Run-TryBot: Ben Shi <powerman1st@163.com>
Reviewed-by: xiaodong liu <teaofmoli@gmail.com>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
Run-TryBot: Ian Lance Taylor <iant@google.com>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Auto-Submit: Ian Lance Taylor <iant@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
src/cmd/asm/internal/asm/testdata/loong64enc2.s
src/cmd/internal/obj/loong64/asm.go
src/cmd/link/internal/ld/elf.go
src/cmd/link/internal/loong64/asm.go

index 3b5e3cb81ae8971e3793960e7d16fbaa58f3f01a..00768365b698e560d586952e68483e1db4db87aa 100644 (file)
@@ -61,22 +61,22 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
        XOR     $-1, R4                 // 1efcbf0284f81500
        MOVH    R4, R5                  // 85c04000a5c04800
 
-       // relocation   instructions
-       MOVW    R4, name(SB)            // 1e00001cc4038029
-       MOVWU   R4, name(SB)            // 1e00001cc4038029
-       MOVV    R4, name(SB)            // 1e00001cc403c029
-       MOVB    R4, name(SB)            // 1e00001cc4030029
-       MOVBU   R4, name(SB)            // 1e00001cc4030029
-       MOVF    F4, name(SB)            // 1e00001cc403402b
-       MOVD    F4, name(SB)            // 1e00001cc403c02b
-       MOVW    name(SB), R4            // 1e00001cc4038028
-       MOVWU   name(SB), R4            // 1e00001cc403802a
-       MOVV    name(SB), R4            // 1e00001cc403c028
-       MOVB    name(SB), R4            // 1e00001cc4030028
-       MOVBU   name(SB), R4            // 1e00001cc403002a
-       MOVF    name(SB), F4            // 1e00001cc403002b
-       MOVD    name(SB), F4            // 1e00001cc403802b
-       MOVH    R4, name(SB)            // 1e00001cc4034029
-       MOVH    name(SB), R4            // 1e00001cc4034028
-       MOVHU   R4, name(SB)            // 1e00001cc4034029
-       MOVHU   name(SB), R4            // 1e00001cc403402a
+       // relocation instructions
+       MOVW    R4, name(SB)            // 1e00001ac4038029
+       MOVWU   R4, name(SB)            // 1e00001ac4038029
+       MOVV    R4, name(SB)            // 1e00001ac403c029
+       MOVB    R4, name(SB)            // 1e00001ac4030029
+       MOVBU   R4, name(SB)            // 1e00001ac4030029
+       MOVF    F4, name(SB)            // 1e00001ac403402b
+       MOVD    F4, name(SB)            // 1e00001ac403c02b
+       MOVW    name(SB), R4            // 1e00001ac4038028
+       MOVWU   name(SB), R4            // 1e00001ac403802a
+       MOVV    name(SB), R4            // 1e00001ac403c028
+       MOVB    name(SB), R4            // 1e00001ac4030028
+       MOVBU   name(SB), R4            // 1e00001ac403002a
+       MOVF    name(SB), F4            // 1e00001ac403002b
+       MOVD    name(SB), F4            // 1e00001ac403802b
+       MOVH    R4, name(SB)            // 1e00001ac4034029
+       MOVH    name(SB), R4            // 1e00001ac4034028
+       MOVHU   R4, name(SB)            // 1e00001ac4034029
+       MOVHU   name(SB), R4            // 1e00001ac403402a
index e23165a87406d02244501d51b6bddb1f2eda88f2..75b9302f24d16dd0dd9b54435f267a607393a7ee 100644 (file)
@@ -1540,8 +1540,8 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) {
                o1 = c.oprrr(ABREAK)
 
        // relocation operations
-       case 50: // mov r,addr ==> pcaddu12i + sw
-               o1 = OP_IR(c.opir(APCADDU12I), uint32(0), uint32(REGTMP))
+       case 50: // mov r,addr ==> pcalau12i + sw
+               o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP))
                rel := obj.Addrel(c.cursym)
                rel.Off = int32(c.pc)
                rel.Siz = 4
@@ -1557,8 +1557,8 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) {
                rel2.Add = p.To.Offset
                rel2.Type = objabi.R_ADDRLOONG64
 
-       case 51: // mov addr,r ==> pcaddu12i + lw
-               o1 = OP_IR(c.opir(APCADDU12I), uint32(0), uint32(REGTMP))
+       case 51: // mov addr,r ==> pcalau12i + lw
+               o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP))
                rel := obj.Addrel(c.cursym)
                rel.Off = int32(c.pc)
                rel.Siz = 4
@@ -1576,7 +1576,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) {
        case 52: // mov $lext, r
                // NOTE: this case does not use REGTMP. If it ever does,
                // remove the NOTUSETMP flag in optab.
-               o1 = OP_IR(c.opir(APCADDU12I), uint32(0), uint32(p.To.Reg))
+               o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(p.To.Reg))
                rel := obj.Addrel(c.cursym)
                rel.Off = int32(c.pc)
                rel.Siz = 4
index 842570d5ef3a46badec1afe24e7f4c84c283646b..cc387da68b1bcebf6fa4a615d0ecd0c980cbf714 100644 (file)
@@ -254,7 +254,7 @@ func Elfinit(ctxt *Link) {
                        ehdr.Flags = 0x20000004 /* MIPS 3 CPIC */
                }
                if ctxt.Arch.Family == sys.Loong64 {
-                       ehdr.Flags = 0x3 /* LoongArch lp64d */
+                       ehdr.Flags = 0x43 /* DOUBLE_FLOAT, OBJABI_V1 */
                }
                if ctxt.Arch.Family == sys.RISCV64 {
                        ehdr.Flags = 0x4 /* RISCV Float ABI Double */
index 0eb3a813b2140a26066fccd458d2f38f69197d49..61b6efe92ccadc827bb0a1badb7640bbe3ccf502 100644 (file)
@@ -46,100 +46,28 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym,
                }
        case objabi.R_ADDRLOONG64TLS:
                out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_PUSH_TLS_TPREL) | uint64(elfsym)<<32)
+               out.Write64(uint64(elf.R_LARCH_TLS_LE_LO12) | uint64(elfsym)<<32)
                out.Write64(uint64(r.Xadd))
 
-               out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_PUSH_ABSOLUTE))
-               out.Write64(uint64(0xfff))
-
-               out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_AND))
-               out.Write64(uint64(0x0))
-
-               out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_POP_32_U_10_12))
-               out.Write64(uint64(0x0))
-
        case objabi.R_ADDRLOONG64TLSU:
                out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_PUSH_TLS_TPREL) | uint64(elfsym)<<32)
+               out.Write64(uint64(elf.R_LARCH_TLS_LE_HI20) | uint64(elfsym)<<32)
                out.Write64(uint64(r.Xadd))
 
-               out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_PUSH_ABSOLUTE))
-               out.Write64(uint64(0xc))
-
-               out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_SR))
-               out.Write64(uint64(0x0))
-
-               out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_POP_32_S_5_20) | uint64(0)<<32)
-               out.Write64(uint64(0x0))
-
        case objabi.R_CALLLOONG64:
                out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_PUSH_PLT_PCREL) | uint64(elfsym)<<32)
+               out.Write64(uint64(elf.R_LARCH_B26) | uint64(elfsym)<<32)
                out.Write64(uint64(r.Xadd))
 
-               out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_POP_32_S_0_10_10_16_S2))
-               out.Write64(uint64(0x0))
-       // The pcaddu12i + addi.d instructions is used to obtain address of a symbol on Loong64.
-       // The low 12-bit of the symbol address need to be added. The addi.d instruction have
-       // signed 12-bit immediate operand. The 0x800 (addr+U12 <=> addr+0x800+S12) is introduced
-       // to do sign extending from 12 bits. The 0x804 is 0x800 + 4, 4 is instruction bit
-       // width on Loong64 and is used to correct the PC of the addi.d instruction.
        case objabi.R_ADDRLOONG64:
                out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_PUSH_PCREL) | uint64(elfsym)<<32)
-               out.Write64(uint64(r.Xadd + 0x4))
-
-               out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_PUSH_PCREL) | uint64(elfsym)<<32)
-               out.Write64(uint64(r.Xadd + 0x804))
-
-               out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_PUSH_ABSOLUTE))
-               out.Write64(uint64(0xc))
-
-               out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_SR))
-               out.Write64(uint64(0x0))
-
-               out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_PUSH_ABSOLUTE))
-               out.Write64(uint64(0xc))
-
-               out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_SL))
-               out.Write64(uint64(0x0))
-
-               out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_SUB))
-               out.Write64(uint64(0x0))
-
-               out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_POP_32_S_10_12))
-               out.Write64(uint64(0x0))
+               out.Write64(uint64(elf.R_LARCH_PCALA_LO12) | uint64(elfsym)<<32)
+               out.Write64(uint64(r.Xadd))
 
        case objabi.R_ADDRLOONG64U:
                out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_PUSH_PCREL) | uint64(elfsym)<<32)
-               out.Write64(uint64(r.Xadd + 0x800))
-
-               out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_PUSH_ABSOLUTE))
-               out.Write64(uint64(0xc))
-
-               out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_SR))
-               out.Write64(uint64(0x0))
-
-               out.Write64(uint64(sectoff))
-               out.Write64(uint64(elf.R_LARCH_SOP_POP_32_S_5_20) | uint64(0)<<32)
-               out.Write64(uint64(0x0))
+               out.Write64(uint64(elf.R_LARCH_PCALA_HI20) | uint64(elfsym)<<32)
+               out.Write64(uint64(r.Xadd))
        }
 
        return true
@@ -156,7 +84,6 @@ func machoreloc1(*sys.Arch, *ld.OutBuf, *loader.Loader, loader.Sym, loader.ExtRe
 func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loader.Reloc, s loader.Sym, val int64) (o int64, nExtReloc int, ok bool) {
        rs := r.Sym()
        if target.IsExternal() {
-               nExtReloc := 0
                switch r.Type() {
                default:
                        return val, 0, false
@@ -168,20 +95,12 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade
                        if rst != sym.SHOSTOBJ && rst != sym.SDYNIMPORT && ldr.SymSect(rs) == nil {
                                ldr.Errorf(s, "missing section for %s", ldr.SymName(rs))
                        }
-                       nExtReloc = 8 // need 8 ELF relocations. see elfreloc1
-                       if r.Type() == objabi.R_ADDRLOONG64U {
-                               nExtReloc = 4
-                       }
-                       return val, nExtReloc, true
+                       return val, 1, true
                case objabi.R_ADDRLOONG64TLS,
                        objabi.R_ADDRLOONG64TLSU,
                        objabi.R_CALLLOONG64,
                        objabi.R_JMPLOONG64:
-                       nExtReloc = 4
-                       if r.Type() == objabi.R_CALLLOONG64 || r.Type() == objabi.R_JMPLOONG64 {
-                               nExtReloc = 2
-                       }
-                       return val, nExtReloc, true
+                       return val, 1, true
                }
        }
 
@@ -196,11 +115,11 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade
        case objabi.R_ADDRLOONG64,
                objabi.R_ADDRLOONG64U:
                pc := ldr.SymValue(s) + int64(r.Off())
-               t := ldr.SymAddr(rs) + r.Add() - pc
+               t := calculatePCAlignedReloc(r.Type(), ldr.SymAddr(rs)+r.Add(), pc)
                if r.Type() == objabi.R_ADDRLOONG64 {
-                       return int64(val&0xffc003ff | (((t + 4 - ((t + 4 + 1<<11) >> 12 << 12)) << 10) & 0x3ffc00)), noExtReloc, isOk
+                       return int64(val&0xffc003ff | (t << 10)), noExtReloc, isOk
                }
-               return int64(val&0xfe00001f | (((t + 1<<11) >> 12 << 5) & 0x1ffffe0)), noExtReloc, isOk
+               return int64(val&0xfe00001f | (t << 5)), noExtReloc, isOk
        case objabi.R_ADDRLOONG64TLS,
                objabi.R_ADDRLOONG64TLSU:
                t := ldr.SymAddr(rs) + r.Add()
@@ -238,3 +157,33 @@ func extreloc(target *ld.Target, ldr *loader.Loader, r loader.Reloc, s loader.Sy
        }
        return loader.ExtReloc{}, false
 }
+
+func isRequestingLowPageBits(t objabi.RelocType) bool {
+       switch t {
+       case objabi.R_ADDRLOONG64:
+               return true
+       }
+       return false
+}
+
+// Calculates the value to put into the immediate slot, according to the
+// desired relocation type, target and PC.
+// The value to use varies based on the reloc type. Namely, the absolute low
+// bits of the target are to be used for the low part, while the page-aligned
+// offset is to be used for the higher part. A "page" here is not related to
+// the system's actual page size, but rather a fixed 12-bit range (designed to
+// cooperate with ADDI/LD/ST's 12-bit immediates).
+func calculatePCAlignedReloc(t objabi.RelocType, tgt int64, pc int64) int64 {
+       if isRequestingLowPageBits(t) {
+               // corresponding immediate field is 12 bits wide
+               return tgt & 0xfff
+       }
+
+       pageDelta := (tgt >> 12) - (pc >> 12)
+       if tgt&0xfff >= 0x800 {
+               // adjust for sign-extended addition of the low bits
+               pageDelta += 1
+       }
+       // corresponding immediate field is 20 bits wide
+       return pageDelta & 0xfffff
+}