From d1ef967306b401474aafb2a0d351e972bdfeb54c Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Sat, 11 Sep 2021 02:03:10 +1000 Subject: [PATCH] cmd/link/internal/ld: assign temporary addresses to per-package text If trampolines may be required, the current text addressing second pass resets all assigned addresses, before assigning addresses and laying down trampolines in a linear fashion. However, this approach means that intra-package calls are to a symbol that has not yet been assigned an address, when the symbol is ahead of the current function. In the case of RISC-V the JAL instruction is limited to +/-1MiB. As such, if a call is to a symbol with no address currently assigned, we have to assume that a trampoline will be required. During the relocation phase we can fix up and avoid trampolines in some cases, however this results in unused trampolines that are still present in the binary (since removing them would change text addresses). In order to significantly reduce the number of unused trampolines, assign temporary addresses to functions within the same package, based on the maximum number of trampolines that may be required by a function. This allows for better decisions to be made regarding the requirement for intra-package trampolines, as we reset the addressing for a function, assign its final address and lay down any resulting trampolines. This results in ~2,300 unused trampolines being removed from the Go binary and ~5,600 unused trampolines being removed from the compile binary, on linux/riscv64. This reapplies CL 349650, however does not pass big to assignAddress when assigning temporary addresses, as this can result in side effects such as section splitting. Change-Id: Id7febdb65d962d6b1297a91294a8dc27c94d8696 Reviewed-on: https://go-review.googlesource.com/c/go/+/534760 Reviewed-by: Cherry Mui Reviewed-by: Dmitri Shuralyov Run-TryBot: Joel Sing TryBot-Result: Gopher Robot Reviewed-by: Than McIntosh --- src/cmd/link/internal/ld/data.go | 89 +++++++++++++++++++++-------- src/cmd/link/internal/ld/ld_test.go | 68 ++++++++++++++++++++++ 2 files changed, 132 insertions(+), 25 deletions(-) diff --git a/src/cmd/link/internal/ld/data.go b/src/cmd/link/internal/ld/data.go index dbaf686212..8534a55111 100644 --- a/src/cmd/link/internal/ld/data.go +++ b/src/cmd/link/internal/ld/data.go @@ -84,14 +84,15 @@ func maxSizeTrampolines(ctxt *Link, ldr *loader.Loader, s loader.Sym, isTramp bo } } - if ctxt.IsARM() { + switch { + case ctxt.IsARM(): return n * 20 // Trampolines in ARM range from 3 to 5 instructions. - } - if ctxt.IsPPC64() { - return n * 16 // Trampolines in PPC64 are 4 instructions. - } - if ctxt.IsARM64() { + case ctxt.IsARM64(): return n * 12 // Trampolines in ARM64 are 3 instructions. + case ctxt.IsPPC64(): + return n * 16 // Trampolines in PPC64 are 4 instructions. + case ctxt.IsRISCV64(): + return n * 8 // Trampolines in RISCV64 are 2 instructions. } panic("unreachable") } @@ -118,18 +119,21 @@ func trampoline(ctxt *Link, s loader.Sym) { continue // something is wrong. skip it here and we'll emit a better error later } - // RISC-V is only able to reach +/-1MiB via a JAL instruction, - // which we can readily exceed in the same package. As such, we - // need to generate trampolines when the address is unknown. - if ldr.SymValue(rs) == 0 && !ctxt.Target.IsRISCV64() && ldr.SymType(rs) != sym.SDYNIMPORT && ldr.SymType(rs) != sym.SUNDEFEXT { + if ldr.SymValue(rs) == 0 && ldr.SymType(rs) != sym.SDYNIMPORT && ldr.SymType(rs) != sym.SUNDEFEXT { + // Symbols in the same package are laid out together. + // Except that if SymPkg(s) == "", it is a host object symbol + // which may call an external symbol via PLT. if ldr.SymPkg(s) != "" && ldr.SymPkg(rs) == ldr.SymPkg(s) { - // Symbols in the same package are laid out together. - // Except that if SymPkg(s) == "", it is a host object symbol - // which may call an external symbol via PLT. - continue + // RISC-V is only able to reach +/-1MiB via a JAL instruction. + // We need to generate a trampoline when an address is + // currently unknown. + if !ctxt.Target.IsRISCV64() { + continue + } } + // Runtime packages are laid out together. if isRuntimeDepPkg(ldr.SymPkg(s)) && isRuntimeDepPkg(ldr.SymPkg(rs)) { - continue // runtime packages are laid out together + continue } } thearch.Trampoline(ctxt, ldr, ri, rs, s) @@ -2435,8 +2439,8 @@ func (ctxt *Link) textaddress() { limit = 1 } - // First pass: assign addresses assuming the program is small and - // don't generate trampolines. + // First pass: assign addresses assuming the program is small and will + // not require trampoline generation. big := false for _, s := range ctxt.Textp { sect, n, va = assignAddress(ctxt, sect, n, s, va, false, big) @@ -2451,21 +2455,45 @@ func (ctxt *Link) textaddress() { if big { // reset addresses for _, s := range ctxt.Textp { - if ldr.OuterSym(s) != 0 || s == text { - continue - } - oldv := ldr.SymValue(s) - for sub := s; sub != 0; sub = ldr.SubSym(sub) { - ldr.SetSymValue(sub, ldr.SymValue(sub)-oldv) + if s != text { + resetAddress(ctxt, s) } } va = start ntramps := 0 - for _, s := range ctxt.Textp { + var curPkg string + for i, s := range ctxt.Textp { + // When we find the first symbol in a package, perform a + // single iteration that assigns temporary addresses to all + // of the text in the same package, using the maximum possible + // number of trampolines. This allows for better decisions to + // be made regarding reachability and the need for trampolines. + if symPkg := ldr.SymPkg(s); symPkg != "" && curPkg != symPkg { + curPkg = symPkg + vaTmp := va + for j := i; j < len(ctxt.Textp); j++ { + curSym := ctxt.Textp[j] + if symPkg := ldr.SymPkg(curSym); symPkg == "" || curPkg != symPkg { + break + } + // We do not pass big to assignAddress here, as this + // can result in side effects such as section splitting. + sect, n, vaTmp = assignAddress(ctxt, sect, n, curSym, vaTmp, false, false) + vaTmp += maxSizeTrampolines(ctxt, ldr, curSym, false) + } + } + + // Reset address for current symbol. + if s != text { + resetAddress(ctxt, s) + } + + // Assign actual address for current symbol. sect, n, va = assignAddress(ctxt, sect, n, s, va, false, big) - trampoline(ctxt, s) // resolve jumps, may add trampolines if jump too far + // Resolve jumps, adding trampolines if they are needed. + trampoline(ctxt, s) // lay down trampolines after each function for ; ntramps < len(ctxt.tramps); ntramps++ { @@ -2613,6 +2641,17 @@ func assignAddress(ctxt *Link, sect *sym.Section, n int, s loader.Sym, va uint64 return sect, n, va } +func resetAddress(ctxt *Link, s loader.Sym) { + ldr := ctxt.loader + if ldr.OuterSym(s) != 0 { + return + } + oldv := ldr.SymValue(s) + for sub := s; sub != 0; sub = ldr.SubSym(sub) { + ldr.SetSymValue(sub, ldr.SymValue(sub)-oldv) + } +} + // Return whether we may need to split text sections. // // On PPC64x, when external linking, a text section should not be diff --git a/src/cmd/link/internal/ld/ld_test.go b/src/cmd/link/internal/ld/ld_test.go index a7a6082f54..1767667759 100644 --- a/src/cmd/link/internal/ld/ld_test.go +++ b/src/cmd/link/internal/ld/ld_test.go @@ -344,3 +344,71 @@ func main() { }) } } + +func TestRISCVTrampolines(t *testing.T) { + testenv.MustHaveGoBuild(t) + t.Parallel() + + tmpDir := t.TempDir() + tmpFile := filepath.Join(tmpDir, "x.s") + + // Calling b from a or c should not use trampolines, however + // calling from d to a will require one. + buf := new(bytes.Buffer) + fmt.Fprintf(buf, "TEXT a(SB),$0-0\n") + for i := 0; i < 1<<17; i++ { + fmt.Fprintf(buf, "\tADD $0, X0, X0\n") + } + fmt.Fprintf(buf, "\tCALL b(SB)\n") + fmt.Fprintf(buf, "\tRET\n") + fmt.Fprintf(buf, "TEXT b(SB),$0-0\n") + fmt.Fprintf(buf, "\tRET\n") + fmt.Fprintf(buf, "TEXT c(SB),$0-0\n") + fmt.Fprintf(buf, "\tCALL b(SB)\n") + fmt.Fprintf(buf, "\tRET\n") + fmt.Fprintf(buf, "TEXT ·d(SB),0,$0-0\n") + for i := 0; i < 1<<17; i++ { + fmt.Fprintf(buf, "\tADD $0, X0, X0\n") + } + fmt.Fprintf(buf, "\tCALL a(SB)\n") + fmt.Fprintf(buf, "\tCALL c(SB)\n") + fmt.Fprintf(buf, "\tRET\n") + if err := os.WriteFile(tmpFile, buf.Bytes(), 0644); err != nil { + t.Fatalf("Failed to write assembly file: %v", err) + } + + if err := os.WriteFile(filepath.Join(tmpDir, "go.mod"), []byte("module riscvtramp"), 0644); err != nil { + t.Fatalf("Failed to write file: %v\n", err) + } + main := `package main +func main() { + d() +} + +func d() +` + if err := os.WriteFile(filepath.Join(tmpDir, "x.go"), []byte(main), 0644); err != nil { + t.Fatalf("failed to write main: %v\n", err) + } + cmd := testenv.Command(t, testenv.GoToolPath(t), "build", "-ldflags=-linkmode=internal") + cmd.Dir = tmpDir + cmd.Env = append(os.Environ(), "GOARCH=riscv64", "GOOS=linux") + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("Build failed: %v, output: %s", err, out) + } + + // Check what trampolines exist. + cmd = testenv.Command(t, testenv.GoToolPath(t), "tool", "nm", filepath.Join(tmpDir, "riscvtramp")) + cmd.Env = append(os.Environ(), "GOARCH=riscv64", "GOOS=linux") + out, err = cmd.CombinedOutput() + if err != nil { + t.Fatalf("nm failure: %s\n%s\n", err, string(out)) + } + if !bytes.Contains(out, []byte(" T a-tramp0")) { + t.Errorf("Trampoline a-tramp0 is missing") + } + if bytes.Contains(out, []byte(" T b-tramp0")) { + t.Errorf("Trampoline b-tramp0 exists unnecessarily") + } +} -- 2.50.0