From 017ffcd10d980e7fe6ca101e253a12c9326fba37 Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Fri, 1 Oct 2021 16:35:43 -0700 Subject: [PATCH] cmd/link, runtime: convert FUNCDATA relocations to offsets MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Every function has associated numbered extra funcdata to another symbol. Prior to this change, a funcdata pointer was stored as a relocation. This change alters this to be an offset relative to go.func.* or go.funcrel.*. This reduces the number of relocations on darwin/arm64 by about 40%. It also shrinks externally linked binaries. On darwin/arm64: size before after Δ % addr2line 3788498 3699730 -88768 -2.343% api 5100018 4951074 -148944 -2.920% asm 4855234 4744274 -110960 -2.285% buildid 2500162 2419986 -80176 -3.207% cgo 4338258 4218306 -119952 -2.765% compile 22764418 22132226 -632192 -2.777% cover 4583186 4432770 -150416 -3.282% dist 3200962 3094626 -106336 -3.322% doc 3680402 3583602 -96800 -2.630% fix 3114914 3023922 -90992 -2.921% link 6308578 6154786 -153792 -2.438% nm 3754338 3665826 -88512 -2.358% objdump 4124738 4015234 -109504 -2.655% pack 2232626 2155010 -77616 -3.476% pprof 13497474 13044066 -453408 -3.359% test2json 2483810 2402146 -81664 -3.288% trace 10108898 9748802 -360096 -3.562% vet 6884322 6681314 -203008 -2.949% total 107320836 104167700 -3153136 -2.938% relocs before after Δ % addr2line 33357 25563 -7794 -23.365% api 31589 18409 -13180 -41.723% asm 27825 18904 -8921 -32.061% buildid 15603 9513 -6090 -39.031% cgo 27809 17103 -10706 -38.498% compile 114769 64829 -49940 -43.513% cover 32932 19462 -13470 -40.902% dist 18797 10796 -8001 -42.565% doc 22891 13503 -9388 -41.012% fix 19700 11465 -8235 -41.802% link 37324 23198 -14126 -37.847% nm 33226 25480 -7746 -23.313% objdump 35237 26610 -8627 -24.483% pack 13535 7951 -5584 -41.256% pprof 97986 63961 -34025 -34.724% test2json 15113 8735 -6378 -42.202% trace 66786 39636 -27150 -40.652% vet 43328 25971 -17357 -40.060% total 687806 431088 -256718 -37.324% It should also incrementally speed up binary launching and may reduce linker memory use. This is another step towards removing relocations so that pages that were previously dirtied by the loader may remain clean, which will offer memory savings useful in constrained environments like iOS. Removing the relocations in .stkobj symbols will allow some simplifications. There will be no references into go.funcrel.*, so we will no longer need to use the bottom bit to distinguish offset bases. Change-Id: I83d34c1701d6f3f515b9905941477d522441019d Reviewed-on: https://go-review.googlesource.com/c/go/+/352110 Trust: Josh Bleecher Snyder Run-TryBot: Josh Bleecher Snyder TryBot-Result: Go Bot Reviewed-by: Cherry Mui --- src/cmd/link/internal/ld/pcln.go | 90 +++++++++++++----------------- src/cmd/link/internal/ld/symtab.go | 6 ++ src/runtime/symtab.go | 14 ++++- 3 files changed, 58 insertions(+), 52 deletions(-) diff --git a/src/cmd/link/internal/ld/pcln.go b/src/cmd/link/internal/ld/pcln.go index 7506bf17a3..b041174cfe 100644 --- a/src/cmd/link/internal/ld/pcln.go +++ b/src/cmd/link/internal/ld/pcln.go @@ -552,11 +552,8 @@ type pclnSetUint func(*loader.SymbolBuilder, *sys.Arch, int64, uint64) int64 // // Because of timing in the linker, generating this table takes two passes. // The first pass is executed early in the link, and it creates any needed -// relocations to lay out the data. The pieces that need relocations are: -// 1) the PC->func table. -// 2) The funcdata. -// (1) is handled in writePCToFunc. (2) is handled in writeFuncdata. -// +// relocations to lay out the data. The piece that needs relocations is +// the PC->func table, handled in writePCToFunc. // After relocations, once we know where to write things in the output buffer, // we execute the second pass, which is actually writing the data. func (state *pclntab) generateFunctab(ctxt *Link, funcs []loader.Sym, inlSyms map[loader.Sym]loader.Sym, cuOffsets []uint32, nameOffsets map[loader.Sym]uint32) { @@ -592,7 +589,6 @@ func (state *pclntab) generateFunctab(ctxt *Link, funcs []loader.Sym, inlSyms ma // Write the data. writePCToFunc(ctxt, sb, funcs, startLocations, setAddr, (*loader.SymbolBuilder).SetUint) writeFuncs(ctxt, sb, funcs, inlSyms, startLocations, cuOffsets, nameOffsets) - state.writeFuncData(ctxt, sb, funcs, inlSyms, startLocations, setAddr, (*loader.SymbolBuilder).SetUint) } state.pclntab = state.addGeneratedSym(ctxt, "runtime.functab", size, writePcln) @@ -616,11 +612,6 @@ func (state *pclntab) generateFunctab(ctxt *Link, funcs []loader.Sym, inlSyms ma } setUintNOP := func(*loader.SymbolBuilder, *sys.Arch, int64, uint64) int64 { return 0 } writePCToFunc(ctxt, sb, funcs, startLocations, setAddr, setUintNOP) - if !useSymValue { - // Generate relocations for funcdata when externally linking. - state.writeFuncData(ctxt, sb, funcs, inlSyms, startLocations, setAddr, setUintNOP) - sb.SortRelocs() - } } // funcData returns the funcdata and offsets for the FuncInfo. @@ -675,7 +666,7 @@ func (state pclntab) calculateFunctabSize(ctxt *Link, funcs []loader.Sym) (int64 if numFuncData > 0 { // Func data is aligned. size = Rnd(size, int64(ctxt.Arch.PtrSize)) } - size += int64(numFuncData * ctxt.Arch.PtrSize) + size += int64(numFuncData * 4) } } @@ -715,49 +706,12 @@ func writePCToFunc(ctxt *Link, sb *loader.SymbolBuilder, funcs []loader.Sym, sta setAddr(sb, ctxt.Arch, int64(funcIndex)*2*int64(ctxt.Arch.PtrSize), prevFunc, ldr.SymSize(prevFunc)) } -// writeFuncData writes the funcdata tables. -// -// This function executes a callback for each funcdata needed in -// runtime.functab. It should be called once for internally linked static -// binaries, or twice (once to generate the needed relocations) for other -// build modes. -// -// Note the output of this function is interwoven with writeFuncs, but this is -// a separate function, because it's needed in different passes in -// generateFunctab. -func (state *pclntab) writeFuncData(ctxt *Link, sb *loader.SymbolBuilder, funcs []loader.Sym, inlSyms map[loader.Sym]loader.Sym, startLocations []uint32, setAddr pclnSetAddr, setUint pclnSetUint) { - ldr := ctxt.loader - funcdata := []loader.Sym{} - for i, s := range funcs { - fi := ldr.FuncInfo(s) - if !fi.Valid() { - continue - } - fi.Preload() - - // funcdata, must be pointer-aligned and we're only int32-aligned. - // Missing funcdata will be 0 (nil pointer). - funcdata = funcData(ldr, s, fi, inlSyms[s], funcdata) - if len(funcdata) > 0 { - off := int64(startLocations[i] + funcSize + numPCData(ldr, s, fi)*4) - off = Rnd(off, int64(ctxt.Arch.PtrSize)) - for j := range funcdata { - dataoff := off + int64(ctxt.Arch.PtrSize*j) - if funcdata[j] == 0 { - setUint(sb, ctxt.Arch, dataoff, 0) - continue - } - // TODO: Does this need deduping? - setAddr(sb, ctxt.Arch, dataoff, funcdata[j], 0) - } - } - } -} - // writeFuncs writes the func structures and pcdata to runtime.functab. func writeFuncs(ctxt *Link, sb *loader.SymbolBuilder, funcs []loader.Sym, inlSyms map[loader.Sym]loader.Sym, startLocations, cuOffsets []uint32, nameOffsets map[loader.Sym]uint32) { ldr := ctxt.loader deferReturnSym := ldr.Lookup("runtime.deferreturn", abiInternalVer) + gofunc := ldr.Lookup("go.func.*", 0) + gofuncrel := ldr.Lookup("go.funcrel.*", 0) textStart := ldr.SymValue(ldr.Lookup("runtime.text", 0)) funcdata := []loader.Sym{} var pcsp, pcfile, pcline, pcinline loader.Sym @@ -844,6 +798,40 @@ func writeFuncs(ctxt *Link, sb *loader.SymbolBuilder, funcs []loader.Sym, inlSym sb.SetUint32(ctxt.Arch, int64(off+objabi.PCDATA_InlTreeIndex*4), uint32(ldr.SymValue(pcinline))) } } + + // Write funcdata refs as offsets from go.func.* and go.funcrel.*. + funcdata = funcData(ldr, s, fi, inlSyms[s], funcdata) + // funcdata must be pointer-aligned and we're only int32-aligned. + // Missing funcdata will be ^0. See runtime/symtab.go:funcdata. + off = uint32(startLocations[i] + funcSize + numPCData(ldr, s, fi)*4) + off = uint32(Rnd(int64(off), int64(ctxt.Arch.PtrSize))) + for j := range funcdata { + dataoff := off + uint32(4*j) + fdsym := funcdata[j] + if fdsym == 0 { + sb.SetUint32(ctxt.Arch, int64(dataoff), ^uint32(0)) // ^0 is a sentinel for "no value" + continue + } + + outer := ldr.OuterSym(fdsym) + if outer == 0 { + panic(fmt.Sprintf("no carrier sym for symbol %s (funcdata %s#%d)", ldr.SymName(fdsym), ldr.SymName(s), j)) + } + rel := uint32(ldr.SymValue(fdsym) - ldr.SymValue(outer)) + // Record gofunc vs gofuncrel in bottom bit. See runtime/symtab.go:funcdata. + // TODO: The only symbols that in gofuncrel are .stkobj symbols. + // Remove those relocations, and simplify this. + rel <<= 1 + switch outer { + case gofunc: + case gofuncrel: + rel |= 1 + default: + panic(fmt.Sprintf("expected symbol %s (funcdata %s#%d) to be placed in go.func.* or go.funcrel.*, got %s (%d)", + ldr.SymName(fdsym), ldr.SymName(s), j, ldr.SymName(outer), outer)) + } + sb.SetUint32(ctxt.Arch, int64(dataoff), rel) + } } } diff --git a/src/cmd/link/internal/ld/symtab.go b/src/cmd/link/internal/ld/symtab.go index 7fddc59bb5..c582e4908d 100644 --- a/src/cmd/link/internal/ld/symtab.go +++ b/src/cmd/link/internal/ld/symtab.go @@ -676,6 +676,12 @@ func (ctxt *Link) symtab(pcln *pclntab) []sym.SymKind { moduledata.AddAddr(ctxt.Arch, ldr.Lookup("runtime.gcbss", 0)) moduledata.AddAddr(ctxt.Arch, ldr.Lookup("runtime.types", 0)) moduledata.AddAddr(ctxt.Arch, ldr.Lookup("runtime.etypes", 0)) + moduledata.AddAddr(ctxt.Arch, ldr.Lookup("go.func.*", 0)) + if gofuncrel := ldr.Lookup("go.funcrel.*", 0); gofuncrel != 0 { + moduledata.AddAddr(ctxt.Arch, gofuncrel) + } else { + moduledata.AddUint(ctxt.Arch, 0) + } if ctxt.IsAIX() && ctxt.IsExternal() { // Add R_XCOFFREF relocation to prevent ld's garbage collection of diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go index 14591602a3..8f7b439dc5 100644 --- a/src/runtime/symtab.go +++ b/src/runtime/symtab.go @@ -427,6 +427,7 @@ type moduledata struct { noptrbss, enoptrbss uintptr end, gcdata, gcbss uintptr types, etypes uintptr + gofunc, gofuncrel uintptr // go.func.*, go.funcrel.* textsectmap []textsect typelinks []int32 // offsets from types @@ -1073,6 +1074,8 @@ func pcdatavalue2(f funcInfo, table uint32, targetpc uintptr) (int32, uintptr) { return pcvalue(f, pcdatastart(f, table), targetpc, nil, true) } +// funcdata returns a pointer to the ith funcdata for f. +// funcdata should be kept in sync with cmd/link:writeFuncs. func funcdata(f funcInfo, i uint8) unsafe.Pointer { if i < 0 || i >= f.nfuncdata { return nil @@ -1084,7 +1087,16 @@ func funcdata(f funcInfo, i uint8) unsafe.Pointer { } p = add(p, 4) } - return *(*unsafe.Pointer)(add(p, uintptr(i)*goarch.PtrSize)) + p = add(p, uintptr(i)*4) + off := *(*uint32)(p) + if off == ^uint32(0) { + return nil + } + base := f.datap.gofunc + if off&1 != 0 { + base = f.datap.gofuncrel + } + return unsafe.Pointer(base + uintptr(off>>1)) } // step advances to the next pc, value pair in the encoded table. -- 2.48.1