From 37682f7a797a2f13dcc5496e506bfd662f6e51fd Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Sat, 20 Jun 2020 14:38:38 -0400 Subject: [PATCH] [dev.link] cmd/link: apply relocations while writing symbols MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit We can apply relocations of a symbol right after the symbol data is copied to output buffer. This should help locality and parallelism (parallelizing over blocks, instead of over segments). Linking cmd/compile, Asmb+Reloc 23.9ms ±18% 16.5ms ±11% -30.73% (p=0.008 n=5+5) Linking cmd/compile with external linking, Asmb+Reloc 74.0ms ± 3% 33.8ms ± 8% -54.32% (p=0.008 n=5+5) In external linking mode, allocation goes up slightly, as we do smaller batching now. It doesn't seem too bad. Asmb+Reloc 15.0MB ± 0% 16.7MB ± 0% +11.22% (p=0.008 n=5+5) Change-Id: Ide33d9ff86c39124c8f5cfc050d7badc753a1ced Reviewed-on: https://go-review.googlesource.com/c/go/+/239197 Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot Reviewed-by: Jeremy Faller Reviewed-by: Than McIntosh --- src/cmd/link/internal/ld/asmb.go | 12 +++++--- src/cmd/link/internal/ld/data.go | 52 ++++++-------------------------- src/cmd/link/internal/ld/main.go | 5 +-- 3 files changed, 19 insertions(+), 50 deletions(-) diff --git a/src/cmd/link/internal/ld/asmb.go b/src/cmd/link/internal/ld/asmb.go index a9987ba207..d4e358cebc 100644 --- a/src/cmd/link/internal/ld/asmb.go +++ b/src/cmd/link/internal/ld/asmb.go @@ -6,19 +6,23 @@ package ld import ( "cmd/internal/objabi" - "cmd/link/internal/loader" "fmt" "sync" ) // Assembling the binary is broken into two steps: -// - writing out the code/data/dwarf Segments +// - writing out the code/data/dwarf Segments, applying relocations on the fly // - writing out the architecture specific pieces. // This function handles the first part. -func asmb(ctxt *Link, ldr *loader.Loader) { +func asmb(ctxt *Link) { + ctxt.loader.InitOutData() + if ctxt.IsExternal() { + ctxt.loader.InitExtRelocs() + } + // TODO(jfaller): delete me. if thearch.Asmb != nil { - thearch.Asmb(ctxt, ldr) + thearch.Asmb(ctxt, ctxt.loader) return } diff --git a/src/cmd/link/internal/ld/data.go b/src/cmd/link/internal/ld/data.go index 73003179ab..ed7129a156 100644 --- a/src/cmd/link/internal/ld/data.go +++ b/src/cmd/link/internal/ld/data.go @@ -640,41 +640,6 @@ func (ctxt *Link) makeRelocSymState() *relocSymState { } } -func (ctxt *Link) reloc() { - var wg sync.WaitGroup - ldr := ctxt.loader - if ctxt.IsExternal() { - ldr.InitExtRelocs() - } - wg.Add(3) - go func() { - if !ctxt.IsWasm() { // On Wasm, text relocations are applied in Asmb2. - st := ctxt.makeRelocSymState() - for _, s := range ctxt.Textp { - st.relocsym(s, ldr.OutData(s)) - } - } - wg.Done() - }() - go func() { - st := ctxt.makeRelocSymState() - for _, s := range ctxt.datap { - st.relocsym(s, ldr.OutData(s)) - } - wg.Done() - }() - go func() { - st := ctxt.makeRelocSymState() - for _, si := range dwarfp { - for _, s := range si.syms { - st.relocsym(s, ldr.OutData(s)) - } - } - wg.Done() - }() - wg.Wait() -} - func windynrelocsym(ctxt *Link, rel *loader.SymbolBuilder, s loader.Sym) { var su *loader.SymbolBuilder relocs := ctxt.loader.Relocs(s) @@ -801,7 +766,7 @@ func (state *dodataState) dynreloc(ctxt *Link) { } func CodeblkPad(ctxt *Link, out *OutBuf, addr int64, size int64, pad []byte) { - writeBlocks(out, ctxt.outSem, ctxt.loader, ctxt.Textp, addr, size, pad) + writeBlocks(ctxt, out, ctxt.outSem, ctxt.loader, ctxt.Textp, addr, size, pad) } const blockSize = 1 << 20 // 1MB chunks written at a time. @@ -811,7 +776,7 @@ const blockSize = 1 << 20 // 1MB chunks written at a time. // as many goroutines as necessary to accomplish this task. This call then // blocks, waiting on the writes to complete. Note that we use the sem parameter // to limit the number of concurrent writes taking place. -func writeBlocks(out *OutBuf, sem chan int, ldr *loader.Loader, syms []loader.Sym, addr, size int64, pad []byte) { +func writeBlocks(ctxt *Link, out *OutBuf, sem chan int, ldr *loader.Loader, syms []loader.Sym, addr, size int64, pad []byte) { for i, s := range syms { if ldr.SymValue(s) >= addr && !ldr.AttrSubSymbol(s) { syms = syms[i:] @@ -876,12 +841,12 @@ func writeBlocks(out *OutBuf, sem chan int, ldr *loader.Loader, syms []loader.Sy sem <- 1 wg.Add(1) go func(o *OutBuf, ldr *loader.Loader, syms []loader.Sym, addr, size int64, pad []byte) { - writeBlock(o, ldr, syms, addr, size, pad) + writeBlock(ctxt, o, ldr, syms, addr, size, pad) wg.Done() <-sem }(o, ldr, syms, addr, length, pad) } else { // output not mmaped, don't parallelize. - writeBlock(out, ldr, syms, addr, length, pad) + writeBlock(ctxt, out, ldr, syms, addr, length, pad) } // Prepare for the next loop. @@ -894,7 +859,7 @@ func writeBlocks(out *OutBuf, sem chan int, ldr *loader.Loader, syms []loader.Sy wg.Wait() } -func writeBlock(out *OutBuf, ldr *loader.Loader, syms []loader.Sym, addr, size int64, pad []byte) { +func writeBlock(ctxt *Link, out *OutBuf, ldr *loader.Loader, syms []loader.Sym, addr, size int64, pad []byte) { for i, s := range syms { if ldr.SymValue(s) >= addr && !ldr.AttrSubSymbol(s) { syms = syms[i:] @@ -902,6 +867,8 @@ func writeBlock(out *OutBuf, ldr *loader.Loader, syms []loader.Sym, addr, size i } } + st := ctxt.makeRelocSymState() + // This doesn't distinguish the memory size from the file // size, and it lays out the file based on Symbol.Value, which // is the virtual address. DWARF compression changes file sizes, @@ -924,6 +891,7 @@ func writeBlock(out *OutBuf, ldr *loader.Loader, syms []loader.Sym, addr, size i addr = val } out.WriteSym(ldr, s) + st.relocsym(s, ldr.OutData(s)) addr += int64(len(ldr.Data(s))) siz := ldr.SymSize(s) if addr < val+siz { @@ -973,7 +941,7 @@ func DatblkBytes(ctxt *Link, addr int64, size int64) []byte { } func writeDatblkToOutBuf(ctxt *Link, out *OutBuf, addr int64, size int64) { - writeBlocks(out, ctxt.outSem, ctxt.loader, ctxt.datap, addr, size, zeros[:]) + writeBlocks(ctxt, out, ctxt.outSem, ctxt.loader, ctxt.datap, addr, size, zeros[:]) } func dwarfblk(ctxt *Link, out *OutBuf, addr int64, size int64) { @@ -991,7 +959,7 @@ func dwarfblk(ctxt *Link, out *OutBuf, addr int64, size int64) { for i := range dwarfp { syms = append(syms, dwarfp[i].syms...) } - writeBlocks(out, ctxt.outSem, ctxt.loader, syms, addr, size, zeros[:]) + writeBlocks(ctxt, out, ctxt.outSem, ctxt.loader, syms, addr, size, zeros[:]) } var zeros [512]byte diff --git a/src/cmd/link/internal/ld/main.go b/src/cmd/link/internal/ld/main.go index e68997f5a2..252c3c5530 100644 --- a/src/cmd/link/internal/ld/main.go +++ b/src/cmd/link/internal/ld/main.go @@ -315,10 +315,7 @@ func Main(arch *sys.Arch, theArch Arch) { // asmb will redirect symbols to the output file mmap, and relocations // will be applied directly there. bench.Start("Asmb") - ctxt.loader.InitOutData() - asmb(ctxt, ctxt.loader) - bench.Start("reloc") - ctxt.reloc() + asmb(ctxt) bench.Start("Asmb2") asmb2(ctxt) -- 2.50.0