From 987ce938245566f8a8568cb3b7f43ff8442c2353 Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Fri, 26 Jun 2020 16:35:49 -0400 Subject: [PATCH] [dev.link] cmd/link: emit ELF relocations in mmap MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Currently, ELF relocations are generated sequentially in the heap and flushed to output file periodically. In fact, in some cases, the output size of the relocation records can be easily computed, as a relocation entry has fixed size. We only need to count the number of relocation records to compute the size. Once the size is computed, we can mmap the output with the proper size, and directly write relocation records in the mapped memory. It also opens the possibility of writing relocations in parallel (not done in this CL). Note: on some architectures, a Go relocation may turn into multiple ELF relocations, which makes size calculation harder. This CL does not handle those cases, and it still writes sequentially in the heap there. Linking cmd/compile with external linking, name old time/op new time/op delta Asmb2 190ms ± 2% 141ms ± 4% -25.74% (p=0.000 n=10+10) name old alloc/op new alloc/op delta Asmb2_GC 66.8MB ± 0% 8.2MB ± 0% -87.79% (p=0.008 n=5+5) name old live-B new live-B delta Asmb2_GC 66.9M ± 0% 55.2M ± 0% -17.58% (p=0.008 n=5+5) Change-Id: If7056bbe909dc90033eef6b9c4891fcca310602c Reviewed-on: https://go-review.googlesource.com/c/go/+/240399 Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot Reviewed-by: Than McIntosh --- src/cmd/link/internal/amd64/obj.go | 1 + src/cmd/link/internal/arm/obj.go | 1 + src/cmd/link/internal/ld/data.go | 2 ++ src/cmd/link/internal/ld/elf.go | 25 +++++++++++++++++++++- src/cmd/link/internal/ld/lib.go | 15 +++++++------ src/cmd/link/internal/ld/outbuf.go | 5 +---- src/cmd/link/internal/ld/outbuf_mmap.go | 20 ++++++++++++++++- src/cmd/link/internal/ld/outbuf_nommap.go | 7 ++++++ src/cmd/link/internal/ld/outbuf_windows.go | 15 +++++++++++++ src/cmd/link/internal/mips/obj.go | 1 + src/cmd/link/internal/mips64/obj.go | 1 + src/cmd/link/internal/s390x/obj.go | 1 + src/cmd/link/internal/sym/segment.go | 10 +++++++-- 13 files changed, 90 insertions(+), 14 deletions(-) diff --git a/src/cmd/link/internal/amd64/obj.go b/src/cmd/link/internal/amd64/obj.go index 4c525743fe..fcc2499cb0 100644 --- a/src/cmd/link/internal/amd64/obj.go +++ b/src/cmd/link/internal/amd64/obj.go @@ -61,6 +61,7 @@ func Init() (*sys.Arch, ld.Arch) { Archreloc: archreloc, Archrelocvariant: archrelocvariant, Elfreloc1: elfreloc1, + ElfrelocSize: 24, Elfsetupplt: elfsetupplt, Gentext: gentext, Machoreloc1: machoreloc1, diff --git a/src/cmd/link/internal/arm/obj.go b/src/cmd/link/internal/arm/obj.go index 1a572985b6..f25f735b0b 100644 --- a/src/cmd/link/internal/arm/obj.go +++ b/src/cmd/link/internal/arm/obj.go @@ -54,6 +54,7 @@ func Init() (*sys.Arch, ld.Arch) { Archrelocvariant: archrelocvariant, Trampoline: trampoline, Elfreloc1: elfreloc1, + ElfrelocSize: 8, Elfsetupplt: elfsetupplt, Gentext: gentext, Machoreloc1: machoreloc1, diff --git a/src/cmd/link/internal/ld/data.go b/src/cmd/link/internal/ld/data.go index be1af4bcae..7dc2c4662d 100644 --- a/src/cmd/link/internal/ld/data.go +++ b/src/cmd/link/internal/ld/data.go @@ -47,6 +47,7 @@ import ( "strconv" "strings" "sync" + "sync/atomic" ) // isRuntimeDepPkg reports whether pkg is the runtime package or its dependency @@ -581,6 +582,7 @@ func (st *relocSymState) relocsym(s loader.Sym, P []byte) { if len(extRelocs) != 0 { st.finalizeExtRelocSlice(extRelocs) ldr.SetExtRelocs(s, extRelocs) + atomic.AddUint32(&ldr.SymSect(s).Relcount, uint32(len(extRelocs))) } } diff --git a/src/cmd/link/internal/ld/elf.go b/src/cmd/link/internal/ld/elf.go index 80612c4684..bb4e1715f4 100644 --- a/src/cmd/link/internal/ld/elf.go +++ b/src/cmd/link/internal/ld/elf.go @@ -1397,11 +1397,29 @@ func elfrelocsect(ctxt *Link, sect *sym.Section, syms []loader.Sym) { } func elfEmitReloc(ctxt *Link) { - for ctxt.Out.Offset()&7 != 0 { ctxt.Out.Write8(0) } + // Precompute the size needed for the reloc records if we can + // Mmap the output buffer with the proper size. + // + // TODO: on some architectures, one Go relocation may turn to + // multiple ELF relocations, which makes the size not fixed. + // Handle this case better. Maybe increment the counter by the + // number of external reloc records in relocsym. + var sz, filesz int64 + if thearch.ElfrelocSize != 0 { + for _, seg := range Segments { + for _, sect := range seg.Sections { + sz += int64(thearch.ElfrelocSize * sect.Relcount) + } + } + filesz = ctxt.Out.Offset() + sz + ctxt.Out.Mmap(uint64(filesz)) + } + + // Now emits the records. for _, sect := range Segtext.Sections { if sect.Name == ".text" { elfrelocsect(ctxt, sect, ctxt.Textp) @@ -1428,6 +1446,11 @@ func elfEmitReloc(ctxt *Link) { } elfrelocsect(ctxt, sect, si.syms) } + + // sanity check + if thearch.ElfrelocSize != 0 && ctxt.Out.Offset() != filesz { + panic("elfEmitReloc: size mismatch") + } } func addgonote(ctxt *Link, sectionName string, tag uint32, desc []byte) { diff --git a/src/cmd/link/internal/ld/lib.go b/src/cmd/link/internal/ld/lib.go index fbf72f6715..d160139fd5 100644 --- a/src/cmd/link/internal/ld/lib.go +++ b/src/cmd/link/internal/ld/lib.go @@ -236,12 +236,13 @@ type Arch struct { Asmb func(*Link, *loader.Loader) Asmb2 func(*Link, *loader.Loader) - Elfreloc1 func(*Link, *loader.Loader, loader.Sym, loader.ExtRelocView, int64) bool - Elfsetupplt func(ctxt *Link, plt, gotplt *loader.SymbolBuilder, dynamic loader.Sym) - Gentext func(*Link, *loader.Loader) - Machoreloc1 func(*sys.Arch, *OutBuf, *loader.Loader, loader.Sym, loader.ExtRelocView, int64) bool - PEreloc1 func(*sys.Arch, *OutBuf, *loader.Loader, loader.Sym, loader.ExtRelocView, int64) bool - Xcoffreloc1 func(*sys.Arch, *OutBuf, *loader.Loader, loader.Sym, loader.ExtRelocView, int64) bool + Elfreloc1 func(*Link, *loader.Loader, loader.Sym, loader.ExtRelocView, int64) bool + ElfrelocSize uint32 // size of an ELF relocation record, must match Elfreloc1. Currently this can be 0, meaning that the size is not fixed (a Go reloc may turn into multiple ELF reloc). + Elfsetupplt func(ctxt *Link, plt, gotplt *loader.SymbolBuilder, dynamic loader.Sym) + Gentext func(*Link, *loader.Loader) + Machoreloc1 func(*sys.Arch, *OutBuf, *loader.Loader, loader.Sym, loader.ExtRelocView, int64) bool + PEreloc1 func(*sys.Arch, *OutBuf, *loader.Loader, loader.Sym, loader.ExtRelocView, int64) bool + Xcoffreloc1 func(*sys.Arch, *OutBuf, *loader.Loader, loader.Sym, loader.ExtRelocView, int64) bool // TLSIEtoLE converts a TLS Initial Executable relocation to // a TLS Local Executable relocation. @@ -310,6 +311,8 @@ var ( Segrelrodata sym.Segment Segdata sym.Segment Segdwarf sym.Segment + + Segments = []*sym.Segment{&Segtext, &Segrodata, &Segrelrodata, &Segdata, &Segdwarf} ) const pkgdef = "__.PKGDEF" diff --git a/src/cmd/link/internal/ld/outbuf.go b/src/cmd/link/internal/ld/outbuf.go index b474067dd9..f0178288a6 100644 --- a/src/cmd/link/internal/ld/outbuf.go +++ b/src/cmd/link/internal/ld/outbuf.go @@ -149,13 +149,10 @@ func (out *OutBuf) copyHeap() bool { bufLen := len(out.buf) heapLen := len(out.heap) total := uint64(bufLen + heapLen) - out.munmap() if heapLen != 0 { - if err := out.Mmap(total); err != nil { + if err := out.Mmap(total); err != nil { // Mmap will copy out.heap over to out.buf panic(err) } - copy(out.buf[bufLen:], out.heap[:heapLen]) - out.heap = out.heap[:0] } return true } diff --git a/src/cmd/link/internal/ld/outbuf_mmap.go b/src/cmd/link/internal/ld/outbuf_mmap.go index 7280027e92..53b14b09cc 100644 --- a/src/cmd/link/internal/ld/outbuf_mmap.go +++ b/src/cmd/link/internal/ld/outbuf_mmap.go @@ -10,7 +10,15 @@ import ( "syscall" ) +// Mmap maps the output file with the given size. It unmaps the old mapping +// if it is already mapped. It also flushes any in-heap data to the new +// mapping. func (out *OutBuf) Mmap(filesize uint64) (err error) { + oldlen := len(out.buf) + if oldlen != 0 { + out.munmap() + } + for { if err = out.fallocate(filesize); err != syscall.EINTR { break @@ -29,7 +37,17 @@ func (out *OutBuf) Mmap(filesize uint64) (err error) { Exitf("resize output file failed: %v", err) } out.buf, err = syscall.Mmap(int(out.f.Fd()), 0, int(filesize), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED|syscall.MAP_FILE) - return err + if err != nil { + return err + } + + // copy heap to new mapping + if uint64(oldlen+len(out.heap)) > filesize { + panic("mmap size too small") + } + copy(out.buf[oldlen:], out.heap) + out.heap = out.heap[:0] + return nil } func (out *OutBuf) munmap() { diff --git a/src/cmd/link/internal/ld/outbuf_nommap.go b/src/cmd/link/internal/ld/outbuf_nommap.go index bad01dc6d5..6b4025384b 100644 --- a/src/cmd/link/internal/ld/outbuf_nommap.go +++ b/src/cmd/link/internal/ld/outbuf_nommap.go @@ -6,9 +6,16 @@ package ld +// Mmap allocates an in-heap output buffer with the given size. It copies +// any old data (if any) to the new buffer. func (out *OutBuf) Mmap(filesize uint64) error { // We need space to put all the symbols before we apply relocations. + oldheap := out.heap + if filesize < uint64(len(oldheap)) { + panic("mmap size too small") + } out.heap = make([]byte, filesize) + copy(out.heap, oldheap) return nil } diff --git a/src/cmd/link/internal/ld/outbuf_windows.go b/src/cmd/link/internal/ld/outbuf_windows.go index 807c0e227d..60dc1ab92d 100644 --- a/src/cmd/link/internal/ld/outbuf_windows.go +++ b/src/cmd/link/internal/ld/outbuf_windows.go @@ -10,7 +10,15 @@ import ( "unsafe" ) +// Mmap maps the output file with the given size. It unmaps the old mapping +// if it is already mapped. It also flushes any in-heap data to the new +// mapping. func (out *OutBuf) Mmap(filesize uint64) error { + oldlen := len(out.buf) + if oldlen != 0 { + out.munmap() + } + err := out.f.Truncate(int64(filesize)) if err != nil { Exitf("resize output file failed: %v", err) @@ -28,6 +36,13 @@ func (out *OutBuf) Mmap(filesize uint64) error { return err } *(*reflect.SliceHeader)(unsafe.Pointer(&out.buf)) = reflect.SliceHeader{Data: ptr, Len: int(filesize), Cap: int(filesize)} + + // copy heap to new mapping + if uint64(oldlen+len(out.heap)) > filesize { + panic("mmap size too small") + } + copy(out.buf[oldlen:], out.heap) + out.heap = out.heap[:0] return nil } diff --git a/src/cmd/link/internal/mips/obj.go b/src/cmd/link/internal/mips/obj.go index cc3cc431b7..e59c382bfa 100644 --- a/src/cmd/link/internal/mips/obj.go +++ b/src/cmd/link/internal/mips/obj.go @@ -53,6 +53,7 @@ func Init() (*sys.Arch, ld.Arch) { Archreloc: archreloc, Archrelocvariant: archrelocvariant, Elfreloc1: elfreloc1, + ElfrelocSize: 8, Elfsetupplt: elfsetupplt, Gentext: gentext, Machoreloc1: machoreloc1, diff --git a/src/cmd/link/internal/mips64/obj.go b/src/cmd/link/internal/mips64/obj.go index 449a200928..6ef27cedb9 100644 --- a/src/cmd/link/internal/mips64/obj.go +++ b/src/cmd/link/internal/mips64/obj.go @@ -52,6 +52,7 @@ func Init() (*sys.Arch, ld.Arch) { Archreloc: archreloc, Archrelocvariant: archrelocvariant, Elfreloc1: elfreloc1, + ElfrelocSize: 24, Elfsetupplt: elfsetupplt, Gentext: gentext, Machoreloc1: machoreloc1, diff --git a/src/cmd/link/internal/s390x/obj.go b/src/cmd/link/internal/s390x/obj.go index bb62fe179f..8acc1d4917 100644 --- a/src/cmd/link/internal/s390x/obj.go +++ b/src/cmd/link/internal/s390x/obj.go @@ -51,6 +51,7 @@ func Init() (*sys.Arch, ld.Arch) { Archreloc: archreloc, Archrelocvariant: archrelocvariant, Elfreloc1: elfreloc1, + ElfrelocSize: 24, Elfsetupplt: elfsetupplt, Gentext: gentext, Machoreloc1: machoreloc1, diff --git a/src/cmd/link/internal/sym/segment.go b/src/cmd/link/internal/sym/segment.go index 9b49c62dda..97853b9355 100644 --- a/src/cmd/link/internal/sym/segment.go +++ b/src/cmd/link/internal/sym/segment.go @@ -55,6 +55,12 @@ type Section struct { Elfsect interface{} // an *ld.ElfShdr Reloff uint64 Rellen uint64 - Sym LoaderSym // symbol for the section, if any - Index uint16 // each section has a unique index, used internally + // Relcount is the number of *host* relocations applied to this section + // (when external linking). + // Incremented atomically on multiple goroutines. + // Note: this may differ from number of Go relocations, as one Go relocation + // may turn into multiple host relocations. + Relcount uint32 + Sym LoaderSym // symbol for the section, if any + Index uint16 // each section has a unique index, used internally } -- 2.50.0