From: Heschi Kreinick Date: Sun, 6 May 2018 01:49:40 +0000 (-0400) Subject: cmd/link: compress DWARF sections in ELF binaries X-Git-Tag: go1.11beta1~74 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=594eae5ad0ac232503a55c2c76c699d1d58b1699;p=gostls13.git cmd/link: compress DWARF sections in ELF binaries Forked from CL 111895. The trickiest part of this is that the binary layout code (blk, elfshbits, and various other things) assumes a constant offset between symbols' and sections' file locations and their virtual addresses. Compression, of course, breaks this constant offset. But we need to assign virtual addresses to everything before compression in order to resolve relocations before compression. As a result, compression needs to re-compute the "address" of the DWARF sections and symbols based on their compressed size. Luckily, these are at the end of the file, so this doesn't perturb any other sections or symbols. (And there is, of course, a surprising amount of code that assumes the DWARF segment comes last, so what's one more place?) Relevant benchmarks: name old time/op new time/op delta StdCmd 10.3s ± 2% 10.8s ± 1% +5.43% (p=0.000 n=30+30) name old text-bytes new text-bytes delta HelloSize 746kB ± 0% 746kB ± 0% ~ (all equal) CmdGoSize 8.41MB ± 0% 8.41MB ± 0% ~ (all equal) [Geo mean] 2.50MB 2.50MB +0.00% name old data-bytes new data-bytes delta HelloSize 10.6kB ± 0% 10.6kB ± 0% ~ (all equal) CmdGoSize 252kB ± 0% 252kB ± 0% ~ (all equal) [Geo mean] 51.5kB 51.5kB +0.00% name old bss-bytes new bss-bytes delta HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal) CmdGoSize 145kB ± 0% 145kB ± 0% ~ (all equal) [Geo mean] 135kB 135kB +0.00% name old exe-bytes new exe-bytes delta HelloSize 1.60MB ± 0% 1.05MB ± 0% -34.39% (p=0.000 n=30+30) CmdGoSize 16.5MB ± 0% 11.3MB ± 0% -31.76% (p=0.000 n=30+30) [Geo mean] 5.14MB 3.44MB -33.08% Fixes #11799. Updates #6853. Change-Id: I64197afe4c01a237523a943088051ee056331c6f Reviewed-on: https://go-review.googlesource.com/118276 Run-TryBot: Heschi Kreinick TryBot-Result: Gobot Gobot Reviewed-by: Austin Clements Reviewed-by: Ian Lance Taylor --- diff --git a/src/cmd/dist/buildtool.go b/src/cmd/dist/buildtool.go index 889fd02aaf..94b7587026 100644 --- a/src/cmd/dist/buildtool.go +++ b/src/cmd/dist/buildtool.go @@ -80,6 +80,8 @@ var bootstrapDirs = []string{ "cmd/link/internal/s390x", "cmd/link/internal/sym", "cmd/link/internal/x86", + "compress/flate", + "compress/zlib", "cmd/link/internal/wasm", "container/heap", "debug/dwarf", diff --git a/src/cmd/link/internal/ld/data.go b/src/cmd/link/internal/ld/data.go index e358734526..93c77c006b 100644 --- a/src/cmd/link/internal/ld/data.go +++ b/src/cmd/link/internal/ld/data.go @@ -32,10 +32,13 @@ package ld import ( + "bytes" "cmd/internal/gcprog" "cmd/internal/objabi" "cmd/internal/sys" "cmd/link/internal/sym" + "compress/zlib" + "encoding/binary" "fmt" "log" "os" @@ -679,6 +682,10 @@ func blk(ctxt *Link, syms []*sym.Symbol, addr, size int64, pad []byte) { } } + // This doesn't distinguish the memory size from the file + // size, and it lays out the file based on Symbol.Value, which + // is the virtual address. DWARF compression changes file sizes, + // so dwarfcompress will fix this up later if necessary. eaddr := addr + size for _, s := range syms { if s.Attr.SubSymbol() { @@ -2154,3 +2161,44 @@ func (ctxt *Link) AddTramp(s *sym.Symbol) { ctxt.Logf("trampoline %s inserted\n", s) } } + +// compressSyms compresses syms and returns the contents of the +// compressed section. If the section would get larger, it returns nil. +func compressSyms(ctxt *Link, syms []*sym.Symbol) []byte { + var total int64 + for _, sym := range syms { + total += sym.Size + } + + var buf bytes.Buffer + buf.Write([]byte("ZLIB")) + var sizeBytes [8]byte + binary.BigEndian.PutUint64(sizeBytes[:], uint64(total)) + buf.Write(sizeBytes[:]) + + z := zlib.NewWriter(&buf) + for _, sym := range syms { + if _, err := z.Write(sym.P); err != nil { + log.Fatalf("compression failed: %s", err) + } + for i := sym.Size - int64(len(sym.P)); i > 0; { + b := zeros[:] + if i < int64(len(b)) { + b = b[:i] + } + n, err := z.Write(b) + if err != nil { + log.Fatalf("compression failed: %s", err) + } + i -= int64(n) + } + } + if err := z.Close(); err != nil { + log.Fatalf("compression failed: %s", err) + } + if int64(buf.Len()) >= total { + // Compression didn't save any space. + return nil + } + return buf.Bytes() +} diff --git a/src/cmd/link/internal/ld/dwarf.go b/src/cmd/link/internal/ld/dwarf.go index 328ea1c0f4..3824dc3c2a 100644 --- a/src/cmd/link/internal/ld/dwarf.go +++ b/src/cmd/link/internal/ld/dwarf.go @@ -1908,23 +1908,14 @@ func dwarfaddshstrings(ctxt *Link, shstrtab *sym.Symbol) { return } - Addstring(shstrtab, ".debug_abbrev") - Addstring(shstrtab, ".debug_frame") - Addstring(shstrtab, ".debug_info") - Addstring(shstrtab, ".debug_loc") - Addstring(shstrtab, ".debug_line") - Addstring(shstrtab, ".debug_pubnames") - Addstring(shstrtab, ".debug_pubtypes") - Addstring(shstrtab, ".debug_gdb_scripts") - Addstring(shstrtab, ".debug_ranges") - if ctxt.LinkMode == LinkExternal { - Addstring(shstrtab, elfRelType+".debug_info") - Addstring(shstrtab, elfRelType+".debug_loc") - Addstring(shstrtab, elfRelType+".debug_line") - Addstring(shstrtab, elfRelType+".debug_frame") - Addstring(shstrtab, elfRelType+".debug_pubnames") - Addstring(shstrtab, elfRelType+".debug_pubtypes") - Addstring(shstrtab, elfRelType+".debug_ranges") + secs := []string{"abbrev", "frame", "info", "loc", "line", "pubnames", "pubtypes", "gdb_scripts", "ranges"} + for _, sec := range secs { + Addstring(shstrtab, ".debug_"+sec) + if ctxt.LinkMode == LinkExternal { + Addstring(shstrtab, elfRelType+".debug_"+sec) + } else { + Addstring(shstrtab, ".zdebug_"+sec) + } } } @@ -1937,6 +1928,7 @@ func dwarfaddelfsectionsyms(ctxt *Link) { if ctxt.LinkMode != LinkExternal { return } + s := ctxt.Syms.Lookup(".debug_info", 0) putelfsectionsym(ctxt.Out, s, s.Sect.Elfsect.(*ElfShdr).shnum) s = ctxt.Syms.Lookup(".debug_abbrev", 0) @@ -1954,3 +1946,58 @@ func dwarfaddelfsectionsyms(ctxt *Link) { putelfsectionsym(ctxt.Out, s, s.Sect.Elfsect.(*ElfShdr).shnum) } } + +// dwarfcompress compresses the DWARF sections. This must happen after +// relocations are applied. After this, dwarfp will contain a +// different (new) set of symbols, and sections may have been replaced. +func dwarfcompress(ctxt *Link) { + if !ctxt.IsELF || ctxt.LinkMode == LinkExternal { + return + } + + var start int + var newDwarfp []*sym.Symbol + Segdwarf.Sections = Segdwarf.Sections[:0] + for i, s := range dwarfp { + // Find the boundaries between sections and compress + // the whole section once we've found the last of its + // symbols. + if i+1 >= len(dwarfp) || s.Sect != dwarfp[i+1].Sect { + s1 := compressSyms(ctxt, dwarfp[start:i+1]) + if s1 == nil { + // Compression didn't help. + newDwarfp = append(newDwarfp, dwarfp[start:i+1]...) + Segdwarf.Sections = append(Segdwarf.Sections, s.Sect) + } else { + compressedSegName := ".zdebug_" + s.Sect.Name[len(".debug_"):] + sect := addsection(ctxt.Arch, &Segdwarf, compressedSegName, 04) + sect.Length = uint64(len(s1)) + newSym := ctxt.Syms.Lookup(compressedSegName, 0) + newSym.P = s1 + newSym.Size = int64(len(s1)) + newSym.Sect = sect + newDwarfp = append(newDwarfp, newSym) + } + start = i + 1 + } + } + dwarfp = newDwarfp + + // Re-compute the locations of the compressed DWARF symbols + // and sections, since the layout of these within the file is + // based on Section.Vaddr and Symbol.Value. + pos := Segdwarf.Vaddr + var prevSect *sym.Section + for _, s := range dwarfp { + s.Value = int64(pos) + if s.Sect != prevSect { + s.Sect.Vaddr = uint64(s.Value) + prevSect = s.Sect + } + if s.Sub != nil { + log.Fatalf("%s: unexpected sub-symbols", s) + } + pos += uint64(s.Size) + } + Segdwarf.Length = pos - Segdwarf.Vaddr +} diff --git a/src/cmd/link/internal/ld/elf.go b/src/cmd/link/internal/ld/elf.go index 60d387c193..877e4bfd5f 100644 --- a/src/cmd/link/internal/ld/elf.go +++ b/src/cmd/link/internal/ld/elf.go @@ -1261,7 +1261,7 @@ func elfshbits(linkmode LinkMode, sect *sym.Section) *ElfShdr { sh.flags |= SHF_TLS sh.type_ = SHT_NOBITS } - if strings.HasPrefix(sect.Name, ".debug") { + if strings.HasPrefix(sect.Name, ".debug") || strings.HasPrefix(sect.Name, ".zdebug") { sh.flags = 0 } diff --git a/src/cmd/link/internal/ld/main.go b/src/cmd/link/internal/ld/main.go index 23dfa277d0..e012383e69 100644 --- a/src/cmd/link/internal/ld/main.go +++ b/src/cmd/link/internal/ld/main.go @@ -226,6 +226,7 @@ func Main(arch *sys.Arch, theArch Arch) { ctxt.dodata() order := ctxt.address() ctxt.reloc() + dwarfcompress(ctxt) ctxt.layout(order) thearch.Asmb(ctxt) ctxt.undef()