From fb184a383e756cb57267590ac290be0d3bb64874 Mon Sep 17 00:00:00 2001 From: Than McIntosh Date: Fri, 16 Oct 2020 09:22:31 -0400 Subject: [PATCH] cmd/link: emit include directories in DWARF line table prologue This patch changes the way the linker emits the DWARF line table prologue, specifically the file table. Previously files were left unmodified, and the directory table was empty. For each compilation unit we now scan the unit file table and build up a common set of directories, emit them into the directory table, and then emit file entries that refer to the dirs. This provides a modest binary size savings. For kubernetes kubelet: $ objdump -h /tmp/kubelet.old | fgrep debug_line 36 .zdebug_line 019a55f5 0000000000000000 0000000000000000 084a5123 2**0 $ objdump -h /tmp/kubelet.new | fgrep debug_line 36 .zdebug_line 01146fd2 0000000000000000 0000000000000000 084a510a 2**0 [where the value following the section name above is the section size in hex, so roughly a 30% decrease in this case.] The actual savings will depend on the length of the pathnames involved, so it's hard to really pin down how much savings we'll see here. In addition, emitting the files this way reduces the "compressibility" of the line table, so there could even be cases where we don't win at all. Updates #6853, #19784, #36495. Change-Id: I298d8561da5ed3ebc9d38aa772874851baa2f4f4 Reviewed-on: https://go-review.googlesource.com/c/go/+/263017 Run-TryBot: Than McIntosh TryBot-Result: Go Bot Reviewed-by: Austin Clements Trust: Than McIntosh --- src/cmd/link/internal/ld/dwarf.go | 109 ++++++++++++++++++++---------- src/debug/dwarf/line.go | 6 +- src/debug/dwarf/line_test.go | 8 +++ 3 files changed, 88 insertions(+), 35 deletions(-) diff --git a/src/cmd/link/internal/ld/dwarf.go b/src/cmd/link/internal/ld/dwarf.go index 2b95ad5a67..2ab9a55e96 100644 --- a/src/cmd/link/internal/ld/dwarf.go +++ b/src/cmd/link/internal/ld/dwarf.go @@ -23,6 +23,7 @@ import ( "cmd/link/internal/sym" "fmt" "log" + "path" "runtime" "sort" "strings" @@ -1173,13 +1174,81 @@ func expandFile(fname string) string { return expandGoroot(fname) } -// writelines collects up and chai,ns together the symbols needed to +// writeDirFileTables emits the portion of the DWARF line table +// prologue containing the include directories and file names, +// described in section 6.2.4 of the DWARF 4 standard. It walks the +// filepaths for the unit to discover any common directories, which +// are emitted to the directory table first, then the file table is +// emitted after that. +func (d *dwctxt) writeDirFileTables(unit *sym.CompilationUnit, lsu *loader.SymbolBuilder) { + type fileDir struct { + base string + dir int + } + dirNums := make(map[string]int) + dirs := []string{""} + files := []fileDir{} + + // Preprocess files to collect directories. This assumes that the + // file table is already de-duped. + for i, name := range unit.FileTable { + name := expandFile(name) + if len(name) == 0 { + // Can't have empty filenames, and having a unique + // filename is quite useful for debugging. + name = fmt.Sprintf("_%d", i) + } + // Note the use of "path" here and not "filepath". The compiler + // hard-codes to use "/" in DWARF paths (even for Windows), so we + // want to maintain that here. + file := path.Base(name) + dir := path.Dir(name) + dirIdx, ok := dirNums[dir] + if !ok && dir != "." { + dirIdx = len(dirNums) + 1 + dirNums[dir] = dirIdx + dirs = append(dirs, dir) + } + files = append(files, fileDir{base: file, dir: dirIdx}) + + // We can't use something that may be dead-code + // eliminated from a binary here. proc.go contains + // main and the scheduler, so it's not going anywhere. + if i := strings.Index(name, "runtime/proc.go"); i >= 0 { + d.dwmu.Lock() + if gdbscript == "" { + k := strings.Index(name, "runtime/proc.go") + gdbscript = name[:k] + "runtime/runtime-gdb.py" + } + d.dwmu.Unlock() + } + } + + // Emit directory section. This is a series of nul terminated + // strings, followed by a single zero byte. + lsDwsym := dwSym(lsu.Sym()) + for k := 1; k < len(dirs); k++ { + d.AddString(lsDwsym, dirs[k]) + } + lsu.AddUint8(0) // terminator + + // Emit file section. + for k := 0; k < len(files); k++ { + d.AddString(lsDwsym, files[k].base) + dwarf.Uleb128put(d, lsDwsym, int64(files[k].dir)) + lsu.AddUint8(0) // mtime + lsu.AddUint8(0) // length + } + lsu.AddUint8(0) // terminator +} + +// writelines collects up and chains together the symbols needed to // form the DWARF line table for the specified compilation unit, // returning a list of symbols. The returned list will include an -// initial symbol containing the line table header and prolog (with +// initial symbol containing the line table header and prologue (with // file table), then a series of compiler-emitted line table symbols // (one per live function), and finally an epilog symbol containing an -// end-of-sequence operator. The prolog and epilog symbols are passed +// end-of-sequence operator. The prologue and epilog symbols are passed // in (having been created earlier); here we add content to them. func (d *dwctxt) writelines(unit *sym.CompilationUnit, lineProlog loader.Sym) []loader.Sym { is_stmt := uint8(1) // initially = recommended default_is_stmt = 1, tracks is_stmt toggles. @@ -1220,39 +1289,11 @@ func (d *dwctxt) writelines(unit *sym.CompilationUnit, lineProlog loader.Sym) [] lsu.AddUint8(0) // standard_opcode_lengths[8] lsu.AddUint8(1) // standard_opcode_lengths[9] lsu.AddUint8(0) // standard_opcode_lengths[10] - lsu.AddUint8(0) // include_directories (empty) - - // Copy over the file table. - fileNums := make(map[string]int) - for i, name := range unit.FileTable { - name := expandFile(name) - if len(name) == 0 { - // Can't have empty filenames, and having a unique - // filename is quite useful for debugging. - name = fmt.Sprintf("_%d", i) - } - fileNums[name] = i + 1 - d.AddString(lsDwsym, name) - lsu.AddUint8(0) - lsu.AddUint8(0) - lsu.AddUint8(0) - // We can't use something that may be dead-code - // eliminated from a binary here. proc.go contains - // main and the scheduler, so it's not going anywhere. - if i := strings.Index(name, "runtime/proc.go"); i >= 0 { - d.dwmu.Lock() - if gdbscript == "" { - k := strings.Index(name, "runtime/proc.go") - gdbscript = name[:k] + "runtime/runtime-gdb.py" - } - d.dwmu.Unlock() - } - } + // Call helper to emit dir and file sections. + d.writeDirFileTables(unit, lsu) - // 4 zeros: the string termination + 3 fields. - lsu.AddUint8(0) - // terminate file_names. + // capture length at end of file names. headerend = lsu.Size() unitlen := lsu.Size() - unitstart diff --git a/src/debug/dwarf/line.go b/src/debug/dwarf/line.go index 7692f05552..c4937ca7dd 100644 --- a/src/debug/dwarf/line.go +++ b/src/debug/dwarf/line.go @@ -814,7 +814,11 @@ func pathJoin(dirname, filename string) string { // Drives are the same. Ignore drive on filename. } if !(strings.HasSuffix(dirname, "/") || strings.HasSuffix(dirname, `\`)) && dirname != "" { - dirname += `\` + sep := `\` + if strings.HasPrefix(dirname, "/") { + sep = `/` + } + dirname += sep } return drive + dirname + filename } diff --git a/src/debug/dwarf/line_test.go b/src/debug/dwarf/line_test.go index 1fd9b19b03..b13818e8b5 100644 --- a/src/debug/dwarf/line_test.go +++ b/src/debug/dwarf/line_test.go @@ -341,6 +341,14 @@ var joinTests = []joinTest{ {`\\host\share\`, `foo\bar`, `\\host\share\foo\bar`}, {`//host/share/`, `foo/bar`, `//host/share/foo/bar`}, + // Note: the Go compiler currently emits DWARF line table paths + // with '/' instead of '\' (see issues #19784, #36495). These + // tests are to cover cases that might come up for Windows Go + // binaries. + {`c:/workdir/go/src/x`, `y.go`, `c:/workdir/go/src/x/y.go`}, + {`d:/some/thing/`, `b.go`, `d:/some/thing/b.go`}, + {`e:\blah\`, `foo.c`, `e:\blah\foo.c`}, + // The following are "best effort". We shouldn't see relative // base directories in DWARF, but these test that pathJoin // doesn't fail miserably if it sees one. -- 2.48.1