From a99f812cba4c5a5207fed9be5488312a44a5df34 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Sat, 2 Jul 2016 17:19:25 -0700 Subject: [PATCH] cmd/objdump: implement objdump of .o files Update goobj reader so it can provide all the information necessary to disassemble .o (and .a) files. Grab architecture of .o files from header. .o files have relocations in them. This CL also contains a simple mechanism to disassemble relocations and add relocation info as an extra column in the output. Fixes #13862 Change-Id: I608fd253ff1522ea47f18be650b38d528dae9054 Reviewed-on: https://go-review.googlesource.com/24818 Reviewed-by: Ian Lance Taylor Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot --- src/cmd/internal/goobj/read.go | 35 +++++++++--- src/cmd/internal/obj/link.go | 8 ++- src/cmd/internal/obj/reloctype_string.go | 17 ++++++ src/cmd/internal/objfile/disasm.go | 24 +++++++- src/cmd/internal/objfile/goobj.go | 71 ++++++++++++++++++++---- src/cmd/internal/objfile/objfile.go | 31 +++++++++-- src/cmd/internal/sys/arch.go | 13 +++++ src/cmd/link/internal/arm/asm.go | 4 +- src/cmd/link/internal/ld/ldelf.go | 2 +- src/cmd/link/internal/ld/ldmacho.go | 2 +- src/cmd/link/internal/ld/link.go | 3 +- src/cmd/link/internal/ld/objfile.go | 2 +- src/cmd/pprof/pprof.go | 5 +- src/debug/gosym/pclntab.go | 10 +++- 14 files changed, 186 insertions(+), 41 deletions(-) create mode 100644 src/cmd/internal/obj/reloctype_string.go diff --git a/src/cmd/internal/goobj/read.go b/src/cmd/internal/goobj/read.go index 214f65cbc4..329f80146d 100644 --- a/src/cmd/internal/goobj/read.go +++ b/src/cmd/internal/goobj/read.go @@ -163,7 +163,7 @@ type Data struct { // A Reloc describes a relocation applied to a memory image to refer // to an address within a particular symbol. type Reloc struct { - // The bytes at [Offset, Offset+Size) within the memory image + // The bytes at [Offset, Offset+Size) within the containing Sym // should be updated to refer to the address Add bytes after the start // of the symbol Sym. Offset int @@ -174,7 +174,7 @@ type Reloc struct { // The Type records the form of address expected in the bytes // described by the previous fields: absolute, PC-relative, and so on. // TODO(rsc): The interpretation of Type is not exposed by this package. - Type int + Type obj.RelocType } // A Var describes a variable in a function stack frame: a declared @@ -220,6 +220,7 @@ type Package struct { SymRefs []SymID // list of symbol names and versions referred to by this pack Syms []*Sym // symbols defined by this package MaxVersion int // maximum Version in any SymID in Syms + Arch string // architecture } var ( @@ -561,14 +562,13 @@ func (r *objReader) parseArchive() error { // The format of that part is defined in a comment at the top // of src/liblink/objfile.c. func (r *objReader) parseObject(prefix []byte) error { - // TODO(rsc): Maybe use prefix and the initial input to - // record the header line from the file, which would - // give the architecture and other version information. - r.p.MaxVersion++ + h := make([]byte, 0, 256) + h = append(h, prefix...) var c1, c2, c3 byte for { c1, c2, c3 = c2, c3, r.readByte() + h = append(h, c3) // The new export format can contain 0 bytes. // Don't consider them errors, only look for r.err != nil. if r.err != nil { @@ -579,6 +579,12 @@ func (r *objReader) parseObject(prefix []byte) error { } } + hs := strings.Fields(string(h)) + if len(hs) >= 4 { + r.p.Arch = hs[3] + } + // TODO: extract OS + build ID if/when we need it + r.readFull(r.tmp[:8]) if !bytes.Equal(r.tmp[:8], []byte("\x00\x00go17ld")) { return r.error(errCorruptObject) @@ -643,7 +649,7 @@ func (r *objReader) parseObject(prefix []byte) error { rel := &s.Reloc[i] rel.Offset = r.readInt() rel.Size = r.readInt() - rel.Type = r.readInt() + rel.Type = obj.RelocType(r.readInt()) rel.Add = r.readInt() rel.Sym = r.readSymID() } @@ -693,3 +699,18 @@ func (r *objReader) parseObject(prefix []byte) error { return nil } + +func (r *Reloc) String(insnOffset uint64) string { + delta := r.Offset - int(insnOffset) + s := fmt.Sprintf("[%d:%d]%s", delta, delta+r.Size, r.Type) + if r.Sym.Name != "" { + if r.Add != 0 { + return fmt.Sprintf("%s:%s+%d", s, r.Sym.Name, r.Add) + } + return fmt.Sprintf("%s:%s", s, r.Sym.Name) + } + if r.Add != 0 { + return fmt.Sprintf("%s:%d", s, r.Add) + } + return s +} diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index e5ed859eb1..f72a191d02 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -442,14 +442,16 @@ const ( type Reloc struct { Off int32 Siz uint8 - Type int32 + Type RelocType Add int64 Sym *LSym } -// Reloc.type +type RelocType int32 + +//go:generate stringer -type=RelocType const ( - R_ADDR = 1 + iota + R_ADDR RelocType = 1 + iota // R_ADDRPOWER relocates a pair of "D-form" instructions (instructions with 16-bit // immediates in the low half of the instruction word), usually addis followed by // another add or a load, inserting the "high adjusted" 16 bits of the address of diff --git a/src/cmd/internal/obj/reloctype_string.go b/src/cmd/internal/obj/reloctype_string.go new file mode 100644 index 0000000000..6de617cd78 --- /dev/null +++ b/src/cmd/internal/obj/reloctype_string.go @@ -0,0 +1,17 @@ +// Code generated by "stringer -type=RelocType"; DO NOT EDIT + +package obj + +import "fmt" + +const _RelocType_name = "R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_METHODOFFR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_PCRELDBLR_ADDRMIPSUR_ADDRMIPSTLS" + +var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 53, 59, 68, 79, 88, 99, 109, 116, 123, 131, 139, 147, 153, 159, 165, 175, 184, 195, 206, 216, 225, 235, 249, 263, 279, 293, 307, 318, 332, 347, 364, 382, 403, 413, 424, 437} + +func (i RelocType) String() string { + i -= 1 + if i < 0 || i >= RelocType(len(_RelocType_index)-1) { + return fmt.Sprintf("RelocType(%d)", i+1) + } + return _RelocType_name[_RelocType_index[i]:_RelocType_index[i+1]] +} diff --git a/src/cmd/internal/objfile/disasm.go b/src/cmd/internal/objfile/disasm.go index 25c3301ab8..771187bfe4 100644 --- a/src/cmd/internal/objfile/disasm.go +++ b/src/cmd/internal/objfile/disasm.go @@ -22,7 +22,7 @@ import ( // Disasm is a disassembler for a given File. type Disasm struct { syms []Sym //symbols in file, sorted by address - pcln *gosym.Table // pcln table + pcln Liner // pcln table text []byte // bytes of text segment (actual instructions) textStart uint64 // start PC of text textEnd uint64 // end PC of text @@ -116,6 +116,7 @@ func (d *Disasm) Print(w io.Writer, filter *regexp.Regexp, start, end uint64) { for _, sym := range d.syms { symStart := sym.Addr symEnd := sym.Addr + uint64(sym.Size) + relocs := sym.Relocs if sym.Code != 'T' && sym.Code != 't' || symStart < d.textStart || symEnd <= start || end <= symStart || @@ -135,7 +136,7 @@ func (d *Disasm) Print(w io.Writer, filter *regexp.Regexp, start, end uint64) { symEnd = end } code := d.text[:end-d.textStart] - d.Decode(symStart, symEnd, func(pc, size uint64, file string, line int, text string) { + d.Decode(symStart, symEnd, relocs, func(pc, size uint64, file string, line int, text string) { i := pc - d.textStart fmt.Fprintf(tw, "\t%s:%d\t%#x\t", base(file), line, pc) if size%4 != 0 || d.goarch == "386" || d.goarch == "amd64" { @@ -158,7 +159,7 @@ func (d *Disasm) Print(w io.Writer, filter *regexp.Regexp, start, end uint64) { } // Decode disassembles the text segment range [start, end), calling f for each instruction. -func (d *Disasm) Decode(start, end uint64, f func(pc, size uint64, file string, line int, text string)) { +func (d *Disasm) Decode(start, end uint64, relocs []Reloc, f func(pc, size uint64, file string, line int, text string)) { if start < d.textStart { start = d.textStart } @@ -171,6 +172,17 @@ func (d *Disasm) Decode(start, end uint64, f func(pc, size uint64, file string, i := pc - d.textStart text, size := d.disasm(code[i:], pc, lookup) file, line, _ := d.pcln.PCToLine(pc) + text += "\t" + first := true + for len(relocs) > 0 && relocs[0].Addr < i+uint64(size) { + if first { + first = false + } else { + text += " " + } + text += relocs[0].Stringer.String(pc - start) + relocs = relocs[1:] + } f(pc, uint64(size), file, line, text) pc += uint64(size) } @@ -247,3 +259,9 @@ var byteOrders = map[string]binary.ByteOrder{ "ppc64le": binary.LittleEndian, "s390x": binary.BigEndian, } + +type Liner interface { + // Given a pc, returns the corresponding file, line, and function data. + // If unknown, returns "",0,nil. + PCToLine(uint64) (string, int, *gosym.Func) +} diff --git a/src/cmd/internal/objfile/goobj.go b/src/cmd/internal/objfile/goobj.go index 43435efc68..230137e0f5 100644 --- a/src/cmd/internal/objfile/goobj.go +++ b/src/cmd/internal/objfile/goobj.go @@ -8,7 +8,9 @@ package objfile import ( "cmd/internal/goobj" + "cmd/internal/sys" "debug/dwarf" + "debug/gosym" "errors" "fmt" "os" @@ -16,6 +18,7 @@ import ( type goobjFile struct { goobj *goobj.Package + f *os.File // the underlying .o or .a file } func openGoobj(r *os.File) (rawFile, error) { @@ -23,7 +26,7 @@ func openGoobj(r *os.File) (rawFile, error) { if err != nil { return nil, err } - return &goobjFile{f}, nil + return &goobjFile{goobj: f, f: r}, nil } func goobjName(id goobj.SymID) string { @@ -55,6 +58,9 @@ func (f *goobjFile) symbols() ([]Sym, error) { if s.Version != 0 { sym.Code += 'a' - 'A' } + for i, r := range s.Reloc { + sym.Relocs = append(sym.Relocs, Reloc{Addr: uint64(s.Data.Offset) + uint64(r.Offset), Size: uint64(r.Size), Stringer: &s.Reloc[i]}) + } syms = append(syms, sym) } @@ -75,23 +81,68 @@ func (f *goobjFile) symbols() ([]Sym, error) { return syms, nil } -// pcln does not make sense for Go object files, because each -// symbol has its own individual pcln table, so there is no global -// space of addresses to map. func (f *goobjFile) pcln() (textStart uint64, symtab, pclntab []byte, err error) { + // Should never be called. We implement Liner below, callers + // should use that instead. return 0, nil, nil, fmt.Errorf("pcln not available in go object file") } -// text does not make sense for Go object files, because -// each function has a separate section. +// Find returns the file name, line, and function data for the given pc. +// Returns "",0,nil if unknown. +// This function implements the Liner interface in preference to pcln() above. +func (f *goobjFile) PCToLine(pc uint64) (string, int, *gosym.Func) { + // TODO: this is really inefficient. Binary search? Memoize last result? + var arch *sys.Arch + for _, a := range sys.Archs { + if a.Name == f.goobj.Arch { + arch = a + break + } + } + if arch == nil { + return "", 0, nil + } + for _, s := range f.goobj.Syms { + if pc < uint64(s.Data.Offset) || pc >= uint64(s.Data.Offset+s.Data.Size) { + continue + } + if s.Func == nil { + return "", 0, nil + } + pcfile := make([]byte, s.Func.PCFile.Size) + _, err := f.f.ReadAt(pcfile, s.Func.PCFile.Offset) + if err != nil { + return "", 0, nil + } + fileID := gosym.PCValue(pcfile, pc-uint64(s.Data.Offset), arch.MinLC) + fileName := s.Func.File[fileID] + pcline := make([]byte, s.Func.PCLine.Size) + _, err = f.f.ReadAt(pcline, s.Func.PCLine.Offset) + if err != nil { + return "", 0, nil + } + line := gosym.PCValue(pcline, pc-uint64(s.Data.Offset), arch.MinLC) + // Note: we provide only the name in the Func structure. + // We could provide more if needed. + return fileName, line, &gosym.Func{Sym: &gosym.Sym{Name: s.Name}} + } + return "", 0, nil +} + +// We treat the whole object file as the text section. func (f *goobjFile) text() (textStart uint64, text []byte, err error) { - return 0, nil, fmt.Errorf("text not available in go object file") + var info os.FileInfo + info, err = f.f.Stat() + if err != nil { + return + } + text = make([]byte, info.Size()) + _, err = f.f.ReadAt(text, 0) + return } -// goarch makes sense but is not exposed in debug/goobj's API, -// and we don't need it yet for any users of internal/objfile. func (f *goobjFile) goarch() string { - return "GOARCH unimplemented for debug/goobj files" + return f.goobj.Arch } func (f *goobjFile) loadAddress() (uint64, error) { diff --git a/src/cmd/internal/objfile/objfile.go b/src/cmd/internal/objfile/objfile.go index e5d99f086b..2bf6363f29 100644 --- a/src/cmd/internal/objfile/objfile.go +++ b/src/cmd/internal/objfile/objfile.go @@ -30,11 +30,24 @@ type File struct { // A Sym is a symbol defined in an executable file. type Sym struct { - Name string // symbol name - Addr uint64 // virtual address of symbol - Size int64 // size in bytes - Code rune // nm code (T for text, D for data, and so on) - Type string // XXX? + Name string // symbol name + Addr uint64 // virtual address of symbol + Size int64 // size in bytes + Code rune // nm code (T for text, D for data, and so on) + Type string // XXX? + Relocs []Reloc // in increasing Addr order +} + +type Reloc struct { + Addr uint64 // Address of first byte that reloc applies to. + Size uint64 // Number of bytes + Stringer RelocStringer +} + +type RelocStringer interface { + // insnOffset is the offset of the instruction containing the relocation + // from the start of the symbol containing the relocation. + String(insnOffset uint64) string } var openers = []func(*os.File) (rawFile, error){ @@ -80,7 +93,13 @@ func (x byAddr) Less(i, j int) bool { return x[i].Addr < x[j].Addr } func (x byAddr) Len() int { return len(x) } func (x byAddr) Swap(i, j int) { x[i], x[j] = x[j], x[i] } -func (f *File) PCLineTable() (*gosym.Table, error) { +func (f *File) PCLineTable() (Liner, error) { + // If the raw file implements Liner directly, use that. + // Currently, only Go intermediate objects and archives (goobj) use this path. + if pcln, ok := f.raw.(Liner); ok { + return pcln, nil + } + // Otherwise, read the pcln tables and build a Liner out of that. textStart, symtab, pclntab, err := f.raw.pcln() if err != nil { return nil, err diff --git a/src/cmd/internal/sys/arch.go b/src/cmd/internal/sys/arch.go index 18accdeb0c..7033f3fb78 100644 --- a/src/cmd/internal/sys/arch.go +++ b/src/cmd/internal/sys/arch.go @@ -146,3 +146,16 @@ var ArchS390X = &Arch{ RegSize: 8, MinLC: 2, } + +var Archs = [...]*Arch{ + Arch386, + ArchAMD64, + ArchAMD64P32, + ArchARM, + ArchARM64, + ArchMIPS64, + ArchMIPS64LE, + ArchPPC64, + ArchPPC64LE, + ArchS390X, +} diff --git a/src/cmd/link/internal/arm/asm.go b/src/cmd/link/internal/arm/asm.go index 84060c4361..b1a0545390 100644 --- a/src/cmd/link/internal/arm/asm.go +++ b/src/cmd/link/internal/arm/asm.go @@ -492,12 +492,12 @@ func archrelocvariant(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol, t int64) int64 { return t } -func addpltreloc(ctxt *ld.Link, plt *ld.Symbol, got *ld.Symbol, sym *ld.Symbol, typ int) *ld.Reloc { +func addpltreloc(ctxt *ld.Link, plt *ld.Symbol, got *ld.Symbol, sym *ld.Symbol, typ obj.RelocType) *ld.Reloc { r := ld.Addrel(plt) r.Sym = got r.Off = int32(plt.Size) r.Siz = 4 - r.Type = int32(typ) + r.Type = typ r.Add = int64(sym.Got) - 8 plt.Attr |= ld.AttrReachable diff --git a/src/cmd/link/internal/ld/ldelf.go b/src/cmd/link/internal/ld/ldelf.go index 2b60343bc6..7848369272 100644 --- a/src/cmd/link/internal/ld/ldelf.go +++ b/src/cmd/link/internal/ld/ldelf.go @@ -912,7 +912,7 @@ func ldelf(ctxt *Link, f *bio.Reader, pkg string, length int64, pn string) { rp.Sym = sym.sym } - rp.Type = 256 + int32(info) + rp.Type = 256 + obj.RelocType(info) rp.Siz = relSize(ctxt, pn, uint32(info)) if rela != 0 { rp.Add = int64(add) diff --git a/src/cmd/link/internal/ld/ldmacho.go b/src/cmd/link/internal/ld/ldmacho.go index 0688d2386e..b846f0cbae 100644 --- a/src/cmd/link/internal/ld/ldmacho.go +++ b/src/cmd/link/internal/ld/ldmacho.go @@ -828,7 +828,7 @@ func ldmacho(ctxt *Link, f *bio.Reader, pkg string, length int64, pn string) { } rp.Siz = rel.length - rp.Type = 512 + (int32(rel.type_) << 1) + int32(rel.pcrel) + rp.Type = 512 + (obj.RelocType(rel.type_) << 1) + obj.RelocType(rel.pcrel) rp.Off = int32(rel.addr) // Handle X86_64_RELOC_SIGNED referencing a section (rel->extrn == 0). diff --git a/src/cmd/link/internal/ld/link.go b/src/cmd/link/internal/ld/link.go index 50abeb5773..56b98a6974 100644 --- a/src/cmd/link/internal/ld/link.go +++ b/src/cmd/link/internal/ld/link.go @@ -32,6 +32,7 @@ package ld import ( "bufio" + "cmd/internal/obj" "cmd/internal/sys" "debug/elf" "fmt" @@ -135,7 +136,7 @@ type Reloc struct { Off int32 Siz uint8 Done uint8 - Type int32 + Type obj.RelocType Variant int32 Add int64 Xadd int64 diff --git a/src/cmd/link/internal/ld/objfile.go b/src/cmd/link/internal/ld/objfile.go index cb77fb5536..dace73161a 100644 --- a/src/cmd/link/internal/ld/objfile.go +++ b/src/cmd/link/internal/ld/objfile.go @@ -326,7 +326,7 @@ overwrite: s.R[i] = Reloc{ Off: r.readInt32(), Siz: r.readUint8(), - Type: r.readInt32(), + Type: obj.RelocType(r.readInt32()), Add: r.readInt64(), Sym: r.readSymIndex(), } diff --git a/src/cmd/pprof/pprof.go b/src/cmd/pprof/pprof.go index 5ee8a112e0..0c979b1831 100644 --- a/src/cmd/pprof/pprof.go +++ b/src/cmd/pprof/pprof.go @@ -6,7 +6,6 @@ package main import ( "debug/dwarf" - "debug/gosym" "flag" "fmt" "net/url" @@ -161,7 +160,7 @@ func (t *objTool) Disasm(file string, start, end uint64) ([]plugin.Inst, error) return nil, err } var asm []plugin.Inst - d.Decode(start, end, func(pc, size uint64, file string, line int, text string) { + d.Decode(start, end, nil, func(pc, size uint64, file string, line int, text string) { asm = append(asm, plugin.Inst{Addr: pc, File: file, Line: line, Text: text}) }) return asm, nil @@ -203,7 +202,7 @@ type file struct { offset uint64 sym []objfile.Sym file *objfile.File - pcln *gosym.Table + pcln objfile.Liner triedDwarf bool dwarf *dwarf.Data diff --git a/src/debug/gosym/pclntab.go b/src/debug/gosym/pclntab.go index e859d5aed5..e94ed19d7d 100644 --- a/src/debug/gosym/pclntab.go +++ b/src/debug/gosym/pclntab.go @@ -291,13 +291,17 @@ func (t *LineTable) step(p *[]byte, pc *uint64, val *int32, first bool) bool { return true } +// PCValue looks up the given PC in a pc value table. target is the +// offset of the pc from the entry point. +func PCValue(tab []byte, target uint64, quantum int) int { + t := LineTable{Data: tab, quantum: uint32(quantum)} + return int(t.pcvalue(0, 0, target)) +} + // pcvalue reports the value associated with the target pc. // off is the offset to the beginning of the pc-value table, // and entry is the start PC for the corresponding function. func (t *LineTable) pcvalue(off uint32, entry, targetpc uint64) int32 { - if off == 0 { - return -1 - } p := t.Data[off:] val := int32(-1) -- 2.48.1