From 6ece1b561c8b42efce68484edd2c9a6449264e96 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 19 Aug 2009 10:05:11 -0700 Subject: [PATCH] ELF reader and Go symbol table and PC/line table decoder. R=rsc APPROVED=rsc DELTA=1425 (1425 added, 0 deleted, 0 changed) OCL=33432 CL=33517 --- usr/austin/sym/Makefile | 14 + usr/austin/sym/binary.go | 190 ++++++++++++ usr/austin/sym/elf.go | 237 ++++++++++++++ usr/austin/sym/elffmt.go | 389 +++++++++++++++++++++++ usr/austin/sym/gosymtab.go | 615 +++++++++++++++++++++++++++++++++++++ 5 files changed, 1445 insertions(+) create mode 100644 usr/austin/sym/Makefile create mode 100644 usr/austin/sym/binary.go create mode 100644 usr/austin/sym/elf.go create mode 100644 usr/austin/sym/elffmt.go create mode 100644 usr/austin/sym/gosymtab.go diff --git a/usr/austin/sym/Makefile b/usr/austin/sym/Makefile new file mode 100644 index 0000000000..8a0daef0c3 --- /dev/null +++ b/usr/austin/sym/Makefile @@ -0,0 +1,14 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include $(GOROOT)/src/Make.$(GOARCH) + +TARG=sym +GOFILES=\ + binary.go\ + elf.go\ + elffmt.go\ + gosymtab.go\ + +include $(GOROOT)/src/Make.pkg diff --git a/usr/austin/sym/binary.go b/usr/austin/sym/binary.go new file mode 100644 index 0000000000..d06179cec0 --- /dev/null +++ b/usr/austin/sym/binary.go @@ -0,0 +1,190 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sym + +import ( + "bufio"; + "io"; + "log"; + "os"; + "reflect"; +) + +type byteOrder interface { + Uint16(b []byte) uint16; + Uint32(b []byte) uint32; + Uint64(b []byte) uint64; + String() string; +} + +type olsb struct {} + +func (olsb) Uint16(b []byte) uint16 { + return uint16(b[0]) | uint16(b[1]) << 8; +} + +func (olsb) Uint32(b []byte) uint32 { + return uint32(b[0]) | uint32(b[1]) << 8 | uint32(b[2]) << 16 | uint32(b[3]) << 24; +} + +func (olsb) Uint64(b []byte) uint64 { + return uint64(b[0]) | uint64(b[1]) << 8 | uint64(b[2]) << 16 | uint64(b[3]) << 24 | uint64(b[4]) << 32 | uint64(b[5]) << 40 | uint64(b[6]) << 48 | uint64(b[7]) << 56; +} + +func (olsb) String() string { + return "LSB"; +} + +type omsb struct {} + +func (omsb) Uint16(b []byte) uint16 { + return uint16(b[1]) | uint16(b[0]) << 8; +} + +func (omsb) Uint32(b []byte) uint32 { + return uint32(b[3]) | uint32(b[2]) << 8 | uint32(b[1]) << 16 | uint32(b[0]) << 24; +} + +func (omsb) Uint64(b []byte) uint64 { + return uint64(b[7]) | uint64(b[6]) << 8 | uint64(b[5]) << 16 | uint64(b[4]) << 24 | uint64(b[3]) << 32 | uint64(b[2]) << 40 | uint64(b[1]) << 48 | uint64(b[0]) << 56; +} + +func (omsb) String() string { + return "MSB"; +} + +var ( + lsb = olsb{}; + msb = omsb{}; +) + +// A binaryReader decodes binary data from another reader. On an +// error, the Read methods simply return 0 and record the error, to +// make it more convenient to decode long sequences of binary data. +// The caller should use the Error method when convenient to check +// for errors. +type binaryReader struct { + *bufio.Reader; + err os.Error; + order byteOrder; +} + +// newBinaryReader creates a new binary data reader backed by the +// given reader and using the given byte order for decoding. +func newBinaryReader(r io.Reader, o byteOrder) *binaryReader { + return &binaryReader{bufio.NewReader(r), nil, o}; +} + +// Error returns the recorded error, or nil if no error has occurred. +func (r *binaryReader) Error() os.Error { + return r.err; +} + +func (r *binaryReader) ReadUint8() uint8 { + var buf [1]byte; + n, err := io.ReadFull(r.Reader, &buf); + if r.err == nil && err != nil { + r.err = err; + } + return buf[0]; +} + +func (r *binaryReader) ReadUint16() uint16 { + var buf [2]byte; + n, err := io.ReadFull(r.Reader, &buf); + if r.err == nil && err != nil { + r.err = err; + } + return r.order.Uint16(&buf); +} + +func (r *binaryReader) ReadUint32() uint32 { + var buf [4]byte; + n, err := io.ReadFull(r.Reader, &buf); + if r.err == nil && err != nil { + r.err = err; + } + return r.order.Uint32(&buf); +} + +func (r *binaryReader) ReadUint64() uint64 { + var buf [8]byte; + n, err := io.ReadFull(r.Reader, &buf); + if r.err == nil && err != nil { + r.err = err; + } + return r.order.Uint64(&buf); +} + +func (r *binaryReader) ReadInt8() int8 { + return int8(r.ReadUint8()); +} + +func (r *binaryReader) ReadInt16() int16 { + return int16(r.ReadUint16()); +} + +func (r *binaryReader) ReadInt32() int32 { + return int32(r.ReadUint32()); +} + +func (r *binaryReader) ReadInt64() int64 { + return int64(r.ReadUint64()); +} + +// ReadCString reads a NULL-terminated string. +func (r *binaryReader) ReadCString() string { + str, err := r.Reader.ReadLineString('\x00', false); + if r.err == nil && err != nil { + r.err = err; + } + return str; +} + +// ReadValue reads a value according to its reflected type. This can +// read any of the types for which there is a regular Read method, +// plus structs and arrays. It assumes structs contain no padding. +func (r *binaryReader) ReadValue(v reflect.Value) { + switch v := v.(type) { + case *reflect.ArrayValue: + l := v.Len(); + for i := 0; i < l; i++ { + r.ReadValue(v.Elem(i)); + } + case *reflect.StructValue: + l := v.NumField(); + for i := 0; i < l; i++ { + r.ReadValue(v.Field(i)); + } + + case *reflect.Uint8Value: + v.Set(r.ReadUint8()); + case *reflect.Uint16Value: + v.Set(r.ReadUint16()); + case *reflect.Uint32Value: + v.Set(r.ReadUint32()); + case *reflect.Uint64Value: + v.Set(r.ReadUint64()); + case *reflect.Int8Value: + v.Set(r.ReadInt8()); + case *reflect.Int16Value: + v.Set(r.ReadInt16()); + case *reflect.Int32Value: + v.Set(r.ReadInt32()); + case *reflect.Int64Value: + v.Set(r.ReadInt64()); + case *reflect.StringValue: + v.Set(r.ReadCString()); + + default: + log.Crashf("Value of unexpected type %T", v); + } +} + +// ReadAny is a convenience wrapper for ReadValue. It can be passed a +// pointer any type that can be decoded by ReadValue. +func (r *binaryReader) ReadAny(out interface {}) { + r.ReadValue(reflect.Indirect(reflect.NewValue(out))); +} diff --git a/usr/austin/sym/elf.go b/usr/austin/sym/elf.go new file mode 100644 index 0000000000..5d92ce00fa --- /dev/null +++ b/usr/austin/sym/elf.go @@ -0,0 +1,237 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sym + +import ( + "fmt"; + "io"; + "log"; + "os"; +) + +/* + * Internal ELF representation + */ + +// Elf represents a decoded ELF binary. +type Elf struct { + class int; + data byteOrder; + Type ElfType; + Machine ElfMachine; + Sections []*Section; +} + +// Section represents a single section in an ELF binary. +type Section struct { + r io.ReadSeeker; + Name string; + offset int64; + Size uint64; + Addr uint64; +} + +/* + * ELF reader + */ + +type FormatError struct { + off int64; + msg string; + val interface{}; +} + +func (e *FormatError) String() string { + msg := e.msg; + if e.val != nil { + msg += fmt.Sprintf(" '%v' ", e.val); + } + msg += fmt.Sprintf("in record at byte %#x", e.off); + return msg; +} + +// NewElf reads and decodes an ELF binary. The ELF binary is expected +// to start where the reader is currently positioned. +func NewElf(r io.ReadSeeker) (*Elf, os.Error) { + // Read ELF identifier + var ident [eiNIdent]uint8; + off, err := r.Seek(0, 0); + if err != nil { + return nil, err; + } + start := off; + n, err := io.ReadFull(r, &ident); + if err != nil { + if err == os.EOF { + err = io.ErrUnexpectedEOF; + } + return nil, err; + } + + // Decode identifier + if ident[eiMag0] != '\x7f' || ident[eiMag1] != 'E' || ident[eiMag2] != 'L' || ident[eiMag3] != 'F' { + return nil, &FormatError{off, "bad magic number", string(ident[eiMag0:eiMag3])}; + } + e := &Elf{}; + + switch ident[eiClass] { + case elfClass32: + e.class = 32; + case elfClass64: + e.class = 64; + default: + return nil, &FormatError{off, "unknown ELF class", ident[eiClass]}; + } + + switch ident[eiData] { + case elfData2LSB: + e.data = lsb; + case elfData2MSB: + e.data = msb; + default: + return nil, &FormatError{off, "unknown ELF data encoding", ident[eiData]}; + } + + if ident[eiVersion] != evCurrent { + return nil, &FormatError{off, "unknown ELF version", ident[eiVersion]}; + } + + // TODO(austin) Do something with ABI? + + // Read ELF file header + var shoff int64; + var shentsize, shnum, shstrndx int; + + br := newBinaryReader(r, e.data); + switch e.class { + case 32: + return nil, &FormatError{off, "ELF32 not implemented", nil}; + case 64: + hdr := &elf64Ehdr{}; + br.ReadAny(hdr); + if err := br.Error(); err != nil { + return nil, err; + } + + if hdr.Type > etCore && hdr.Type < etLoOS { + return nil, &FormatError{off, "unknown ELF file type", hdr.Type}; + } + e.Type = ElfType(hdr.Type); + e.Machine = ElfMachine(hdr.Machine); + if hdr.Version != evCurrent { + return nil, &FormatError{off, "unknown second ELF version", hdr.Version}; + } + + shoff = int64(hdr.Shoff); + shentsize = int(hdr.Shentsize); + shnum = int(hdr.Shnum); + shstrndx = int(hdr.Shstrndx); + } + + // Read section headers + e.Sections = make([]*Section, shnum); + secNames := make([]uint32, shnum); + for i := 0; i < shnum; i++ { + off, err = r.Seek(start + shoff + int64(i*shentsize), 0); + if err != nil { + return nil, err; + } + + br = newBinaryReader(r, e.data); + switch e.class { + case 32: + panic("not reached"); + case 64: + shdr := &elf64Shdr{}; + br.ReadAny(shdr); + if err := br.Error(); err != nil { + return nil, err; + } + + s := &Section{ + r: r, + offset: start + int64(shdr.Off), + Size: shdr.Size, + Addr: uint64(shdr.Addr), + }; + secNames[i] = shdr.Name; + e.Sections[i] = s; + } + } + + // Resolve section names + off, err = r.Seek(start + e.Sections[shstrndx].offset, 0); + if err != nil { + return nil, err; + } + blob := make([]byte, e.Sections[shstrndx].Size); + n, err = io.ReadFull(r, blob); + strings := make(map[uint32] string); + strStart := uint32(0); + for i, c := range blob { + if c == 0 { + strings[strStart] = string(blob[strStart:i]); + strStart = uint32(i+1); + } + } + + for i, s := range e.Sections { + var ok bool; + s.Name, ok = strings[secNames[i]]; + if !ok { + return nil, &FormatError{start + shoff + int64(i*shentsize), "bad section name", secNames[i]}; + } + } + + return e, nil; +} + +// Section returns a section with the given name, or nil if no such +// section exists. +func (e *Elf) Section(name string) *Section { + for _, s := range e.Sections { + if s.Name == name { + return s; + } + } + return nil; +} + +/* + * Sections + */ + +type subReader struct { + r io.Reader; + rem uint64; +} + +func (r *subReader) Read(b []byte) (ret int, err os.Error) { + if r.rem == 0 { + return 0, os.EOF; + } + if uint64(len(b)) > r.rem { + b = b[0:r.rem]; + } + ret, err = r.r.Read(b); + r.rem -= uint64(ret); + if err == os.EOF { + err = io.ErrUnexpectedEOF; + } + return ret, err; +} + +// Open returns a reader backed by the data in this section. +// The original ELF file must still be open for this to work. +// The returned reader assumes there will be no seeks on the +// underlying file or any other opened section between the Open call +// and the last call to Read. +func (s *Section) Open() (io.Reader, os.Error) { + _, err := s.r.Seek(s.offset, 0); + if err != nil { + return nil, err; + } + return &subReader{s.r, s.Size}, nil; +} diff --git a/usr/austin/sym/elffmt.go b/usr/austin/sym/elffmt.go new file mode 100644 index 0000000000..dcfb8eaca8 --- /dev/null +++ b/usr/austin/sym/elffmt.go @@ -0,0 +1,389 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sym + +import "fmt"; + +/* + * ELF64 file format + */ + +type elf64Addr uint64 +type elf64Off uint64 + +type elf64Ehdr struct { +// Ident [elfIdentLen]uint8; // ELF identification + Type uint16; // Object file type + Machine uint16; // Machine type + Version uint32; // Object file version + Entry elf64Addr; // Entry point address + Phoff elf64Off; // Program header offset + Shoff elf64Off; // Section header offset + Flags uint32; // Processor-specific flags + Ehsize uint16; // ELF header size + Phentsize uint16; // Size of program header entry + Phnum uint16; // Number of program header entries + Shentsize uint16; // Size of section header entry + Shnum uint16; // Number of section header entries + Shstrndx uint16; // Section name string table indexes +} + +const ( + // Ident indexes + eiMag0 = 0; // File identification + eiMag1 = 1; + eiMag2 = 2; + eiMag3 = 3; + eiClass = 4; // File class + eiData = 5; // Data encoding + eiVersion = 6; // File version + eiOsABI = 7; // OS/ABI identification + eiABIVersion = 8; // ABI version + eiPad = 9; // Start of padding bytes + eiNIdent = 16; // Size of ident + + // Classes + elfClass32 = 1; // 32-bit objects + elfClass64 = 2; // 64-bit objects + + // Endians + elfData2LSB = 1; // Little-endian + elfData2MSB = 2; // Big-endian + + // Types + etNone = 0; // No file type + etRel = 1; // Relocatable object file + etExec = 2; // Executable file + etDyn = 3; // Shared object file + etCore = 4; // Core file + etLoOS = 0xFE00; // Environment-specific use + etHiOS = 0xFEFF; + etLoProc = 0xFF00; // Processor-specific use + etHiProc = 0xFFFF; + + evCurrent = 1; // Current version of format +) + +type elf64Shdr struct { + Name uint32; // Section name + Type uint32; // Section type + Flags uint64; // Section attributes + Addr elf64Addr; // Virtual address in memory + Off elf64Off; // Offset in file + Size uint64; // Size of section + Link uint32; // Link to other section + Info uint32; // Miscellaneous information + Addralign uint64; // Address alignment boundary + Entsize uint64; // Size of entries, if section has table +} + +const ( + // Section indices + shnUndef = 0; // Used to mark an undefined or meaningless section reference + shnLoProc = 0xFF00; // Processor-specific use + shnHiProc = 0xFF1F; + shnLoOS = 0xFF20; // Environment-specific use + shnHiOS = 0xFF3F; + shnAbs = 0xFFF1; // Indicates that the coresponding reference is an absolute value + shnCommon = 0xFFF2; // Indicates a symbol that has been declared as a common block + + // Section header types + shtNull = 0; // Unused section header + shtProgBits = 1; // Information defined by the program + shtSymTab = 2; // Linker symbol table + shtStrTab = 3; // String table + shtRela = 4; // "Rela" type relocation entries + shtHash = 5; // Symbol hash table + shtDynamic = 6; // Dynamic linking tables + shtNote = 7; // Note information + shtNoBits = 8; // Uninitialized space; does not occupy any space in the file + shtRel = 9; // "Rel" type relocation entries + shtShlib = 10; // Reserved + shtDynSym = 11; // A dynamic loader symbol table + shtLoOS = 0x60000000; // Environment-specific use + shtHiOS = 0x6FFFFFFF; + shtLoProc = 0x70000000; // Processor-specific use + shtHiProc = 0x7FFFFFFF; + + // Section header flags + shfWrite = 0x1; // Writable data + shfAlloc = 0x2; // Allocated in memory image of program + shfExecInstr = 0x4; // Executable instructions + shfMaskOS = 0x0F000000; // Environment-specific use + shfMaskProc = 0xF0000000; // Processor-specific use +) + +type elf64Phdr struct { + Type uint32; // Type of segment + Flags uint32; // Segment attributes + Off elf64Off; // Offset in file + Vaddr elf64Addr; // Virtual address in memory + Paddr elf64Addr; // Reserved + Filesz uint64; // Size of segment in file + Memsz uint64; // Size of segment in memory + Align uint64; // Alignment of segment +} + +const ( + ptNull = 0; // Unused entry + ptLoad = 1; // Loadable segment + ptDynamic = 2; // Dynamic linking tables + ptInterp = 3; // Program interpreter path name + ptNote = 4; // Note sections + ptPhdr = 6; // Program header table + + // Program header flags + pfX = 0x1; // Execute permission + pfW = 0x2; // Write permission + pfR = 0x4; // Read permission + pfMaskOS = 0x00FF0000; // Reserved for environment-specific use + pfMaskProc = 0xFF000000; // Reserved for processor-specific use +) + +/* + * Exported constants + */ + +type ElfType int + +const ( + ElfNone ElfType = etNone; + ElfRel = etRel; + ElfExec = etExec; + ElfDyn = etDyn; + ElfCore = etCore; +) + +type ElfMachine int + +const ( + ElfM32 ElfMachine = 1; + ElfSPARC ElfMachine = 2; + Elf386 ElfMachine = 3; + Elf68K ElfMachine = 4; + Elf88K ElfMachine = 5; + Elf860 ElfMachine = 7; + ElfMIPS ElfMachine = 8; + ElfS370 ElfMachine = 9; + ElfMIPS_RS3_LE ElfMachine = 10; + ElfPARISC ElfMachine = 15; + ElfVPP500 ElfMachine = 17; + ElfSPARC32PLUS ElfMachine = 18; + Elf960 ElfMachine = 19; + ElfPPC ElfMachine = 20; + ElfPPC64 ElfMachine = 21; + ElfS390 ElfMachine = 22; + ElfV800 ElfMachine = 36; + ElfFR20 ElfMachine = 37; + ElfRH32 ElfMachine = 38; + ElfRCE ElfMachine = 39; + ElfARM ElfMachine = 40; + ElfFAKE_ALPHA ElfMachine = 41; + ElfSH ElfMachine = 42; + ElfSPARCV9 ElfMachine = 43; + ElfTRICORE ElfMachine = 44; + ElfARC ElfMachine = 45; + ElfH8_300 ElfMachine = 46; + ElfH8_300H ElfMachine = 47; + ElfH8S ElfMachine = 48; + ElfH8_500 ElfMachine = 49; + ElfIA_64 ElfMachine = 50; + ElfMIPS_X ElfMachine = 51; + ElfCOLDFIRE ElfMachine = 52; + Elf68HC12 ElfMachine = 53; + ElfMMA ElfMachine = 54; + ElfPCP ElfMachine = 55; + ElfNCPU ElfMachine = 56; + ElfNDR1 ElfMachine = 57; + ElfSTARCORE ElfMachine = 58; + ElfME16 ElfMachine = 59; + ElfST100 ElfMachine = 60; + ElfTINYJ ElfMachine = 61; + ElfX86_64 ElfMachine = 62; + ElfPDSP ElfMachine = 63; + ElfFX66 ElfMachine = 66; + ElfST9PLUS ElfMachine = 67; + ElfST7 ElfMachine = 68; + Elf68HC16 ElfMachine = 69; + Elf68HC11 ElfMachine = 70; + Elf68HC08 ElfMachine = 71; + Elf68HC05 ElfMachine = 72; + ElfSVX ElfMachine = 73; + ElfST19 ElfMachine = 74; + ElfVAX ElfMachine = 75; + ElfCRIS ElfMachine = 76; + ElfJAVELIN ElfMachine = 77; + ElfFIREPATH ElfMachine = 78; + ElfZSP ElfMachine = 79; + ElfMMIX ElfMachine = 80; + ElfHUANY ElfMachine = 81; + ElfPRISM ElfMachine = 82; + ElfAVR ElfMachine = 83; + ElfFR30 ElfMachine = 84; + ElfD10V ElfMachine = 85; + ElfD30V ElfMachine = 86; + ElfV850 ElfMachine = 87; + ElfM32R ElfMachine = 88; + ElfMN10300 ElfMachine = 89; + ElfMN10200 ElfMachine = 90; + ElfPJ ElfMachine = 91; + ElfOPENRISC ElfMachine = 92; + ElfARC_A5 ElfMachine = 93; + ElfXTENSA ElfMachine = 94; +) + +func (m ElfMachine) String() string { + switch m { + case ElfMachine(0): + return "No machine"; + case ElfM32: + return "AT&T WE 32100"; + case ElfSPARC: + return "SUN SPARC"; + case Elf386: + return "Intel 80386"; + case Elf68K: + return "Motorola m68k family"; + case Elf88K: + return "Motorola m88k family"; + case Elf860: + return "Intel 80860"; + case ElfMIPS: + return "MIPS R3000 big-endian"; + case ElfS370: + return "IBM System/370"; + case ElfMIPS_RS3_LE: + return "MIPS R3000 little-endian"; + case ElfPARISC: + return "HPPA"; + case ElfVPP500: + return "Fujitsu VPP500"; + case ElfSPARC32PLUS: + return "Sun's \"v8plus\""; + case Elf960: + return "Intel 80960"; + case ElfPPC: + return "PowerPC"; + case ElfPPC64: + return "PowerPC 64-bit"; + case ElfS390: + return "IBM S390"; + case ElfV800: + return "NEC V800 series"; + case ElfFR20: + return "Fujitsu FR20"; + case ElfRH32: + return "TRW RH-32"; + case ElfRCE: + return "Motorola RCE"; + case ElfARM: + return "ARM"; + case ElfFAKE_ALPHA: + return "Digital Alpha"; + case ElfSH: + return "Hitachi SH"; + case ElfSPARCV9: + return "SPARC v9 64-bit"; + case ElfTRICORE: + return "Siemens Tricore"; + case ElfARC: + return "Argonaut RISC Core"; + case ElfH8_300: + return "Hitachi H8/300"; + case ElfH8_300H: + return "Hitachi H8/300H"; + case ElfH8S: + return "Hitachi H8S"; + case ElfH8_500: + return "Hitachi H8/500"; + case ElfIA_64: + return "Intel Merced"; + case ElfMIPS_X: + return "Stanford MIPS-X"; + case ElfCOLDFIRE: + return "Motorola Coldfire"; + case Elf68HC12: + return "Motorola M68HC12"; + case ElfMMA: + return "Fujitsu MMA Multimedia Accelerato"; + case ElfPCP: + return "Siemens PCP"; + case ElfNCPU: + return "Sony nCPU embeeded RISC"; + case ElfNDR1: + return "Denso NDR1 microprocessor"; + case ElfSTARCORE: + return "Motorola Start*Core processor"; + case ElfME16: + return "Toyota ME16 processor"; + case ElfST100: + return "STMicroelectronic ST100 processor"; + case ElfTINYJ: + return "Advanced Logic Corp. Tinyj emb.fa"; + case ElfX86_64: + return "AMD x86-64 architecture"; + case ElfPDSP: + return "Sony DSP Processor"; + case ElfFX66: + return "Siemens FX66 microcontroller"; + case ElfST9PLUS: + return "STMicroelectronics ST9+ 8/16 mc"; + case ElfST7: + return "STmicroelectronics ST7 8 bit mc"; + case Elf68HC16: + return "Motorola MC68HC16 microcontroller"; + case Elf68HC11: + return "Motorola MC68HC11 microcontroller"; + case Elf68HC08: + return "Motorola MC68HC08 microcontroller"; + case Elf68HC05: + return "Motorola MC68HC05 microcontroller"; + case ElfSVX: + return "Silicon Graphics SVx"; + case ElfST19: + return "STMicroelectronics ST19 8 bit mc"; + case ElfVAX: + return "Digital VAX"; + case ElfCRIS: + return "Axis Communications 32-bit embedded processor"; + case ElfJAVELIN: + return "Infineon Technologies 32-bit embedded processor"; + case ElfFIREPATH: + return "Element 14 64-bit DSP Processor"; + case ElfZSP: + return "LSI Logic 16-bit DSP Processor"; + case ElfMMIX: + return "Donald Knuth's educational 64-bit processor"; + case ElfHUANY: + return "Harvard University machine-independent object files"; + case ElfPRISM: + return "SiTera Prism"; + case ElfAVR: + return "Atmel AVR 8-bit microcontroller"; + case ElfFR30: + return "Fujitsu FR30"; + case ElfD10V: + return "Mitsubishi D10V"; + case ElfD30V: + return "Mitsubishi D30V"; + case ElfV850: + return "NEC v850"; + case ElfM32R: + return "Mitsubishi M32R"; + case ElfMN10300: + return "Matsushita MN10300"; + case ElfMN10200: + return "Matsushita MN10200"; + case ElfPJ: + return "picoJava"; + case ElfOPENRISC: + return "OpenRISC 32-bit embedded processor"; + case ElfARC_A5: + return "ARC Cores Tangent-A5"; + case ElfXTENSA: + return "Tensilica Xtensa Architecture"; + } + return fmt.Sprintf("", m); +} diff --git a/usr/austin/sym/gosymtab.go b/usr/austin/sym/gosymtab.go new file mode 100644 index 0000000000..096d3478ea --- /dev/null +++ b/usr/austin/sym/gosymtab.go @@ -0,0 +1,615 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sym + +// The Go symbol table and line number table formats are based on +// the Plan9 a.out format, which is documented here: +// http://plan9.bell-labs.com/magic/man2html/6/a.out +// The best reference for the differences between the Plan9 format and +// the Go format is the runtime source, particularly: +// src/pkg/runtime/symtab.c + +import ( + "io"; + "os"; + "sort"; + "strings"; +) + +/* + * Symbols + */ + +type GoSym interface { + Common() *CommonSym; +} + +// CommonSym represents information that all symbols have in common. +// The meaning of the symbol value differs between symbol types. +type CommonSym struct { + Value uint64; + Type byte; + Name string; + GoType uint64; +} + +func (c *CommonSym) Common() *CommonSym { + return c; +} + +// Static returns whether this symbol is static (not visible outside its file). +func (c *CommonSym) Static() bool { + switch c.Type { + case 't', 'l', 'd', 'b': + return true; + } + return false; +} + +// PackageName returns the package part of the symbol name, or empty +// string if there is none. +func (c *CommonSym) PackageName() string { + if i := strings.Index(c.Name, "·"); i != -1 { + return c.Name[0:i]; + } + return ""; +} + +// ReceiverName returns the receiver type name of this symbol, or +// empty string if there is none. +func (c *CommonSym) ReceiverName() string { + l := strings.Index(c.Name, "·"); + r := strings.LastIndex(c.Name, "·"); + if l == -1 || r == -1 { + return ""; + } + return c.Name[l+len("·"):r]; +} + +// BaseName returns the symbol name without the package or receiver name. +func (c *CommonSym) BaseName() string { + if i := strings.LastIndex(c.Name, "·"); i != -1 { + return c.Name[i+len("·"):len(c.Name)]; + } + return c.Name; +} + +// TextSym represents a function symbol. In addition to the common +// symbol fields, it has a frame size, parameters, and local variables. +type TextSym struct { + CommonSym; + obj *object; + lt *lineTable; + // Ths size of this function's frame. + FrameSize int; + // The value of each parameter symbol is its positive offset + // from the stack base pointer. This includes out parameters, + // even if they are unnamed. + Params []*ParamSym; + // The value of each local symbol is its negative offset from + // the stack base pointer. + Locals []*LocalSym; +} + +func (s *TextSym) Entry() uint64 { + return s.Value; +} + +type LeafSym struct { + CommonSym; +} + +type DataSym struct { + CommonSym; +} + +type BSSSym struct { + CommonSym; +} + +type FrameSym struct { + CommonSym; +} + +type LocalSym struct { + CommonSym; +} + +type ParamSym struct { + CommonSym; +} + +type PathSym struct { + CommonSym; +} + +/* + * Symbol tables + */ + +type object struct { + paths []*PathSym; + funcs []*TextSym; +} + +type lineTable struct { + blob []byte; + pc uint64; + line int; +} + +// GoSymTable represents a Go symbol table. It stores all of the +// symbols decoded from the program and provides methods to translate +// between symbols, names, and addresses. +type GoSymTable struct { + textEnd uint64; + Syms []GoSym; + funcs []*TextSym; +} + +func growGoSyms(s *[]GoSym) (*GoSym) { + n := len(*s); + if n == cap(*s) { + n := make([]GoSym, n, n * 2); + for i := range *s { + n[i] = (*s)[i]; + } + *s = n; + } + *s = (*s)[0:n+1]; + return &(*s)[n]; +} + +func (t *GoSymTable) readGoSymTab(r io.Reader) os.Error { + t.Syms = make([]GoSym, 0, 16); + filenames := make(map[uint32] string); + + br := newBinaryReader(r, msb); + off := int64(0); + for { + // Read symbol + value := br.ReadUint32(); + if br.Error() == os.EOF { + break; + } + typ := br.ReadUint8(); + if br.Error() == nil && typ & 0x80 == 0 { + return &FormatError{off, "bad symbol type code", typ}; + } + typ &^= 0x80; + name := br.ReadCString(); + extraOff := int64(0); + if typ == 'z' || typ == 'Z' { + if name != "" { + return &FormatError{off, "path symbol has non-empty name", name}; + } + // Decode path entry + for i := 0; ; i++ { + eltIdx := uint32(br.ReadUint16()); + extraOff += 2; + if eltIdx == 0 { + break; + } + elt, ok := filenames[eltIdx]; + if !ok { + return &FormatError{off, "bad filename code", eltIdx}; + } + if name != "" && name[len(name)-1] != '/' { + name += "/"; + } + name += elt; + } + } + gotype := br.ReadUint32(); + if err := br.Error(); err != nil { + if err == os.EOF { + err = io.ErrUnexpectedEOF; + } + return err; + } + + off += 4 + 1 + int64(len(name)) + 1 + extraOff + 4; + + // Handle file name components + if typ == 'f' { + filenames[value] = name; + } + + // Create the GoSym + sym := growGoSyms(&t.Syms); + + switch typ { + case 'T', 't': + *sym = &TextSym{}; + case 'L', 'l': + *sym = &LeafSym{}; + case 'D', 'd': + *sym = &DataSym{}; + case 'B', 'b': + *sym = &BSSSym{}; + case 'm': + *sym = &FrameSym{}; + case 'a': + *sym = &LocalSym{}; + case 'p': + *sym = &ParamSym{}; + case 'z', 'Z': + *sym = &PathSym{}; + default: + *sym = &CommonSym{}; + } + + common := sym.Common(); + common.Value = uint64(value); + common.Type = typ; + common.Name = name; + common.GoType = uint64(gotype); + } + + return nil; +} + +// byValue is a []*TextSym sorter. +type byValue []*TextSym + +func (s byValue) Len() int { + return len(s); +} + +func (s byValue) Less(i, j int) bool { + return s[i].Value < s[j].Value; +} + +func (s byValue) Swap(i, j int) { + t := s[i]; + s[i] = s[j]; + s[j] = t; +} + +func (t *GoSymTable) processTextSyms() { + // Count text symbols and attach frame sizes, parameters, and + // locals to them. Also, find object file boundaries. + count := 0; + var obj *object; + var objCount int; + for i := 0; i < len(t.Syms); i++ { + switch sym := t.Syms[i].(type) { + case *PathSym: + // Finish the current object + if obj != nil { + obj.funcs = make([]*TextSym, 0, objCount); + } + + // Count path symbols + end := i+1; + for ; end < len(t.Syms); end++ { + _, ok := t.Syms[end].(*PathSym); + if !ok { + break; + } + } + + // Copy path symbols + obj = &object{make([]*PathSym, end - i), nil}; + for j, s := range t.Syms[i:end] { + obj.paths[j] = s.(*PathSym); + } + + objCount = 0; + i = end-1; + + case *TextSym: + if sym.Name == "etext" { + continue; + } + + // Count parameter and local syms + var np, nl int; + end := i+1; + countloop: + for ; end < len(t.Syms); end++ { + switch _ := t.Syms[end].(type) { + // TODO(austin) Use type switch list + case *TextSym: + break countloop; + case *PathSym: + break countloop; + case *ParamSym: + np++; + case *LocalSym: + nl++; + } + } + + // Fill in the function symbol + var ip, ia int; + sym.obj = obj; + sym.Params = make([]*ParamSym, np); + sym.Locals = make([]*LocalSym, nl); + for _, s := range t.Syms[i:end] { + switch s := s.(type) { + case *FrameSym: + sym.FrameSize = int(s.Value); + case *ParamSym: + sym.Params[ip] = s; + ip++; + case *LocalSym: + sym.Locals[ia] = s; + ia++; + } + } + + count++; + objCount++; + i = end-1; + } + } + + if obj != nil { + obj.funcs = make([]*TextSym, 0, objCount); + } + + // Extract text symbols into function array and individual + // object function arrys. + t.funcs = make([]*TextSym, 0, count); + for _, sym := range t.Syms { + sym, ok := sym.(*TextSym); + if !ok || sym.Name == "etext" { + continue; + } + + t.funcs = t.funcs[0:len(t.funcs)+1]; + t.funcs[len(t.funcs)-1] = sym; + sym.obj.funcs = sym.obj.funcs[0:len(sym.obj.funcs)+1]; + sym.obj.funcs[len(sym.obj.funcs)-1] = sym; + } + + // Sort text symbols + sort.Sort(byValue(t.funcs)); +} + +func (t *GoSymTable) sliceLineTable(lt *lineTable) { + for _, fn := range t.funcs { + fn.lt = lt.slice(fn.Entry()); + lt = fn.lt;; + } +} + +// SymFromPC looks up a text symbol given a program counter within +// some function. Returns nil if no function contains this PC. +func (t *GoSymTable) SymFromPC(pc uint64) *TextSym { + syms := t.funcs; + if pc > t.textEnd { + return nil; + } + + if len(syms) == 0 || pc < syms[0].Value { + return nil; + } + if pc >= syms[len(syms)-1].Value { + return syms[len(syms)-1]; + } + + l := 0; + n := len(syms); + for n > 0 { + m := n/2; + s := syms[l+m]; + switch { + case s.Value <= pc && pc < syms[l+m+1].Value: + return s; + case pc < s.Value: + n = m; + default: + l += m+1; + n -= m+1; + } + } + panic("not reached, pc=", pc); +} + +// LineFromPC looks up line number information for a program counter. +// Returns a file path, a line number within that file, and the +// TextSym at pc. +func (t *GoSymTable) LineFromPC(pc uint64) (string, int, *TextSym) { + sym := t.SymFromPC(pc); + if sym == nil { + return "", 0, nil; + } + + aline := sym.lt.alineFromPC(pc); + + path, line := sym.obj.lineFromAline(aline); + + return path, line, sym; +} + +// SymFromName looks up a symbol by name. The name must refer to a +// global text, data, or BSS symbol. +func (t *GoSymTable) SymFromName(name string) GoSym { + // TODO(austin) Maybe make a map + for _, v := range t.Syms { + c := v.Common(); + switch c.Type { + case 'T', 't', 'L', 'l', 'D', 'd', 'B', 'b': + if c.Name == name { + return v; + } + } + } + return nil; +} + +// SymFromAddr looks up a symbol by address. The symbol will be a +// text, data, or BSS symbol. addr must be the exact address of the +// symbol, unlike for SymFromPC. +func (t *GoSymTable) SymFromAddr(addr uint64) GoSym { + // TODO(austin) Maybe make a map + for _, v := range t.Syms { + c := v.Common(); + switch c.Type { + case 'T', 't', 'L', 'l', 'D', 'd', 'B', 'b': + if c.Value == addr { + return v; + } + } + } + return nil; +} + +// TODO(austin) Implement PCFromLine. This is more difficult because +// we first have to figure out which object file PC is in, and which +// segment of the line table that corresponds to. +// +// For each place path appears (either from push or pop), +// 1. Turn line into an absolute line number using the history stack +// 2. minpc = Entry of the first text sym in the object +// 3. maxpc = Entry of the first text sym in the next object +// 4. lt = lt.slice(minpc); +// 5. Find PC of first occurrence of absolute line number between minpc and maxpc +// +// I'm not sure if this guarantees a PC at the begining of an +// instruction. + +/* + * Object files + */ + +func (o *object) lineFromAline(aline int) (string, int) { + type stackEnt struct { + path string; + start int; + offset int; + prev *stackEnt; + }; + + noPath := &stackEnt{"", 0, 0, nil}; + tos := noPath; + + // TODO(austin) I have no idea how 'Z' symbols work, except + // that they pop the stack. + for _, s := range o.paths { + val := int(s.Value); + switch { + case val > aline: + break; + + case val == 1: + // Start a new stack + tos = &stackEnt{s.Name, val, 0, noPath}; + + case s.Name == "": + // Pop + if tos == noPath { + return "", 0; + } + tos.prev.offset += val - tos.start; + tos = tos.prev; + + default: + // Push + tos = &stackEnt{s.Name, val, 0, tos}; + } + } + + return tos.path, aline - tos.start - tos.offset + 1; +} + +/* + * Line tables + */ + +func (lt *lineTable) parse(targetPC uint64) ([]byte, uint64, int) { + // The PC/line table can be thought of as a sequence of + // * + // batches. Each update batch results in a (pc, line) pair, + // where line applies to every PC from pc up to but not + // including the pc of the next pair. + // + // Here we process each update individually, which simplifies + // the code, but makes the corner cases more confusing. + + const quantum = 1; + b, pc, line := lt.blob, lt.pc, lt.line; + for pc <= targetPC && len(b) != 0 { + code := b[0]; + b = b[1:len(b)]; + switch { + case code == 0: + if len(b) < 4 { + b = b[0:1]; + break; + } + val := msb.Uint32(b); + b = b[4:len(b)]; + line += int(val); + case code <= 64: + line += int(code); + case code <= 128: + line -= int(code - 64); + default: + pc += quantum*uint64(code - 128); + continue; + } + pc += quantum; + } + return b, pc, line; +} + +func (lt *lineTable) slice(pc uint64) *lineTable { + blob, pc, line := lt.parse(pc); + return &lineTable{blob, pc, line}; +} + +func (lt *lineTable) alineFromPC(targetPC uint64) int { + _1, _2, aline := lt.parse(targetPC); + return aline; +} + +/* + * ELF + */ + +func ElfGoSyms(elf *Elf) (*GoSymTable, os.Error) { + text := elf.Section(".text"); + if text == nil { + return nil, nil; + } + + tab := &GoSymTable{textEnd: text.Addr + text.Size}; + + // Symbol table + sec := elf.Section(".gosymtab"); + if sec == nil { + return nil, nil; + } + sr, err := sec.Open(); + if err != nil { + return nil, err; + } + err = tab.readGoSymTab(sr); + if err != nil { + return nil, err; + } + + // Line table + sec = elf.Section(".gopclntab"); + if sec == nil { + return nil, nil; + } + sr, err = sec.Open(); + if err != nil { + return nil, err; + } + blob, err := io.ReadAll(sr); + if err != nil { + return nil, err; + } + lt := &lineTable{blob, text.Addr, 0}; + + tab.processTextSyms(); + tab.sliceLineTable(lt); + return tab, nil; +} -- 2.48.1