From e08f10b8b5fbb82ff1e2c263ad57e19d2de1e323 Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Wed, 22 Apr 2020 19:21:30 -0400 Subject: [PATCH] [dev.link] cmd/internal/goobj2: add index fingerprint to object file The new object files use indices for symbol references, instead of names. Fundamental to the design, it requires that the importing and imported packages have consistent view of symbol indices. The Go command should already ensure this, when using "go build". But in case it goes wrong, it could lead to obscure errors like run-time crashes. It would be better to check the index consistency at build time. To do that, we add a fingerprint to each object file, which is a hash of symbol indices. In the object file it records the fingerprints of all imported packages, as well as its own fingerprint. At link time, the linker checks that a package's fingerprint matches the fingerprint recorded in the importing packages, and issue an error if they don't match. This CL does the first part: introducing the fingerprint in the object file, and propagating fingerprints through importing/exporting by the compiler. It is not yet used by the linker. Next CL will do. Change-Id: I0aa372da652e4afb11f2867cb71689a3e3f9966e Reviewed-on: https://go-review.googlesource.com/c/go/+/229617 Reviewed-by: Austin Clements Reviewed-by: Than McIntosh Reviewed-by: Jeremy Faller --- src/cmd/compile/internal/gc/iexport.go | 6 +++ src/cmd/compile/internal/gc/iimport.go | 11 ++++- src/cmd/compile/internal/gc/main.go | 22 +++++----- src/cmd/internal/goobj/readnew.go | 6 ++- src/cmd/internal/goobj2/objfile.go | 58 +++++++++++++++++++------- src/cmd/internal/obj/line.go | 5 ++- src/cmd/internal/obj/link.go | 5 ++- src/cmd/internal/obj/objfile.go | 5 ++- src/cmd/internal/obj/objfile2.go | 14 ++++--- src/cmd/internal/obj/sym.go | 10 +++++ src/cmd/link/internal/loader/loader.go | 6 ++- 11 files changed, 111 insertions(+), 37 deletions(-) diff --git a/src/cmd/compile/internal/gc/iexport.go b/src/cmd/compile/internal/gc/iexport.go index 917bf2394a..35b8d985cb 100644 --- a/src/cmd/compile/internal/gc/iexport.go +++ b/src/cmd/compile/internal/gc/iexport.go @@ -35,6 +35,8 @@ // } // } // +// Fingerprint [8]byte +// // uvarint means a uint64 written out using uvarint encoding. // // []T means a uvarint followed by that many T objects. In other @@ -296,6 +298,10 @@ func iexport(out *bufio.Writer) { io.Copy(out, &hdr) io.Copy(out, &p.strings) io.Copy(out, &p.data0) + + // Add fingerprint (used by linker object file). + // Attach this to the end, so tools (e.g. gcimporter) don't care. + out.Write(Ctxt.Fingerprint[:]) } // writeIndex writes out an object index. mainIndex indicates whether diff --git a/src/cmd/compile/internal/gc/iimport.go b/src/cmd/compile/internal/gc/iimport.go index f881a33ea7..f2f49f002c 100644 --- a/src/cmd/compile/internal/gc/iimport.go +++ b/src/cmd/compile/internal/gc/iimport.go @@ -10,6 +10,7 @@ package gc import ( "cmd/compile/internal/types" "cmd/internal/bio" + "cmd/internal/goobj2" "cmd/internal/obj" "cmd/internal/src" "encoding/binary" @@ -95,7 +96,7 @@ func (r *intReader) uint64() uint64 { return i } -func iimport(pkg *types.Pkg, in *bio.Reader) { +func iimport(pkg *types.Pkg, in *bio.Reader) (fingerprint goobj2.FingerprintType) { ir := &intReader{in, pkg} version := ir.uint64() @@ -188,6 +189,14 @@ func iimport(pkg *types.Pkg, in *bio.Reader) { inlineImporter[s] = iimporterAndOffset{p, off} } } + + // Fingerprint + n, err := in.Read(fingerprint[:]) + if err != nil || n != len(fingerprint) { + yyerror("import %s: error reading fingerprint", pkg.Path) + errorexit() + } + return fingerprint } type iimporter struct { diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go index 2152c619fa..756cdbd3c9 100644 --- a/src/cmd/compile/internal/gc/main.go +++ b/src/cmd/compile/internal/gc/main.go @@ -14,6 +14,7 @@ import ( "cmd/compile/internal/types" "cmd/internal/bio" "cmd/internal/dwarf" + "cmd/internal/goobj2" "cmd/internal/obj" "cmd/internal/objabi" "cmd/internal/src" @@ -1254,15 +1255,6 @@ func importfile(f *Val) *types.Pkg { } } - // assume files move (get installed) so don't record the full path - if packageFile != nil { - // If using a packageFile map, assume path_ can be recorded directly. - Ctxt.AddImport(path_) - } else { - // For file "/Users/foo/go/pkg/darwin_amd64/math.a" record "math.a". - Ctxt.AddImport(file[len(file)-len(path_)-len(".a"):]) - } - // In the importfile, if we find: // $$\n (textual format): not supported anymore // $$B\n (binary format) : import directly, then feed the lexer a dummy statement @@ -1287,6 +1279,7 @@ func importfile(f *Val) *types.Pkg { c, _ = imp.ReadByte() } + var fingerprint goobj2.FingerprintType switch c { case '\n': yyerror("cannot import %s: old export format no longer supported (recompile library)", path_) @@ -1310,13 +1303,22 @@ func importfile(f *Val) *types.Pkg { yyerror("import %s: unexpected package format byte: %v", file, c) errorexit() } - iimport(importpkg, imp) + fingerprint = iimport(importpkg, imp) default: yyerror("no import in %q", path_) errorexit() } + // assume files move (get installed) so don't record the full path + if packageFile != nil { + // If using a packageFile map, assume path_ can be recorded directly. + Ctxt.AddImport(path_, fingerprint) + } else { + // For file "/Users/foo/go/pkg/darwin_amd64/math.a" record "math.a". + Ctxt.AddImport(file[len(file)-len(path_)-len(".a"):], fingerprint) + } + if importpkg.Height >= myheight { myheight = importpkg.Height + 1 } diff --git a/src/cmd/internal/goobj/readnew.go b/src/cmd/internal/goobj/readnew.go index 5654da44d6..3e710576b6 100644 --- a/src/cmd/internal/goobj/readnew.go +++ b/src/cmd/internal/goobj/readnew.go @@ -25,7 +25,11 @@ func (r *objReader) readNew() { } // Imports - r.p.Imports = rr.Autolib() + autolib := rr.Autolib() + for _, p := range autolib { + r.p.Imports = append(r.p.Imports, p.Pkg) + // Ignore fingerprint (for tools like objdump which only reads one object). + } pkglist := rr.Pkglist() diff --git a/src/cmd/internal/goobj2/objfile.go b/src/cmd/internal/goobj2/objfile.go index bee29a0ad6..3d3bc20133 100644 --- a/src/cmd/internal/goobj2/objfile.go +++ b/src/cmd/internal/goobj2/objfile.go @@ -19,17 +19,21 @@ import ( // New object file format. // // Header struct { -// Magic [...]byte // "\x00go115ld" -// Flags uint32 -// // TODO: Fingerprint -// Offsets [...]uint32 // byte offset of each block below +// Magic [...]byte // "\x00go115ld" +// Fingerprint [8]byte +// Flags uint32 +// Offsets [...]uint32 // byte offset of each block below // } // // Strings [...]struct { // Data [...]byte // } // -// Autolib [...]string // imported packages (for file loading) // TODO: add fingerprints +// Autolib [...]struct { // imported packages (for file loading) +// Pkg string +// Fingerprint [8]byte +// } +// // PkgIndex [...]string // referenced packages by index // // DwarfFiles [...]string @@ -119,6 +123,8 @@ import ( const stringRefSize = 8 // two uint32s +type FingerprintType [8]byte + // Package Index. const ( PkgIdxNone = (1<<31 - 1) - iota // Non-package symbols @@ -149,15 +155,17 @@ const ( // File header. // TODO: probably no need to export this. type Header struct { - Magic string - Flags uint32 - Offsets [NBlk]uint32 + Magic string + Fingerprint FingerprintType + Flags uint32 + Offsets [NBlk]uint32 } const Magic = "\x00go115ld" func (h *Header) Write(w *Writer) { w.RawString(h.Magic) + w.Bytes(h.Fingerprint[:]) w.Uint32(h.Flags) for _, x := range h.Offsets { w.Uint32(x) @@ -171,6 +179,8 @@ func (h *Header) Read(r *Reader) error { return errors.New("wrong magic, not a Go object file") } off := uint32(len(h.Magic)) + copy(h.Fingerprint[:], r.BytesAt(off, len(h.Fingerprint))) + off += 8 h.Flags = r.uint32At(off) off += 4 for i := range h.Offsets { @@ -184,6 +194,19 @@ func (h *Header) Size() int { return len(h.Magic) + 4 + 4*len(h.Offsets) } +// Autolib +type ImportedPkg struct { + Pkg string + Fingerprint FingerprintType +} + +const importedPkgSize = stringRefSize + 8 + +func (p *ImportedPkg) Write(w *Writer) { + w.StringRef(p.Pkg) + w.Bytes(p.Fingerprint[:]) +} + // Symbol definition. // // Serialized format: @@ -495,12 +518,18 @@ func (r *Reader) StringRef(off uint32) string { return r.StringAt(r.uint32At(off+4), l) } -func (r *Reader) Autolib() []string { - n := (r.h.Offsets[BlkAutolib+1] - r.h.Offsets[BlkAutolib]) / stringRefSize - s := make([]string, n) +func (r *Reader) Fingerprint() FingerprintType { + return r.h.Fingerprint +} + +func (r *Reader) Autolib() []ImportedPkg { + n := (r.h.Offsets[BlkAutolib+1] - r.h.Offsets[BlkAutolib]) / importedPkgSize + s := make([]ImportedPkg, n) + off := r.h.Offsets[BlkAutolib] for i := range s { - off := r.h.Offsets[BlkAutolib] + uint32(i)*stringRefSize - s[i] = r.StringRef(off) + s[i].Pkg = r.StringRef(off) + copy(s[i].Fingerprint[:], r.BytesAt(off+stringRefSize, len(s[i].Fingerprint))) + off += importedPkgSize } return s } @@ -508,9 +537,10 @@ func (r *Reader) Autolib() []string { func (r *Reader) Pkglist() []string { n := (r.h.Offsets[BlkPkgIdx+1] - r.h.Offsets[BlkPkgIdx]) / stringRefSize s := make([]string, n) + off := r.h.Offsets[BlkPkgIdx] for i := range s { - off := r.h.Offsets[BlkPkgIdx] + uint32(i)*stringRefSize s[i] = r.StringRef(off) + off += stringRefSize } return s } diff --git a/src/cmd/internal/obj/line.go b/src/cmd/internal/obj/line.go index fecf90c491..79ecb0068f 100644 --- a/src/cmd/internal/obj/line.go +++ b/src/cmd/internal/obj/line.go @@ -5,12 +5,13 @@ package obj import ( + "cmd/internal/goobj2" "cmd/internal/src" ) // AddImport adds a package to the list of imported packages. -func (ctxt *Link) AddImport(pkg string) { - ctxt.Imports = append(ctxt.Imports, pkg) +func (ctxt *Link) AddImport(pkg string, fingerprint goobj2.FingerprintType) { + ctxt.Imports = append(ctxt.Imports, goobj2.ImportedPkg{Pkg: pkg, Fingerprint: fingerprint}) } func linkgetlineFromPos(ctxt *Link, xpos src.XPos) (f string, l int32) { diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index 046ad53ac7..e6f917dedb 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -33,6 +33,7 @@ package obj import ( "bufio" "cmd/internal/dwarf" + "cmd/internal/goobj2" "cmd/internal/objabi" "cmd/internal/src" "cmd/internal/sys" @@ -666,7 +667,7 @@ type Link struct { PosTable src.PosTable InlTree InlTree // global inlining tree used by gc/inl.go DwFixups *DwarfFixupTable - Imports []string + Imports []goobj2.ImportedPkg DiagFunc func(string, ...interface{}) DiagFlush func() DebugInfo func(fn *LSym, info *LSym, curfn interface{}) ([]dwarf.Scope, dwarf.InlCalls) // if non-nil, curfn is a *gc.Node @@ -698,6 +699,8 @@ type Link struct { defs []*LSym // list of defined symbols in the current package nonpkgdefs []*LSym // list of defined non-package symbols nonpkgrefs []*LSym // list of referenced non-package symbols + + Fingerprint goobj2.FingerprintType // fingerprint of symbol indices, to catch index mismatch } func (ctxt *Link) Diag(format string, args ...interface{}) { diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go index 2b0c45d6b2..6d7f42ed0b 100644 --- a/src/cmd/internal/obj/objfile.go +++ b/src/cmd/internal/obj/objfile.go @@ -98,8 +98,9 @@ func WriteObjFile(ctxt *Link, bout *bio.Writer, pkgpath string) { w.wr.WriteByte(1) // Autolib - for _, pkg := range ctxt.Imports { - w.writeString(pkg) + for _, p := range ctxt.Imports { + w.writeString(p.Pkg) + // This object format ignores p.Fingerprint. } w.writeString("") diff --git a/src/cmd/internal/obj/objfile2.go b/src/cmd/internal/obj/objfile2.go index 9792ef0846..061e43c434 100644 --- a/src/cmd/internal/obj/objfile2.go +++ b/src/cmd/internal/obj/objfile2.go @@ -38,7 +38,11 @@ func WriteObjFile2(ctxt *Link, b *bio.Writer, pkgpath string) { if ctxt.Flag_shared { flags |= goobj2.ObjFlagShared } - h := goobj2.Header{Magic: goobj2.Magic, Flags: flags} + h := goobj2.Header{ + Magic: goobj2.Magic, + Fingerprint: ctxt.Fingerprint, + Flags: flags, + } h.Write(w.Writer) // String table @@ -46,8 +50,8 @@ func WriteObjFile2(ctxt *Link, b *bio.Writer, pkgpath string) { // Autolib h.Offsets[goobj2.BlkAutolib] = w.Offset() - for _, pkg := range ctxt.Imports { - w.StringRef(pkg) + for i := range ctxt.Imports { + ctxt.Imports[i].Write(w.Writer) } // Package references @@ -180,8 +184,8 @@ func (w *writer) init() { func (w *writer) StringTable() { w.AddString("") - for _, pkg := range w.ctxt.Imports { - w.AddString(pkg) + for _, p := range w.ctxt.Imports { + w.AddString(p.Pkg) } for _, pkg := range w.pkglist { w.AddString(pkg) diff --git a/src/cmd/internal/obj/sym.go b/src/cmd/internal/obj/sym.go index 03ce8ddc5a..4a8b0ebb6f 100644 --- a/src/cmd/internal/obj/sym.go +++ b/src/cmd/internal/obj/sym.go @@ -34,6 +34,7 @@ package obj import ( "cmd/internal/goobj2" "cmd/internal/objabi" + "crypto/md5" "fmt" "log" "math" @@ -241,6 +242,15 @@ func (ctxt *Link) NumberSyms(asm bool) { ctxt.pkgIdx[pkg] = ipkg ipkg++ }) + + // Compute a fingerprint of the indices, for exporting. + if !asm { + h := md5.New() + for _, s := range ctxt.defs { + h.Write([]byte(s.Name)) + } + copy(ctxt.Fingerprint[:], h.Sum(nil)[:]) + } } // Returns whether s is a non-package symbol, which needs to be referenced diff --git a/src/cmd/link/internal/loader/loader.go b/src/cmd/link/internal/loader/loader.go index 114bd43477..7b59e680ee 100644 --- a/src/cmd/link/internal/loader/loader.go +++ b/src/cmd/link/internal/loader/loader.go @@ -1779,7 +1779,11 @@ func (l *Loader) Preload(syms *sym.Symbols, f *bio.Reader, lib *sym.Library, uni or := &oReader{r, unit, localSymVersion, r.Flags(), pkgprefix, make([]Sym, ndef+nnonpkgdef+r.NNonpkgref()), ndef, uint32(len(l.objs))} // Autolib - lib.ImportStrings = append(lib.ImportStrings, r.Autolib()...) + autolib := r.Autolib() + for _, p := range autolib { + lib.ImportStrings = append(lib.ImportStrings, p.Pkg) + // TODO: fingerprint is ignored for now + } // DWARF file table nfile := r.NDwarfFile() -- 2.50.0