From: Cherry Zhang Date: Thu, 16 Jul 2020 03:14:50 +0000 (-0400) Subject: [dev.link] cmd/internal/obj: handle content-addressable symbols with relocations X-Git-Tag: go1.16beta1~1378^2~46 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=526d99a49ae67bfde15134b96159680988615d2d;p=gostls13.git [dev.link] cmd/internal/obj: handle content-addressable symbols with relocations For content-addressable symbols with relocations, we build a content hash based on its content and relocations. Depending on the category of the referenced symbol, we choose different hash algorithms such that the hash is globally consistent. For now, we only support content-addressable symbols with relocations when the current package's import path is known, so that the symbol names are fully expanded. Otherwise, if the referenced symbol is a named symbol whose name is not fully expanded, the hash won't be globally consistent, and can cause erroneous collisions. This is fine for now, as the deduplication is just an optimization, not a requirement for correctness (until we get to type descriptors). Change-Id: I639e4e03dd749b5d71f0a55c2525926575b1ac30 Reviewed-on: https://go-review.googlesource.com/c/go/+/243142 Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot Reviewed-by: Jeremy Faller --- diff --git a/src/cmd/asm/main.go b/src/cmd/asm/main.go index 31d8549d2d..a6eb44de73 100644 --- a/src/cmd/asm/main.go +++ b/src/cmd/asm/main.go @@ -41,6 +41,7 @@ func main() { ctxt.Flag_dynlink = *flags.Dynlink ctxt.Flag_shared = *flags.Shared || *flags.Dynlink ctxt.IsAsm = true + ctxt.Pkgpath = *flags.Importpath switch *flags.Spectre { default: log.Printf("unknown setting -spectre=%s", *flags.Spectre) @@ -97,7 +98,7 @@ func main() { } if ok && !*flags.SymABIs { ctxt.NumberSyms() - obj.WriteObjFile(ctxt, buf, *flags.Importpath) + obj.WriteObjFile(ctxt, buf) } if !ok || diag { if failedFile != "" { diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go index 6e204f49bc..bb28ef01a1 100644 --- a/src/cmd/compile/internal/gc/main.go +++ b/src/cmd/compile/internal/gc/main.go @@ -789,6 +789,7 @@ func Main(archInit func(*Arch)) { // Write object data to disk. timings.Start("be", "dumpobj") dumpdata() + Ctxt.Pkgpath = myimportpath Ctxt.NumberSyms() dumpobj() if asmhdr != "" { diff --git a/src/cmd/compile/internal/gc/obj.go b/src/cmd/compile/internal/gc/obj.go index 26ea775368..0826b04e33 100644 --- a/src/cmd/compile/internal/gc/obj.go +++ b/src/cmd/compile/internal/gc/obj.go @@ -166,7 +166,7 @@ func dumpLinkerObj(bout *bio.Writer) { fmt.Fprintf(bout, "\n!\n") - obj.WriteObjFile(Ctxt, bout, myimportpath) + obj.WriteObjFile(Ctxt, bout) } func addptabs() { diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index ffc3e99a20..195af8494c 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -671,6 +671,7 @@ type Link struct { Retpoline bool // emit use of retpoline stubs for indirect jmp/call Bso *bufio.Writer Pathname string + Pkgpath string // the current package's import path, "" if unknown hashmu sync.Mutex // protects hash, funchash hash map[string]*LSym // name -> sym mapping funchash map[string]*LSym // name -> sym mapping for ABIInternal syms diff --git a/src/cmd/internal/obj/objfile2.go b/src/cmd/internal/obj/objfile2.go index 6ac23bc418..6a5f3726f8 100644 --- a/src/cmd/internal/obj/objfile2.go +++ b/src/cmd/internal/obj/objfile2.go @@ -12,13 +12,15 @@ import ( "cmd/internal/goobj2" "cmd/internal/objabi" "crypto/sha1" + "encoding/binary" "fmt" + "io" "path/filepath" "strings" ) // Entry point of writing new object file. -func WriteObjFile(ctxt *Link, b *bio.Writer, pkgpath string) { +func WriteObjFile(ctxt *Link, b *bio.Writer) { debugAsmEmit(ctxt) @@ -27,7 +29,7 @@ func WriteObjFile(ctxt *Link, b *bio.Writer, pkgpath string) { w := writer{ Writer: goobj2.NewWriter(b), ctxt: ctxt, - pkgpath: objabi.PathToPrefix(pkgpath), + pkgpath: objabi.PathToPrefix(ctxt.Pkgpath), } start := b.Offset() @@ -39,7 +41,7 @@ func WriteObjFile(ctxt *Link, b *bio.Writer, pkgpath string) { if ctxt.Flag_shared { flags |= goobj2.ObjFlagShared } - if pkgpath == "" { + if w.pkgpath == "" { flags |= goobj2.ObjFlagNeedNameExpansion } if ctxt.IsAsm { @@ -336,19 +338,80 @@ func (w *writer) Hash64(s *LSym) { if !s.ContentAddressable() || len(s.R) != 0 { panic("Hash of non-content-addresable symbol") } - var b goobj2.Hash64Type - copy(b[:], s.P) + b := contentHash64(s) w.Bytes(b[:]) } func (w *writer) Hash(s *LSym) { - if !s.ContentAddressable() || len(s.R) != 0 { // TODO: currently we don't support content-addressable symbols with relocations + if !s.ContentAddressable() { panic("Hash of non-content-addresable symbol") } - b := goobj2.HashType(sha1.Sum(s.P)) + b := w.contentHash(s) w.Bytes(b[:]) } +func contentHash64(s *LSym) goobj2.Hash64Type { + var b goobj2.Hash64Type + copy(b[:], s.P) + return b +} + +// Compute the content hash for a content-addressable symbol. +// We build a content hash based on its content and relocations. +// Depending on the category of the referenced symbol, we choose +// different hash algorithms such that the hash is globally +// consistent. +// - For referenced content-addressable symbol, its content hash +// is globally consistent. +// - For package symbol, its local index is globally consistent. +// - For non-package symbol, its fully-expanded name is globally +// consistent. For now, we require we know the current package +// path so we can always expand symbol names. (Otherwise, +// symbols with relocations are not considered hashable.) +// +// For now, we assume there is no circular dependencies among +// hashed symbols. +func (w *writer) contentHash(s *LSym) goobj2.HashType { + h := sha1.New() + h.Write(s.P) + var tmp [14]byte + for i := range s.R { + r := &s.R[i] + binary.LittleEndian.PutUint32(tmp[:4], uint32(r.Off)) + tmp[4] = r.Siz + tmp[5] = uint8(r.Type) + binary.LittleEndian.PutUint64(tmp[6:14], uint64(r.Add)) + h.Write(tmp[:]) + rs := r.Sym + switch rs.PkgIdx { + case goobj2.PkgIdxHashed64: + h.Write([]byte{0}) + t := contentHash64(rs) + h.Write(t[:]) + case goobj2.PkgIdxHashed: + h.Write([]byte{1}) + t := w.contentHash(rs) + h.Write(t[:]) + case goobj2.PkgIdxBuiltin: + panic("unsupported") + case goobj2.PkgIdxNone: + h.Write([]byte{2}) + io.WriteString(h, rs.Name) // name is already expanded at this point + case goobj2.PkgIdxSelf: + io.WriteString(h, w.pkgpath) + binary.LittleEndian.PutUint32(tmp[:4], uint32(rs.SymIdx)) + h.Write(tmp[:4]) + default: + io.WriteString(h, rs.Pkg) + binary.LittleEndian.PutUint32(tmp[:4], uint32(rs.SymIdx)) + h.Write(tmp[:4]) + } + } + var b goobj2.HashType + copy(b[:], h.Sum(nil)) + return b +} + func makeSymRef(s *LSym) goobj2.SymRef { if s == nil { return goobj2.SymRef{} diff --git a/src/cmd/internal/obj/objfile_test.go b/src/cmd/internal/obj/objfile_test.go new file mode 100644 index 0000000000..ed3be20760 --- /dev/null +++ b/src/cmd/internal/obj/objfile_test.go @@ -0,0 +1,87 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package obj + +import ( + "cmd/internal/goobj2" + "cmd/internal/sys" + "testing" +) + +var dummyArch = LinkArch{Arch: sys.ArchAMD64} + +func TestContentHash64(t *testing.T) { + s1 := &LSym{P: []byte("A")} + s2 := &LSym{P: []byte("A\x00\x00\x00")} + s1.Set(AttrContentAddressable, true) + s2.Set(AttrContentAddressable, true) + h1 := contentHash64(s1) + h2 := contentHash64(s2) + if h1 != h2 { + t.Errorf("contentHash64(s1)=%x, contentHash64(s2)=%x, expect equal", h1, h2) + } + + ctxt := Linknew(&dummyArch) // little endian + s3 := ctxt.Int64Sym(int64('A')) + h3 := contentHash64(s3) + if h1 != h3 { + t.Errorf("contentHash64(s1)=%x, contentHash64(s3)=%x, expect equal", h1, h3) + } +} + +func TestContentHash(t *testing.T) { + syms := []*LSym{ + &LSym{P: []byte("TestSymbol")}, // 0 + &LSym{P: []byte("TestSymbol")}, // 1 + &LSym{P: []byte("TestSymbol2")}, // 2 + &LSym{P: []byte("")}, // 3 + &LSym{P: []byte("")}, // 4 + &LSym{P: []byte("")}, // 5 + &LSym{P: []byte("")}, // 6 + } + for _, s := range syms { + s.Set(AttrContentAddressable, true) + s.PkgIdx = goobj2.PkgIdxHashed + } + // s3 references s0 + r := Addrel(syms[3]) + r.Sym = syms[0] + // s4 references s0 + r = Addrel(syms[4]) + r.Sym = syms[0] + // s5 references s1 + r = Addrel(syms[5]) + r.Sym = syms[1] + // s6 references s2 + r = Addrel(syms[6]) + r.Sym = syms[2] + + // compute hashes + h := make([]goobj2.HashType, len(syms)) + w := &writer{} + for i := range h { + h[i] = w.contentHash(syms[i]) + } + + tests := []struct { + a, b int + equal bool + }{ + {0, 1, true}, // same contents, no relocs + {0, 2, false}, // different contents + {3, 4, true}, // same contents, same relocs + {3, 5, true}, // recursively same contents + {3, 6, false}, // same contents, different relocs + } + for _, test := range tests { + if (h[test.a] == h[test.b]) != test.equal { + eq := "equal" + if !test.equal { + eq = "not equal" + } + t.Errorf("h%d=%x, h%d=%x, expect %s", test.a, h[test.a], test.b, h[test.b], eq) + } + } +} diff --git a/src/cmd/internal/obj/sym.go b/src/cmd/internal/obj/sym.go index 6285486c66..67e4081f74 100644 --- a/src/cmd/internal/obj/sym.go +++ b/src/cmd/internal/obj/sym.go @@ -202,8 +202,10 @@ func (ctxt *Link) NumberSyms() { var idx, hashedidx, hashed64idx, nonpkgidx int32 ctxt.traverseSyms(traverseDefs, func(s *LSym) { - if s.ContentAddressable() && len(s.R) == 0 { // TODO: currently we don't support content-addressable symbols with relocations - if len(s.P) <= 8 { + // if Pkgpath is unknown, cannot hash symbols with relocations, as it + // may reference named symbols whose names are not fully expanded. + if s.ContentAddressable() && (ctxt.Pkgpath != "" || len(s.R) == 0) { + if len(s.P) <= 8 && len(s.R) == 0 { // we can use short hash only for symbols without relocations s.PkgIdx = goobj2.PkgIdxHashed64 s.SymIdx = hashed64idx if hashed64idx != int32(len(ctxt.hashed64defs)) {