Cypherpunks repositories - gostls13.git/commitdiff
[dev.link] cmd/internal/obj: handle content-addressable symbols with relocations
author Cherry Zhang <cherryyz@google.com>
Thu, 16 Jul 2020 03:14:50 +0000 (23:14 -0400)
committer Cherry Zhang <cherryyz@google.com>
Mon, 20 Jul 2020 17:26:32 +0000 (17:26 +0000)
For content-addressable symbols with relocations, we build a
content hash based on each symbol's content and relocations.
Depending on the category of the referenced symbol, we choose
different hash algorithms such that the hash is globally consistent.

For now, we only support content-addressable symbols with
relocations when the current package's import path is known, so
that the symbol names are fully expanded. Otherwise, if the
referenced symbol is a named symbol whose name is not fully
expanded, the hash won't be globally consistent, and can cause
erroneous collisions. This is fine for now, as the deduplication
is just an optimization, not a requirement for correctness (until
we get to type descriptors).

Change-Id: I639e4e03dd749b5d71f0a55c2525926575b1ac30
Reviewed-on: https://go-review.googlesource.com/c/go/+/243142
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Jeremy Faller <jeremy@golang.org>
src/cmd/asm/main.go
src/cmd/compile/internal/gc/main.go
src/cmd/compile/internal/gc/obj.go
src/cmd/internal/obj/link.go
src/cmd/internal/obj/objfile2.go
src/cmd/internal/obj/objfile_test.go [new file with mode: 0644]
src/cmd/internal/obj/sym.go

index 31d8549d2d49a3d799385ac7c05dd1b101c21e27..a6eb44de734fb9525a93670a278021aebbadcb50 100644 (file)
@@ -41,6 +41,7 @@ func main() {
        ctxt.Flag_dynlink = *flags.Dynlink
        ctxt.Flag_shared = *flags.Shared || *flags.Dynlink
        ctxt.IsAsm = true
+       ctxt.Pkgpath = *flags.Importpath
        switch *flags.Spectre {
        default:
                log.Printf("unknown setting -spectre=%s", *flags.Spectre)
@@ -97,7 +98,7 @@ func main() {
        }
        if ok && !*flags.SymABIs {
                ctxt.NumberSyms()
-               obj.WriteObjFile(ctxt, buf, *flags.Importpath)
+               obj.WriteObjFile(ctxt, buf)
        }
        if !ok || diag {
                if failedFile != "" {
index 6e204f49bc39ad901ad01c687bc5054bb6bfa5e5..bb28ef01a1d30d833dc6b042efd013b747f7c708 100644 (file)
@@ -789,6 +789,7 @@ func Main(archInit func(*Arch)) {
        // Write object data to disk.
        timings.Start("be", "dumpobj")
        dumpdata()
+       Ctxt.Pkgpath = myimportpath
        Ctxt.NumberSyms()
        dumpobj()
        if asmhdr != "" {
index 26ea77536830e2c88079383e00d7ac28811c2f57..0826b04e3390ef990d3110ade56537fb5a7073d9 100644 (file)
@@ -166,7 +166,7 @@ func dumpLinkerObj(bout *bio.Writer) {
 
        fmt.Fprintf(bout, "\n!\n")
 
-       obj.WriteObjFile(Ctxt, bout, myimportpath)
+       obj.WriteObjFile(Ctxt, bout)
 }
 
 func addptabs() {
index ffc3e99a20aab79189859159a8144cc866df9092..195af8494cf044c55f46224ee8f330cbfa0805d3 100644 (file)
@@ -671,6 +671,7 @@ type Link struct {
        Retpoline          bool // emit use of retpoline stubs for indirect jmp/call
        Bso                *bufio.Writer
        Pathname           string
+       Pkgpath            string           // the current package's import path, "" if unknown
        hashmu             sync.Mutex       // protects hash, funchash
        hash               map[string]*LSym // name -> sym mapping
        funchash           map[string]*LSym // name -> sym mapping for ABIInternal syms
index 6ac23bc4185dd3c5797b98fb369dee46f26e104c..6a5f3726f8a50549f360ad3d63a80921f4947fe6 100644 (file)
@@ -12,13 +12,15 @@ import (
        "cmd/internal/goobj2"
        "cmd/internal/objabi"
        "crypto/sha1"
+       "encoding/binary"
        "fmt"
+       "io"
        "path/filepath"
        "strings"
 )
 
 // Entry point of writing new object file.
-func WriteObjFile(ctxt *Link, b *bio.Writer, pkgpath string) {
+func WriteObjFile(ctxt *Link, b *bio.Writer) {
 
        debugAsmEmit(ctxt)
 
@@ -27,7 +29,7 @@ func WriteObjFile(ctxt *Link, b *bio.Writer, pkgpath string) {
        w := writer{
                Writer:  goobj2.NewWriter(b),
                ctxt:    ctxt,
-               pkgpath: objabi.PathToPrefix(pkgpath),
+               pkgpath: objabi.PathToPrefix(ctxt.Pkgpath),
        }
 
        start := b.Offset()
@@ -39,7 +41,7 @@ func WriteObjFile(ctxt *Link, b *bio.Writer, pkgpath string) {
        if ctxt.Flag_shared {
                flags |= goobj2.ObjFlagShared
        }
-       if pkgpath == "" {
+       if w.pkgpath == "" {
                flags |= goobj2.ObjFlagNeedNameExpansion
        }
        if ctxt.IsAsm {
@@ -336,19 +338,80 @@ func (w *writer) Hash64(s *LSym) {
        if !s.ContentAddressable() || len(s.R) != 0 {
                panic("Hash of non-content-addresable symbol")
        }
-       var b goobj2.Hash64Type
-       copy(b[:], s.P)
+       b := contentHash64(s)
        w.Bytes(b[:])
 }
 
 func (w *writer) Hash(s *LSym) {
-       if !s.ContentAddressable() || len(s.R) != 0 { // TODO: currently we don't support content-addressable symbols with relocations
+       if !s.ContentAddressable() {
                panic("Hash of non-content-addresable symbol")
        }
-       b := goobj2.HashType(sha1.Sum(s.P))
+       b := w.contentHash(s)
        w.Bytes(b[:])
 }
 
+func contentHash64(s *LSym) goobj2.Hash64Type {
+       var b goobj2.Hash64Type
+       copy(b[:], s.P)
+       return b
+}
+
+// Compute the content hash for a content-addressable symbol.
+// We build a content hash based on its content and relocations.
+// Depending on the category of the referenced symbol, we choose
+// different hash algorithms such that the hash is globally
+// consistent.
+// - For a referenced content-addressable symbol, its content hash
+//   is globally consistent.
+// - For a package symbol, its local index is globally consistent.
+// - For a non-package symbol, its fully-expanded name is globally
+//   consistent. For now, we require that the current package path
+//   be known so we can always expand symbol names. (Otherwise,
+//   symbols with relocations are not considered hashable.)
+//
+// For now, we assume there are no circular dependencies among
+// hashed symbols.
+func (w *writer) contentHash(s *LSym) goobj2.HashType {
+       h := sha1.New()
+       h.Write(s.P)
+       var tmp [14]byte
+       for i := range s.R {
+               r := &s.R[i]
+               binary.LittleEndian.PutUint32(tmp[:4], uint32(r.Off))
+               tmp[4] = r.Siz
+               tmp[5] = uint8(r.Type)
+               binary.LittleEndian.PutUint64(tmp[6:14], uint64(r.Add))
+               h.Write(tmp[:])
+               rs := r.Sym
+               switch rs.PkgIdx {
+               case goobj2.PkgIdxHashed64:
+                       h.Write([]byte{0})
+                       t := contentHash64(rs)
+                       h.Write(t[:])
+               case goobj2.PkgIdxHashed:
+                       h.Write([]byte{1})
+                       t := w.contentHash(rs)
+                       h.Write(t[:])
+               case goobj2.PkgIdxBuiltin:
+                       panic("unsupported")
+               case goobj2.PkgIdxNone:
+                       h.Write([]byte{2})
+                       io.WriteString(h, rs.Name) // name is already expanded at this point
+               case goobj2.PkgIdxSelf:
+                       io.WriteString(h, w.pkgpath)
+                       binary.LittleEndian.PutUint32(tmp[:4], uint32(rs.SymIdx))
+                       h.Write(tmp[:4])
+               default:
+                       io.WriteString(h, rs.Pkg)
+                       binary.LittleEndian.PutUint32(tmp[:4], uint32(rs.SymIdx))
+                       h.Write(tmp[:4])
+               }
+       }
+       var b goobj2.HashType
+       copy(b[:], h.Sum(nil))
+       return b
+}
+
 func makeSymRef(s *LSym) goobj2.SymRef {
        if s == nil {
                return goobj2.SymRef{}
diff --git a/src/cmd/internal/obj/objfile_test.go b/src/cmd/internal/obj/objfile_test.go
new file mode 100644 (file)
index 0000000..ed3be20
--- /dev/null
@@ -0,0 +1,87 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package obj
+
+import (
+       "cmd/internal/goobj2"
+       "cmd/internal/sys"
+       "testing"
+)
+
+var dummyArch = LinkArch{Arch: sys.ArchAMD64}
+
+func TestContentHash64(t *testing.T) {
+       s1 := &LSym{P: []byte("A")}
+       s2 := &LSym{P: []byte("A\x00\x00\x00")}
+       s1.Set(AttrContentAddressable, true)
+       s2.Set(AttrContentAddressable, true)
+       h1 := contentHash64(s1)
+       h2 := contentHash64(s2)
+       if h1 != h2 {
+               t.Errorf("contentHash64(s1)=%x, contentHash64(s2)=%x, expect equal", h1, h2)
+       }
+
+       ctxt := Linknew(&dummyArch) // little endian
+       s3 := ctxt.Int64Sym(int64('A'))
+       h3 := contentHash64(s3)
+       if h1 != h3 {
+               t.Errorf("contentHash64(s1)=%x, contentHash64(s3)=%x, expect equal", h1, h3)
+       }
+}
+
+func TestContentHash(t *testing.T) {
+       syms := []*LSym{
+               &LSym{P: []byte("TestSymbol")},  // 0
+               &LSym{P: []byte("TestSymbol")},  // 1
+               &LSym{P: []byte("TestSymbol2")}, // 2
+               &LSym{P: []byte("")},            // 3
+               &LSym{P: []byte("")},            // 4
+               &LSym{P: []byte("")},            // 5
+               &LSym{P: []byte("")},            // 6
+       }
+       for _, s := range syms {
+               s.Set(AttrContentAddressable, true)
+               s.PkgIdx = goobj2.PkgIdxHashed
+       }
+       // s3 references s0
+       r := Addrel(syms[3])
+       r.Sym = syms[0]
+       // s4 references s0
+       r = Addrel(syms[4])
+       r.Sym = syms[0]
+       // s5 references s1
+       r = Addrel(syms[5])
+       r.Sym = syms[1]
+       // s6 references s2
+       r = Addrel(syms[6])
+       r.Sym = syms[2]
+
+       // compute hashes
+       h := make([]goobj2.HashType, len(syms))
+       w := &writer{}
+       for i := range h {
+               h[i] = w.contentHash(syms[i])
+       }
+
+       tests := []struct {
+               a, b  int
+               equal bool
+       }{
+               {0, 1, true},  // same contents, no relocs
+               {0, 2, false}, // different contents
+               {3, 4, true},  // same contents, same relocs
+               {3, 5, true},  // recursively same contents
+               {3, 6, false}, // same contents, different relocs
+       }
+       for _, test := range tests {
+               if (h[test.a] == h[test.b]) != test.equal {
+                       eq := "equal"
+                       if !test.equal {
+                               eq = "not equal"
+                       }
+                       t.Errorf("h%d=%x, h%d=%x, expect %s", test.a, h[test.a], test.b, h[test.b], eq)
+               }
+       }
+}
index 6285486c669ac9f2b902f28ba4230c4fe96b2753..67e4081f74f568206d981994a4f7dde9e9b9f8aa 100644 (file)
@@ -202,8 +202,10 @@ func (ctxt *Link) NumberSyms() {
 
        var idx, hashedidx, hashed64idx, nonpkgidx int32
        ctxt.traverseSyms(traverseDefs, func(s *LSym) {
-               if s.ContentAddressable() && len(s.R) == 0 { // TODO: currently we don't support content-addressable symbols with relocations
-                       if len(s.P) <= 8 {
+               // if Pkgpath is unknown, cannot hash symbols with relocations, as they
+               // may reference named symbols whose names are not fully expanded.
+               if s.ContentAddressable() && (ctxt.Pkgpath != "" || len(s.R) == 0) {
+                       if len(s.P) <= 8 && len(s.R) == 0 { // we can use short hash only for symbols without relocations
                                s.PkgIdx = goobj2.PkgIdxHashed64
                                s.SymIdx = hashed64idx
                                if hashed64idx != int32(len(ctxt.hashed64defs)) {