// string data
off := dsname(symdata, 0, s, pos, "string")
ggloblsym(symdata, int32(off), obj.DUPOK|obj.RODATA|obj.LOCAL)
+ symdata.Set(obj.AttrContentAddressable, true)
}
return symdata
panic("bad sym ref")
}
return SymID{}
- case goobj2.PkgIdxNone:
+ case goobj2.PkgIdxHashed:
i = s.SymIdx + uint32(rr.NSym())
+ case goobj2.PkgIdxNone:
+ i = s.SymIdx + uint32(rr.NSym()+rr.NHasheddef())
case goobj2.PkgIdxBuiltin:
name, abi := goobj2.BuiltinName(int(s.SymIdx))
return SymID{name, int64(abi)}
// Symbols
pcdataBase := start + rr.PcdataBase()
- n := uint32(rr.NSym() + rr.NNonpkgdef() + rr.NNonpkgref())
- ndef := uint32(rr.NSym() + rr.NNonpkgdef())
+ ndef := uint32(rr.NSym() + rr.NHasheddef() + rr.NNonpkgdef())
+ n := ndef + uint32(rr.NNonpkgref())
for i := uint32(0); i < n; i++ {
osym := rr.Sym(i)
if osym.Name(rr) == "" {
import (
"bytes"
"cmd/internal/bio"
+ "crypto/sha1"
"encoding/binary"
"errors"
"fmt"
// Flag uint8
// Size uint32
// }
+// HashedDefs [...]struct { // hashed (content-addressable) symbol definitions
+// ... // same as SymbolDefs
+// }
// NonPkgDefs [...]struct { // non-pkg symbol definitions
// ... // same as SymbolDefs
// }
// ... // same as SymbolDefs
// }
//
+// Hash [...][N]byte
+//
// RelocIndex [...]uint32 // index to Relocs
// AuxIndex [...]uint32 // index to Aux
// DataIndex [...]uint32 // offset to Data
// SymIdx is the index of the symbol in the given package.
// - If PkgIdx is PkgIdxSelf, SymIdx is the index of the symbol in the
// SymbolDefs array.
+// - If PkgIdx is PkgIdxHashed, SymIdx is the index of the symbol in the
+// HashedDefs array.
// - If PkgIdx is PkgIdxNone, SymIdx is the index of the symbol in the
// NonPkgDefs array (could natually overflow to NonPkgRefs array).
// - Otherwise, SymIdx is the index of the symbol in some other package's
//
// {0, 0} represents a nil symbol. Otherwise PkgIdx should not be 0.
//
+// Hash contains the content hashes of content-addressable symbols, of
+// which PkgIdx is PkgIdxHashed, in the same order of HashedDefs array.
+//
// RelocIndex, AuxIndex, and DataIndex contains indices/offsets to
// Relocs/Aux/Data blocks, one element per symbol, first for all the
-// defined symbols, then all the defined non-package symbols, in the
-// same order of SymbolDefs/NonPkgDefs arrays. For N total defined
-// symbols, the array is of length N+1. The last element is the total
-// number of relocations (aux symbols, data blocks, etc.).
+// defined symbols, then all the defined hashed and non-package symbols,
+// in the same order of SymbolDefs/HashedDefs/NonPkgDefs arrays. For N
+// total defined symbols, the array is of length N+1. The last element is
+// the total number of relocations (aux symbols, data blocks, etc.).
//
// They can be accessed by index. For the i-th symbol, its relocations
// are the RelocIndex[i]-th (inclusive) to RelocIndex[i+1]-th (exclusive)
//
// Each symbol may (or may not) be associated with a number of auxiliary
// symbols. They are described in the Aux block. See Aux struct below.
-// Currently a symbol's Gotype and FuncInfo are auxiliary symbols. We
-// may make use of aux symbols in more cases, e.g. DWARF symbols.
+// Currently a symbol's Gotype, FuncInfo, and associated DWARF symbols
+// are auxiliary symbols.
const stringRefSize = 8 // two uint32s
// Package Index.
const (
PkgIdxNone = (1<<31 - 1) - iota // Non-package symbols
+ PkgIdxHashed // Hashed (content-addressable) symbols // TODO: multiple pseudo-packages depending on hash length/algorithm
PkgIdxBuiltin // Predefined runtime symbols (ex: runtime.newobject)
PkgIdxSelf // Symbols defined in the current package
PkgIdxInvalid = 0
BlkPkgIdx
BlkDwarfFile
BlkSymdef
+ BlkHasheddef
BlkNonpkgdef
BlkNonpkgref
+ BlkHash
BlkRelocIdx
BlkAuxIdx
BlkDataIdx
SymIdx uint32
}
+// Hash
+type HashType [HashSize]byte
+
+const HashSize = sha1.Size
+
// Relocation.
//
// Serialized format:
return int(r.h.Offsets[BlkSymdef+1]-r.h.Offsets[BlkSymdef]) / SymSize
}
+func (r *Reader) NHasheddef() int {
+ return int(r.h.Offsets[BlkHasheddef+1]-r.h.Offsets[BlkHasheddef]) / SymSize
+}
+
func (r *Reader) NNonpkgdef() int {
return int(r.h.Offsets[BlkNonpkgdef+1]-r.h.Offsets[BlkNonpkgdef]) / SymSize
}
return (*Sym)(unsafe.Pointer(&r.b[off]))
}
+// Hash returns a pointer to the i-th hashed symbol's hash.
+// Note: here i is the index of hashed symbols, not all symbols
+// (unlike other accessors).
+func (r *Reader) Hash(i uint32) *HashType {
+ off := r.h.Offsets[BlkHash] + uint32(i*HashSize)
+ return (*HashType)(unsafe.Pointer(&r.b[off]))
+}
+
// NReloc returns the number of relocations of the i-th symbol.
func (r *Reader) NReloc(i uint32) int {
relocIdxOff := r.h.Offsets[BlkRelocIdx] + uint32(i*4)
// Used by the linker to determine what methods can be pruned.
AttrUsedInIface
+ // ContentAddressable indicates this is a content-addressable symbol.
+ AttrContentAddressable
+
// attrABIBase is the value at which the ABI is encoded in
// Attribute. This must be last; all bits after this are
// assumed to be an ABI value.
attrABIBase
)
-func (a Attribute) DuplicateOK() bool { return a&AttrDuplicateOK != 0 }
-func (a Attribute) MakeTypelink() bool { return a&AttrMakeTypelink != 0 }
-func (a Attribute) CFunc() bool { return a&AttrCFunc != 0 }
-func (a Attribute) NoSplit() bool { return a&AttrNoSplit != 0 }
-func (a Attribute) Leaf() bool { return a&AttrLeaf != 0 }
-func (a Attribute) SeenGlobl() bool { return a&AttrSeenGlobl != 0 }
-func (a Attribute) OnList() bool { return a&AttrOnList != 0 }
-func (a Attribute) ReflectMethod() bool { return a&AttrReflectMethod != 0 }
-func (a Attribute) Local() bool { return a&AttrLocal != 0 }
-func (a Attribute) Wrapper() bool { return a&AttrWrapper != 0 }
-func (a Attribute) NeedCtxt() bool { return a&AttrNeedCtxt != 0 }
-func (a Attribute) NoFrame() bool { return a&AttrNoFrame != 0 }
-func (a Attribute) Static() bool { return a&AttrStatic != 0 }
-func (a Attribute) WasInlined() bool { return a&AttrWasInlined != 0 }
-func (a Attribute) TopFrame() bool { return a&AttrTopFrame != 0 }
-func (a Attribute) Indexed() bool { return a&AttrIndexed != 0 }
-func (a Attribute) UsedInIface() bool { return a&AttrUsedInIface != 0 }
+func (a Attribute) DuplicateOK() bool { return a&AttrDuplicateOK != 0 }
+func (a Attribute) MakeTypelink() bool { return a&AttrMakeTypelink != 0 }
+func (a Attribute) CFunc() bool { return a&AttrCFunc != 0 }
+func (a Attribute) NoSplit() bool { return a&AttrNoSplit != 0 }
+func (a Attribute) Leaf() bool { return a&AttrLeaf != 0 }
+func (a Attribute) SeenGlobl() bool { return a&AttrSeenGlobl != 0 }
+func (a Attribute) OnList() bool { return a&AttrOnList != 0 }
+func (a Attribute) ReflectMethod() bool { return a&AttrReflectMethod != 0 }
+func (a Attribute) Local() bool { return a&AttrLocal != 0 }
+func (a Attribute) Wrapper() bool { return a&AttrWrapper != 0 }
+func (a Attribute) NeedCtxt() bool { return a&AttrNeedCtxt != 0 }
+func (a Attribute) NoFrame() bool { return a&AttrNoFrame != 0 }
+func (a Attribute) Static() bool { return a&AttrStatic != 0 }
+func (a Attribute) WasInlined() bool { return a&AttrWasInlined != 0 }
+func (a Attribute) TopFrame() bool { return a&AttrTopFrame != 0 }
+func (a Attribute) Indexed() bool { return a&AttrIndexed != 0 }
+func (a Attribute) UsedInIface() bool { return a&AttrUsedInIface != 0 }
+func (a Attribute) ContentAddressable() bool { return a&AttrContentAddressable != 0 }
func (a *Attribute) Set(flag Attribute, value bool) {
if value {
{bit: AttrWasInlined, s: ""},
{bit: AttrTopFrame, s: "TOPFRAME"},
{bit: AttrIndexed, s: ""},
+ {bit: AttrContentAddressable, s: ""},
}
// TextAttrString formats a for printing in as part of a TEXT prog.
pkgIdx map[string]int32
defs []*LSym // list of defined symbols in the current package
+ hasheddefs []*LSym // list of defined hashed (content-addressable) symbols
nonpkgdefs []*LSym // list of defined non-package symbols
nonpkgrefs []*LSym // list of referenced non-package symbols
"cmd/internal/bio"
"cmd/internal/goobj2"
"cmd/internal/objabi"
+ "crypto/sha1"
"fmt"
"path/filepath"
"strings"
w.Sym(s)
}
+ // Hashed symbol definitions
+ h.Offsets[goobj2.BlkHasheddef] = w.Offset()
+ for _, s := range ctxt.hasheddefs {
+ w.Sym(s)
+ }
+
// Non-pkg symbol definitions
h.Offsets[goobj2.BlkNonpkgdef] = w.Offset()
for _, s := range ctxt.nonpkgdefs {
w.Sym(s)
}
+ // Hashes
+ h.Offsets[goobj2.BlkHash] = w.Offset()
+ for _, s := range ctxt.hasheddefs {
+ w.Hash(s)
+ }
+ // TODO: hashedrefs unused/unsupported for now
+
// Reloc indexes
h.Offsets[goobj2.BlkRelocIdx] = w.Offset()
nreloc := uint32(0)
- lists := [][]*LSym{ctxt.defs, ctxt.nonpkgdefs}
+ lists := [][]*LSym{ctxt.defs, ctxt.hasheddefs, ctxt.nonpkgdefs}
for _, list := range lists {
for _, s := range list {
w.Uint32(nreloc)
o.Write(w.Writer)
}
+func (w *writer) Hash(s *LSym) {
+ if !s.ContentAddressable() {
+ panic("Hash of non-content-addresable symbol")
+ }
+ b := goobj2.HashType(sha1.Sum(s.P))
+ w.Bytes(b[:])
+}
+
func makeSymRef(s *LSym) goobj2.SymRef {
if s == nil {
return goobj2.SymRef{}
seen := make(map[goobj2.SymRef]bool)
w.ctxt.traverseSyms(traverseRefs, func(rs *LSym) { // only traverse refs, not auxs, as tools don't need auxs
switch rs.PkgIdx {
- case goobj2.PkgIdxNone, goobj2.PkgIdxBuiltin, goobj2.PkgIdxSelf: // not an external indexed reference
+ case goobj2.PkgIdxNone, goobj2.PkgIdxHashed, goobj2.PkgIdxBuiltin, goobj2.PkgIdxSelf: // not an external indexed reference
return
case goobj2.PkgIdxInvalid:
panic("unindexed symbol reference")
ctxt.pkgIdx = make(map[string]int32)
ctxt.defs = []*LSym{}
+ ctxt.hasheddefs = []*LSym{}
ctxt.nonpkgdefs = []*LSym{}
- var idx, nonpkgidx int32 = 0, 0
+ var idx, hashedidx, nonpkgidx int32
ctxt.traverseSyms(traverseDefs, func(s *LSym) {
- if isNonPkgSym(ctxt, s) {
+ if s.ContentAddressable() {
+ s.PkgIdx = goobj2.PkgIdxHashed
+ s.SymIdx = hashedidx
+ if hashedidx != int32(len(ctxt.hasheddefs)) {
+ panic("bad index")
+ }
+ ctxt.hasheddefs = append(ctxt.hasheddefs, s)
+ hashedidx++
+ } else if isNonPkgSym(ctxt, s) {
s.PkgIdx = goobj2.PkgIdxNone
s.SymIdx = nonpkgidx
if nonpkgidx != int32(len(ctxt.nonpkgdefs)) {
}
}
pkg := rs.Pkg
+ if rs.ContentAddressable() {
+ // for now, only support content-addressable symbols that are always locally defined.
+ panic("hashed refs unsupported for now")
+ }
if pkg == "" || pkg == "\"\"" || pkg == "_" || !rs.Indexed() {
rs.PkgIdx = goobj2.PkgIdxNone
rs.SymIdx = nonpkgidx
bench.Start("hostlink")
ctxt.hostlink()
if ctxt.Debugvlog != 0 {
- ctxt.Logf("%d symbols, %d reachable\n", ctxt.loader.NSym(), ctxt.loader.NReachableSym())
+ ctxt.Logf("%s", ctxt.loader.Stat())
ctxt.Logf("%d liveness data\n", liveness)
}
bench.Start("Flush")
// oReader is a wrapper type of obj.Reader, along with some
// extra information.
-// TODO: rename to objReader once the old one is gone?
type oReader struct {
*goobj2.Reader
- unit *sym.CompilationUnit
- version int // version of static symbol
- flags uint32 // read from object file
- pkgprefix string
- syms []Sym // Sym's global index, indexed by local index
- ndef int // cache goobj2.Reader.NSym()
- objidx uint32 // index of this reader in the objs slice
+ unit *sym.CompilationUnit
+ version int // version of static symbol
+ flags uint32 // read from object file
+ pkgprefix string
+ syms []Sym // Sym's global index, indexed by local index
+ ndef int // cache goobj2.Reader.NSym()
+ nhasheddef int // cache goobj2.Reader.NHashedDef()
+ objidx uint32 // index of this reader in the objs slice
}
+// Total number of defined symbols (package symbols, hashed symbols, and
+// non-package symbols).
+func (r *oReader) NAlldef() int { return r.ndef + r.nhasheddef + r.NNonpkgdef() }
+
type objIdx struct {
r *oReader
i Sym // start index
objSyms []objSym // global index mapping to local index
- symsByName [2]map[string]Sym // map symbol name to index, two maps are for ABI0 and ABIInternal
- extStaticSyms map[nameVer]Sym // externally defined static symbols, keyed by name
+ hashedSyms map[goobj2.HashType]Sym // hashed (content-addressable) symbols, keyed by content hash
+ symsByName [2]map[string]Sym // map symbol name to index, two maps are for ABI0 and ABIInternal
+ extStaticSyms map[nameVer]Sym // externally defined static symbols, keyed by name
extReader *oReader // a dummy oReader, for external symbols
payloadBatch []extSymPayload
elfsetstring elfsetstringFunc
errorReporter *ErrorReporter
+
+ npkgsyms int // number of package symbols, for accounting
}
const (
pkgDef = iota
+ hashedDef
nonPkgDef
nonPkgRef
)
objs: []objIdx{{}, {extReader, 0}}, // reserve index 0 for nil symbol, 1 for external symbols
objSyms: make([]objSym, 1, 100000), // reserve index 0 for nil symbol
extReader: extReader,
- symsByName: [2]map[string]Sym{make(map[string]Sym, 100000), make(map[string]Sym, 50000)}, // preallocate ~2MB for ABI0 and ~1MB for ABI1 symbols
+ hashedSyms: make(map[goobj2.HashType]Sym, 20000), // TODO: adjust preallocation sizes
+ symsByName: [2]map[string]Sym{make(map[string]Sym, 80000), make(map[string]Sym, 50000)}, // preallocate ~2MB for ABI0 and ~1MB for ABI1 symbols
objByPkg: make(map[string]*oReader),
outer: make(map[Sym]Sym),
sub: make(map[Sym]Sym),
addToGlobal := func() {
l.objSyms = append(l.objSyms, objSym{r.objidx, li})
}
- if name == "" {
+ if name == "" && kind != hashedDef {
addToGlobal()
return i, true // unnamed aux symbol
}
addToGlobal()
return i, true
}
- if kind == pkgDef {
+ switch kind {
+ case pkgDef:
// Defined package symbols cannot be dup to each other.
// We load all the package symbols first, so we don't need
// to check dup here.
l.symsByName[ver][name] = i
addToGlobal()
return i, true
+ case hashedDef:
+ // Hashed (content-addressable) symbol. Check the hash
+ // but don't add to name lookup table, as they are not
+ // referenced by name. Also no need to do overwriting
+ // check, as same hash indicates same content.
+ hash := r.Hash(li - uint32(r.ndef))
+ if oldi, existed := l.hashedSyms[*hash]; existed {
+ // TODO: check symbol size for extra safety against collision?
+ if l.flags&FlagStrictDups != 0 {
+ l.checkdup(name, r, li, oldi)
+ }
+ return oldi, false
+ }
+ l.hashedSyms[*hash] = i
+ addToGlobal()
+ return i, true
}
// Non-package (named) symbol. Check if it already exists.
panic("bad sym ref")
}
return 0
- case goobj2.PkgIdxNone:
+ case goobj2.PkgIdxHashed:
i := int(s.SymIdx) + r.ndef
return r.syms[i]
+ case goobj2.PkgIdxNone:
+ i := int(s.SymIdx) + r.ndef + r.nhasheddef
+ return r.syms[i]
case goobj2.PkgIdxBuiltin:
return l.builtinSyms[s.SymIdx]
case goobj2.PkgIdxSelf:
}
pkgprefix := objabi.PathToPrefix(lib.Pkg) + "."
ndef := r.NSym()
- nnonpkgdef := r.NNonpkgdef()
- or := &oReader{r, unit, localSymVersion, r.Flags(), pkgprefix, make([]Sym, ndef+nnonpkgdef+r.NNonpkgref()), ndef, uint32(len(l.objs))}
+ nhasheddef := r.NHasheddef()
+ or := &oReader{
+ Reader: r,
+ unit: unit,
+ version: localSymVersion,
+ flags: r.Flags(),
+ pkgprefix: pkgprefix,
+ syms: make([]Sym, ndef+nhasheddef+r.NNonpkgdef()+r.NNonpkgref()),
+ ndef: ndef,
+ nhasheddef: nhasheddef,
+ objidx: uint32(len(l.objs)),
+ }
// Autolib
lib.Autolib = append(lib.Autolib, r.Autolib()...)
// Preload symbols of given kind from an object.
func (l *Loader) preloadSyms(r *oReader, kind int) {
- ndef := uint32(r.NSym())
- nnonpkgdef := uint32(r.NNonpkgdef())
var start, end uint32
switch kind {
case pkgDef:
start = 0
- end = ndef
+ end = uint32(r.ndef)
+ case hashedDef:
+ start = uint32(r.ndef)
+ end = uint32(r.ndef + r.nhasheddef)
case nonPkgDef:
- start = ndef
- end = ndef + nnonpkgdef
+ start = uint32(r.ndef + r.nhasheddef)
+ end = uint32(r.ndef + r.nhasheddef + r.NNonpkgdef())
default:
panic("preloadSyms: bad kind")
}
loadingRuntimePkg := r.unit.Lib.Pkg == "runtime"
for i := start; i < end; i++ {
osym := r.Sym(i)
- name := osym.Name(r.Reader)
- if needNameExpansion {
- name = strings.Replace(name, "\"\".", r.pkgprefix, -1)
+ var name string
+ var v int
+ var dupok bool
+ var typ sym.SymKind
+ if kind != hashedDef { // we don't need the name, etc. for hashed symbols
+ name = osym.Name(r.Reader)
+ if needNameExpansion {
+ name = strings.Replace(name, "\"\".", r.pkgprefix, -1)
+ }
+ v = abiToVer(osym.ABI(), r.version)
+ if kind == nonPkgDef {
+ dupok = osym.Dupok()
+ typ = sym.AbiSymKindToSymKind[objabi.SymKind(osym.Type())]
+ }
}
- v := abiToVer(osym.ABI(), r.version)
- dupok := osym.Dupok()
- gi, added := l.AddSym(name, v, r, i, kind, dupok, sym.AbiSymKindToSymKind[objabi.SymKind(osym.Type())])
+ gi, added := l.AddSym(name, v, r, i, kind, dupok, typ)
r.syms[i] = gi
if !added {
continue
}
}
-// Add non-package symbols and references to external symbols (which are always
-// named).
+// Add hashed (content-addressable) symbols, non-package symbols, and
+// references to external symbols (which are always named).
func (l *Loader) LoadNonpkgSyms(arch *sys.Arch) {
+ l.npkgsyms = l.NSym()
for _, o := range l.objs[goObjStart:] {
+ l.preloadSyms(o.r, hashedDef)
l.preloadSyms(o.r, nonPkgDef)
}
for _, o := range l.objs[goObjStart:] {
}
func loadObjRefs(l *Loader, r *oReader, arch *sys.Arch) {
- ndef := uint32(r.NSym() + r.NNonpkgdef())
+ ndef := uint32(r.NAlldef())
needNameExpansion := r.NeedNameExpansion()
for i, n := uint32(0), uint32(r.NNonpkgref()); i < n; i++ {
osym := r.Sym(ndef + i)
// If this is a def, then copy the guts. We expect this case
// to be very rare (one case it may come up is with -X).
- if li < uint32(r.NSym()+r.NNonpkgdef()) {
+ if li < uint32(r.NAlldef()) {
// Copy relocations
relocs := l.Relocs(symIdx)
for _, o := range l.objs[goObjStart:] {
r := o.r
lib := r.unit.Lib
- for i, n := uint32(0), uint32(r.NSym()+r.NNonpkgdef()); i < n; i++ {
+ for i, n := uint32(0), uint32(r.NAlldef()); i < n; i++ {
gi := l.toGlobal(r, i)
if !l.attrReachable.Has(gi) {
continue
l.errorReporter.Errorf(s, format, args...)
}
+// Symbol statistics.
+func (l *Loader) Stat() string {
+ s := fmt.Sprintf("%d symbols, %d reachable\n", l.NSym(), l.NReachableSym())
+ s += fmt.Sprintf("\t%d package symbols, %d hashed symbols, %d non-package symbols, %d external symbols\n",
+ l.npkgsyms, len(l.hashedSyms), int(l.extStart)-l.npkgsyms-len(l.hashedSyms), l.NSym()-int(l.extStart))
+ return s
+}
+
// For debugging.
func (l *Loader) Dump() {
fmt.Println("objs")