From b249703e3c53cd7f1e5f808fb2f03714fec44b43 Mon Sep 17 00:00:00 2001 From: Jeremy Faller Date: Wed, 12 Aug 2020 12:54:03 -0400 Subject: [PATCH] [dev.link] cmd/compile, cmd/asm: add length to hashed symbols While working on deduplicating pcdata, I found that the following hashed symbols would result in the same: [] == [0,0,0,0....] This makes using content addressable symbols untenable for pcdata. Adding the length to the hash keeps the dream alive. No difference in binary size (darwin, cmd/compile), spurious improvements in DWARF phase memory. Change-Id: I21101f7754a3d870922b0dea39c947cc8509432f Reviewed-on: https://go-review.googlesource.com/c/go/+/247903 Run-TryBot: Jeremy Faller TryBot-Result: Gobot Gobot Reviewed-by: Than McIntosh Reviewed-by: Austin Clements Reviewed-by: Cherry Zhang --- src/cmd/internal/obj/objfile.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go index 7bc4f4992e..8234697d72 100644 --- a/src/cmd/internal/obj/objfile.go +++ b/src/cmd/internal/obj/objfile.go @@ -372,10 +372,22 @@ func contentHash64(s *LSym) goobj.Hash64Type { // hashed symbols. func (w *writer) contentHash(s *LSym) goobj.HashType { h := sha1.New() + var tmp [14]byte + + // Include the size of the symbol in the hash. + // This preserves the length of symbols, preventing the following two symbols + // from hashing the same: + // + // [2]int{1,2} ≠ [10]int{1,2,0,0,0...} + // + // In this case, if the smaller symbol is alive, the larger is not kept unless + // needed. + binary.LittleEndian.PutUint64(tmp[:8], uint64(s.Size)) + h.Write(tmp[:8]) + // The compiler trims trailing zeros _sometimes_. We just do // it always. h.Write(bytes.TrimRight(s.P, "\x00")) - var tmp [14]byte for i := range s.R { r := &s.R[i] binary.LittleEndian.PutUint32(tmp[:4], uint32(r.Off)) -- 2.48.1