From 815a5e29f434281c9ae3740ad43aadd4464ae15e Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Mart=C3=AD?= Date: Sat, 26 Sep 2020 21:00:37 +0100 Subject: [PATCH] cmd/go: fix doc math for build cache hashing MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The function takes five 24-bit chunks from the hash, resulting in 120 bits. When base-64 encoded, this results in a 20-byte output string, which is confirmed by "var dst [chunks * 4]byte". It seems like the documented math could have been written for a previous implementation with shorter hashes, using 4 chunks instead of 5, as then the math checks out. Since this code has been working correctly for over three years, let's fix the documentation to reflect the code. Change-Id: I9e908e6bafb5dcc1e3c23915e2b6c8843ed444d6 Reviewed-on: https://go-review.googlesource.com/c/go/+/257646 Trust: Daniel Martí Run-TryBot: Daniel Martí TryBot-Result: Go Bot Reviewed-by: Bryan C. Mills --- src/cmd/go/internal/work/buildid.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/cmd/go/internal/work/buildid.go b/src/cmd/go/internal/work/buildid.go index 6613b6fe3f..a3c9b1a2c1 100644 --- a/src/cmd/go/internal/work/buildid.go +++ b/src/cmd/go/internal/work/buildid.go @@ -110,15 +110,15 @@ func contentID(buildID string) string { // hashToString converts the hash h to a string to be recorded // in package archives and binaries as part of the build ID. -// We use the first 96 bits of the hash and encode it in base64, -// resulting in a 16-byte string. Because this is only used for +// We use the first 120 bits of the hash (5 chunks of 24 bits each) and encode +// it in base64, resulting in a 20-byte string. Because this is only used for // detecting the need to rebuild installed files (not for lookups -// in the object file cache), 96 bits are sufficient to drive the +// in the object file cache), 120 bits are sufficient to drive the // probability of a false "do not need to rebuild" decision to effectively zero. // We embed two different hashes in archives and four in binaries, -// so cutting to 16 bytes is a significant savings when build IDs are displayed. -// (16*4+3 = 67 bytes compared to 64*4+3 = 259 bytes for the -// more straightforward option of printing the entire h in hex). +// so cutting to 20 bytes is a significant savings when build IDs are displayed. +// (20*4+3 = 83 bytes compared to 64*4+3 = 259 bytes for the +// more straightforward option of printing the entire h in base64). func hashToString(h [cache.HashSize]byte) string { const b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" const chunks = 5 @@ -425,7 +425,7 @@ func (b *Builder) useCache(a *Action, actionHash cache.ActionID, target string) // It's important that the overall buildID be unlikely verging on impossible // to appear in the output by chance, but that should be taken care of by // the actionID half; if it also appeared in the input that would be like an - // engineered 96-bit partial SHA256 collision. + // engineered 120-bit partial SHA256 collision. a.actionID = actionHash actionID := hashToString(actionHash) if a.json != nil { -- 2.48.1