From 019d79872681d49de7ea192ac2dbc02f0ce679fc Mon Sep 17 00:00:00 2001 From: Mark Freeman Date: Fri, 9 May 2025 16:49:15 -0400 Subject: [PATCH] internal/pkgbits: write a formal grammar for UIR primitives This complements the grammar being developed in package noder. It is unclear how to discuss references in their current state, as they require knowledge of sections, elements, etc. Perhaps the references here should refer to indices on the byte array. This would allow a stronger separation of pkgbits and noder. Change-Id: Ic0e5ac9c07f0a0b92d6ffd4d4e26dbe5dcf89e57 Reviewed-on: https://go-review.googlesource.com/c/go/+/671440 Reviewed-by: Robert Griesemer TryBot-Bypass: Mark Freeman Auto-Submit: Mark Freeman --- src/internal/pkgbits/doc.go | 79 +++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/src/internal/pkgbits/doc.go b/src/internal/pkgbits/doc.go index 223f6983e5..b2c4888685 100644 --- a/src/internal/pkgbits/doc.go +++ b/src/internal/pkgbits/doc.go @@ -2,6 +2,85 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +/* +The Unified IR (UIR) format for primitive types is implicitly defined by the +package pkgbits. + +The most basic primitives are laid out as below. + +Bool = [ Sync ] byte . +Int64 = [ Sync ] zvarint . +Uint64 = [ Sync ] uvarint . + +zvarint = (* a zig-zag encoded signed variable-width integer *) . +uvarint = (* an unsigned variable-width integer *) . + +# Strings +Strings are not encoded directly. Rather, they are deduplicated during encoding +and referenced where needed. + +String = [ Sync ] StringRef . +StringRef = [ Sync ] Uint64 . // TODO(markfreeman): Document. + +StringSlice = Uint64 // the number of strings in the slice + { String } + . + +// TODO(markfreeman) It is awkward to discuss references (and by extension +// strings and constants). We cannot explain how they resolve without mention +// of foreign concepts. Ideally, references would be defined in familar terms — +// perhaps using an index on the byte array. + +# Constants +Constants appear as defined via the package constant. + +Constant = [ Sync ] + Bool // whether the constant is a complex number + Scalar // the real part + [ Scalar ] // if complex, the imaginary part + . + +A scalar represents a value using one of several potential formats. The exact +format and interpretation is distinguished by a code preceding the value. + +Scalar = [ Sync ] + Uint64 // the code + Val + . + +Val = Bool + | Int64 + | String + | Term // big integer + | Term Term // big ratio, numerator / denominator + | BigBytes // big float, precision 512 + . + +Term = BigBytes + Bool // whether the term is negative + . + +BigBytes = String . // bytes of a big value + +# Markers +Markers provide a mechanism for asserting that encoders and decoders are +synchronized. If an unexpected marker is found, decoding panics. + +Sync = uvarint // indicates what should follow if synchronized + WriterPCs + . + +A marker also records a configurable number of program counters (PCs) during +encoding to assist with debugging. + +WriterPCs = uvarint // the number of PCs that follow + { uvarint } // the PCs + . + +Note that markers are always defined using terminals — they never contain a +marker themselves. +*/ + // Package pkgbits implements low-level coding abstractions for Unified IR's // (UIR) binary export data format. // -- 2.51.0