From 36b3a1cddf4f5ddada4eefb0f2e6dd2f168b79b99b42e74a08d7aa000fef8ad5 Mon Sep 17 00:00:00 2001 From: Sergey Matveev Date: Fri, 18 Oct 2024 13:16:20 +0300 Subject: [PATCH] Deterministic decoding of blob lengths --- cyac/lib/dec.c | 7 ++----- cyac/lib/enc.c | 2 +- cyac/lib/err.c | 2 -- cyac/lib/err.h | 3 --- gyac/dec.go | 8 ++------ gyac/enc.go | 2 +- pyac/pyac.py | 6 +++--- spec/encoding/blob.texi | 16 ++++++++-------- tyac/tyac.tcl | 2 +- 9 files changed, 18 insertions(+), 30 deletions(-) diff --git a/cyac/lib/dec.c b/cyac/lib/dec.c index b8cfd1d..2b3b19c 100644 --- a/cyac/lib/dec.c +++ b/cyac/lib/dec.c @@ -173,13 +173,10 @@ YACAtomDecode( return YACErrNotEnough; } const uint64_t chunkLen = yacFromBE(buf + 1, 8); - if (chunkLen > SIZE_MAX) { + if (chunkLen > (SIZE_MAX - 1)) { return YACErrLenTooBig; } - if (chunkLen == 0) { - return YACErrBlobBadLen; - } - atom->v.blob.chunkLen = (size_t)chunkLen; + atom->v.blob.chunkLen = (size_t)chunkLen + 1; break; } diff --git a/cyac/lib/enc.c b/cyac/lib/enc.c index 22509bf..63b350b 100644 --- a/cyac/lib/enc.c +++ b/cyac/lib/enc.c @@ -159,7 +159,7 @@ YACAtomBlobEncode( return false; } buf[0] = YACAtomBlob; - yacToBE(buf + 1, 8, (uint64_t)chunkLen); + yacToBE(buf + 1, 8, (uint64_t)chunkLen - 1); return true; } diff --git a/cyac/lib/err.c b/cyac/lib/err.c index 5908e3f..6f54748 100644 --- a/cyac/lib/err.c +++ b/cyac/lib/err.c @@ -18,8 +18,6 @@ YACErr2Str(const enum YACErr err) return "BadUTF8"; case YACErrIntNonMinimal: return "IntNonMinimal"; - case YACErrBlobBadLen: - return "BlobBadLen"; case YACErrBlobBadAtom: return "BlobBadAtom"; case YACErrBlobBadTerm: diff --git a/cyac/lib/err.h b/cyac/lib/err.h index 711d81b..64bf0aa 100644 --- a/cyac/lib/err.h +++ b/cyac/lib/err.h @@ -17,8 +17,6 @@ // Invalid UTF-8 codepoint or zero byte met. // @item YACErrIntNonMinimal // Non minimal integer encoding. -// @item YACErrBlobBadLen -// Blob with invalid chunk length. // @item YACErrBlobBadAtom // Blob contains unexpected atom. // @item YACErrBlobBadTerm @@ -49,7 +47,6 @@ enum YACErr { YACErrLenTooBig, YACErrBadUTF8, YACErrIntNonMinimal, - YACErrBlobBadLen, YACErrBlobBadAtom, YACErrBlobBadTerm, YACErrTAI64TooBig, diff --git a/gyac/dec.go b/gyac/dec.go index 3d6c7e2..8609155 100644 --- a/gyac/dec.go +++ b/gyac/dec.go @@ -73,7 +73,6 @@ var ( ErrMapBadKey = errors.New("map bad key") ErrMapUnordered = errors.New("map unordered") ErrMapNoVal = errors.New("map no value") - ErrBlobBadLen = errors.New("blob bad len") ErrBlobBadAtom = errors.New("blob unexpected atom") ErrBlobBadTerm = errors.New("blob bad terminator") ) @@ -220,14 +219,11 @@ func AtomDecode(buf []byte) (item *Item, off int, err error) { return } chunkLen := FromBE(buf[1 : 1+8]) - if chunkLen == 0 { - err = ErrBlobBadLen - return - } - if chunkLen >= (1 << 63) { + if chunkLen >= (1<<63)-1 { err = ErrLenTooBig return } + chunkLen++ item.V = chunkLen case AtomFloat16, AtomFloat32, AtomFloat64, AtomFloat128, AtomFloat256: diff --git a/gyac/enc.go b/gyac/enc.go index 419653e..05ffc13 100644 --- a/gyac/enc.go +++ b/gyac/enc.go @@ -215,7 +215,7 @@ func AtomMapEncode(buf []byte) []byte { func AtomBlobEncode(buf []byte, chunkLen int) []byte { l := make([]byte, 9) l[0] = byte(AtomBlob) - ToBE(l[1:], uint64(chunkLen)) + ToBE(l[1:], uint64(chunkLen-1)) return append(buf, l...) } diff --git a/pyac/pyac.py b/pyac/pyac.py index 0495bf7..3be038c 100644 --- a/pyac/pyac.py +++ b/pyac/pyac.py @@ -293,12 +293,12 @@ class Blob: if isinstance(v, Blob): v = v.v l = v.l - assert (l > 0) and (l <= ((1<<64)-1)) + assert (l > 0) and (l <= (1<<64)) self.v = v self.l = l def encode(self): - raws = [self.tags[0].to_bytes(1, "big"), self.l.to_bytes(8, "big")] + raws = [self.tags[0].to_bytes(1, "big"), (self.l - 1).to_bytes(8, "big")] chunks = len(self.v) // (self.l) for i in range(chunks): raws.append(Nil().encode()) @@ -318,7 +318,7 @@ class Blob: data = data[1:] if len(data) < 8: raise NotEnoughData(8) - l = int.from_bytes(data[:8], "big") + l = 1 + int.from_bytes(data[:8], "big") data = data[8:] vs = [] while True: diff --git a/spec/encoding/blob.texi b/spec/encoding/blob.texi index 76024d5..9f3c572 100644 --- a/spec/encoding/blob.texi +++ b/spec/encoding/blob.texi @@ -6,9 +6,9 @@ Blob (binary large object) allows you to transfer binary data in chunks, in a streaming way, when data may not fit in memory at once. 64-bit big-endian integer follows the BLOB tag, setting the following -chunks payload size. Then come zero or more NIL tags with fixed-length -payload after each of them. Blob is terminated by @ref{Strings, BIN}, -probably having zero length. +chunks payload size (+1). Then come zero or more NIL tags with +fixed-length payload after each of them. Blob is terminated by +@ref{Strings, BIN}, probably having zero length. Data format definition must specify exact chunk size expected to be used, if it needs deterministic encoding. @@ -19,10 +19,10 @@ BLOB len [NIL || payload0 || NIL || payload1 || ...] BIN @multitable @columnfractions .5 .5 -@item BLOB(5, "") @tab @code{0B 0000000000000005 80} -@item BLOB(5, "12345") @tab @code{0B 0000000000000005 01 3132333435 80} -@item BLOB(5, "123456") @tab @code{0B 0000000000000005 01 3132333435 81 36} -@item BLOB(500, "123") @tab @code{0B 00000000000001F4 83 313233} -@item BLOB(2, "12345") @tab @code{0B 0000000000000002 01 3132 01 3334 81 35} +@item BLOB(5, "") @tab @code{0B 0000000000000004 80} +@item BLOB(5, "12345") @tab @code{0B 0000000000000004 01 3132333435 80} +@item BLOB(5, "123456") @tab @code{0B 0000000000000004 01 3132333435 81 36} +@item BLOB(500, "123") @tab @code{0B 00000000000001F3 83 313233} +@item BLOB(2, "12345") @tab @code{0B 0000000000000001 01 3132 01 3334 81 35} @end multitable diff --git a/tyac/tyac.tcl b/tyac/tyac.tcl index 837351c..f4232fd 100644 --- a/tyac/tyac.tcl +++ b/tyac/tyac.tcl @@ -136,7 +136,7 @@ proc MAP {pairs} { proc BLOB {chunkLen v} { char [expr 0x0B] - toBE 8 $chunkLen + toBE 8 [expr {$chunkLen - 1}] set vl [string length $v] set chunks [expr {$vl / $chunkLen}] for {set i 0} {$i < $chunks} {incr i} { -- 2.48.1