From 6eee80eb1a00e7c218539df8c3f3a2c9d790b3b23e6fa56b5756c97cebb1be3b Mon Sep 17 00:00:00 2001 From: Sergey Matveev Date: Wed, 9 Oct 2024 14:04:50 +0300 Subject: [PATCH] Simpler blob --- cyac/cmd/print.c | 10 +--------- cyac/dec.c | 14 ++++++++++++++ cyac/dec.h | 14 ++++++-------- cyac/enc.c | 11 ++++------- cyac/iter.c | 25 ++----------------------- gyac/dec.go | 36 +++++++++++++++++------------------- gyac/enc.go | 5 ++++- pyac/pyac.py | 8 +++++--- spec/encoding/blob.texi | 20 ++++++++++---------- tyac/tyac.tcl | 2 +- 10 files changed, 64 insertions(+), 81 deletions(-) diff --git a/cyac/cmd/print.c b/cyac/cmd/print.c index 214fc3f..b4771d7 100644 --- a/cyac/cmd/print.c +++ b/cyac/cmd/print.c @@ -139,7 +139,7 @@ myCb( fputs("}\n", stdout); break; case YACItemBlob: - fputs("BLOB(\n", stdout); + printf("BLOB(l=%zu\n", atom->val.uint); state->indent++; err = YACIterBlob(cbState, atom, off, buf, len, myCb); if (err != YACErrNo) { @@ -202,14 +202,6 @@ myCb( fprintf(stdout, "\"%s\"\n", hex); free(hex); break; - case YACItemChunk: - hex = HexEnc(atom->val.buf, atom->len); - fprintf(stdout, "%s\n", hex); - free(hex); - break; - case YACItemChunkLen: - fprintf(stdout, "l=%zu\n", atom->val.uint); - break; case YACItemRaw: hex = HexEnc(atom->val.buf, atom->len); fprintf(stdout, "(t=0x%X l=%zu v=%s)\n", atom->tag, atom->len, hex); diff --git a/cyac/dec.c b/cyac/dec.c index 22cc803..88650d9 100644 --- a/cyac/dec.c +++ b/cyac/dec.c @@ -24,6 +24,9 @@ enum YACErr YACAtomDecode(struct YACAtom *atom, const unsigned char *buf, const size_t len) { + atom->len = 0; + atom->tag = 0; + atom->typ = 0; atom->off = 1; if (len < 1) { return YACErrNotEnough; @@ -150,6 +153,17 @@ YACAtomDecode(struct YACAtom *atom, const unsigned char *buf, const size_t len) break; case YACAtomBlob: atom->typ = YACItemBlob; + atom->off += 8; + if ((ptrdiff_t)len < atom->off) { + return YACErrNotEnough; + } + atom->val.uint = yacFromBE(buf + 1, 8); + if (atom->val.uint == 0) { + return YACErrBlobBadLen; + } + if (atom->val.uint > ((uint64_t)(1) << 60)) { + return YACErrLenTooBig; + } break; case YACAtomFloat16: diff --git a/cyac/dec.h b/cyac/dec.h index 4fa742e..0fa7ba0 100644 --- a/cyac/dec.h +++ b/cyac/dec.h @@ -15,13 +15,11 @@ enum YACItemType { YACItemList, YACItemMap, YACItemBlob, - YACItemFloat, // atom.val.flt - YACItemTAI64, // atom.val.buf, atom.len - YACItemBin, // atom.val.buf, atom.len - YACItemStr, // atom.val.buf, atom.len - YACItemChunk, // atom.val.buf, atom.len, thrown by YACIterBlob - YACItemChunkLen, // atom.val.uint, thrown by YACIterBlob - YACItemRaw, // atom.tag, atom.val.buf, atom.len + YACItemFloat, // atom.val.flt + YACItemTAI64, // atom.val.buf, atom.len + YACItemBin, // atom.val.buf, atom.len + YACItemStr, // atom.val.buf, atom.len + YACItemRaw, // atom.tag, atom.val.buf, atom.len }; enum YACErr { @@ -51,7 +49,7 @@ struct YACAtom { ptrdiff_t off; // length of the whole atom size_t len; // length of the strings, TAI64, raw values union { - uint64_t uint; // unsigned integer's value + uint64_t uint; // unsigned integer's value, blob's chunk len int64_t sint; // negative signed integer's value const unsigned char *buf; // strings, TAI64, UUID value } val; diff --git a/cyac/enc.c b/cyac/enc.c index 84e95a5..765d452 100644 --- a/cyac/enc.c +++ b/cyac/enc.c @@ -140,15 +140,12 @@ YACAtomMapEncode(unsigned char *buf, const size_t cap) ptrdiff_t YACAtomBlobEncode(unsigned char *buf, const size_t cap, const size_t chunkLen) { - if (cap < 1) { - return -1; + if (cap < 1 + 8) { + return -(1 + 8); } buf[0] = YACAtomBlob; - ptrdiff_t res = YACAtomUintEncode(buf + 1, cap - 1, (uint64_t)chunkLen); - if (res <= 0) { - return res; - } - return res + 1; + yacToBE(buf + 1, 8, chunkLen); + return 1 + 8; } static ptrdiff_t diff --git a/cyac/iter.c b/cyac/iter.c index 9405cca..9d81045 100644 --- a/cyac/iter.c +++ b/cyac/iter.c @@ -15,7 +15,6 @@ #include #include -#include #include #include "dec.h" @@ -111,26 +110,8 @@ YACIterBlob( const size_t len, YACIterCb cb) { - enum YACErr err = YACAtomDecode(atom, buf + *off, (size_t)((ptrdiff_t)len - *off)); - if (err != YACErrNo) { - return err; - } - (*off) += atom->off; - if (atom->typ != YACItemUint) { - return YACErrBlobBadLen; - } const size_t chunkLen = atom->val.uint; - if (chunkLen == 0) { - return YACErrBlobBadLen; - } - if (chunkLen > ((uint64_t)(1) << 60)) { - return YACErrLenTooBig; - } - atom->typ = YACItemChunkLen; - err = cb(NULL, 0, -1, cbState, atom, off, buf, len); - if (err != YACErrNo) { - return err; - } + enum YACErr err = YACErrInvalid; bool eoc = false; for (size_t n = 0; !eoc; n++) { err = YACAtomDecode(atom, buf + *off, (size_t)((ptrdiff_t)len - *off)); @@ -143,7 +124,7 @@ YACIterBlob( if (((ptrdiff_t)len - *off) <= (ptrdiff_t)chunkLen) { return YACErrBlobShortChunk; } - atom->typ = YACItemChunk; + atom->typ = YACItemBin; atom->val.buf = buf + *off; atom->len = chunkLen; (*off) += chunkLen; @@ -166,8 +147,6 @@ YACIterBlob( case YACItemFloat: case YACItemTAI64: case YACItemStr: - case YACItemChunk: - case YACItemChunkLen: case YACItemRaw: default: return YACErrBlobBadAtom; diff --git a/gyac/dec.go b/gyac/dec.go index 78ed7c6..ef552b8 100644 --- a/gyac/dec.go +++ b/gyac/dec.go @@ -205,6 +205,21 @@ func AtomDecode(buf []byte) (item *Item, off int, err error) { item.T = byte(ItemMap) case AtomBlob: item.T = byte(ItemBlob) + off += 8 + if len(buf) < off { + err = ErrNotEnough + return + } + chunkLen := FromBE(buf[1 : 1+8]) + if chunkLen == 0 { + err = ErrBlobBadLen + return + } + if chunkLen > (1 << 60) { + err = ErrLenTooBig + return + } + item.V = chunkLen case AtomFloat16, AtomFloat32, AtomFloat64, AtomFloat128, AtomFloat256: var l int @@ -338,26 +353,9 @@ func DecodeItem(buf []byte) (item *Item, tail []byte, err error) { item.V = v return case ItemBlob: - var sub *Item - sub, buf, err = DecodeItem(buf) - tail = buf - if err != nil { - return - } - if sub.T != byte(ItemUInt) { - err = ErrBlobBadLen - return - } - if sub.V.(uint64) > (1 << 60) { - err = ErrLenTooBig - return - } - chunkLen := int(sub.V.(uint64)) - if chunkLen == 0 { - err = ErrBlobBadLen - return - } + chunkLen := int(item.V.(uint64)) v := &Blob{ChunkLen: chunkLen} + var sub *Item BlobCycle: for { sub, buf, err = DecodeItem(buf) diff --git a/gyac/enc.go b/gyac/enc.go index a9350e7..8dca15a 100644 --- a/gyac/enc.go +++ b/gyac/enc.go @@ -213,7 +213,10 @@ func AtomMapEncode(buf []byte) []byte { } func AtomBlobEncode(buf []byte, chunkLen int) []byte { - return AtomUIntEncode(append(buf, byte(AtomBlob)), uint64(chunkLen)) + l := make([]byte, 9) + l[0] = byte(AtomBlob) + ToBE(l[1:], uint64(chunkLen)) + return append(buf, l...) } func atomStrEncode(buf, data []byte, utf8 bool) []byte { diff --git a/pyac/pyac.py b/pyac/pyac.py index 6ac9668..64c7fdd 100644 --- a/pyac/pyac.py +++ b/pyac/pyac.py @@ -297,7 +297,7 @@ class Blob: self.l = l def encode(self): - raws = [self.tags[0].to_bytes(1, "big"), Int(self.l).encode()] + raws = [self.tags[0].to_bytes(1, "big"), self.l.to_bytes(8, "big")] chunks = len(self.v) // (self.l) for i in range(chunks): raws.append(Nil().encode()) @@ -315,8 +315,10 @@ class Blob: if data[0] != klass.tags[0]: raise WrongTag data = data[1:] - l, data = Int.decode(data) - l = l.v + if len(data) < 8: + raise NotEnoughData(8) + l = int.from_bytes(data[:8], "big") + data = data[8:] vs = [] while True: v, data = Decode(data) diff --git a/spec/encoding/blob.texi b/spec/encoding/blob.texi index c0c2d6b..76024d5 100644 --- a/spec/encoding/blob.texi +++ b/spec/encoding/blob.texi @@ -5,24 +5,24 @@ Blob (binary large object) allows you to transfer binary data in chunks, in a streaming way, when data may not fit in memory at once. -Positive non-zero @ref{Integers, INT} must follow the BLOB tag, setting -the following chunks payload size. Then come zero or more NIL tags with -fixed-length payload after each of them. Blob is terminated by -@ref{Strings, BIN}, probably having zero length. +64-bit big-endian integer follows the BLOB tag, setting the following +chunks payload size. Then come zero or more NIL tags with fixed-length +payload after each of them. Blob is terminated by @ref{Strings, BIN}, +probably having zero length. Data format definition must specify exact chunk size expected to be used, if it needs deterministic encoding. @verbatim -BLOB INT [NIL || payload0 || NIL || payload1 || ...] BIN +BLOB len [NIL || payload0 || NIL || payload1 || ...] BIN @end verbatim @multitable @columnfractions .5 .5 -@item BLOB(5, "") @tab @code{0B 45 80} -@item BLOB(5, "12345") @tab @code{0B 45 01 3132333435 80} -@item BLOB(5, "123456") @tab @code{0B 45 01 3132333435 81 36} -@item BLOB(500, "123") @tab @code{0B 2101F4 83 313233} -@item BLOB(2, "12345") @tab @code{0B 42 01 3132 01 3334 81 35} +@item BLOB(5, "") @tab @code{0B 0000000000000005 80} +@item BLOB(5, "12345") @tab @code{0B 0000000000000005 01 3132333435 80} +@item BLOB(5, "123456") @tab @code{0B 0000000000000005 01 3132333435 81 36} +@item BLOB(500, "123") @tab @code{0B 00000000000001F4 83 313233} +@item BLOB(2, "12345") @tab @code{0B 0000000000000002 01 3132 01 3334 81 35} @end multitable diff --git a/tyac/tyac.tcl b/tyac/tyac.tcl index 7ed9d8b..1a12867 100644 --- a/tyac/tyac.tcl +++ b/tyac/tyac.tcl @@ -133,7 +133,7 @@ proc MAP {pairs} { proc BLOB {chunkLen v} { char [expr 0x0B] - INT $chunkLen + toBE 8 $chunkLen set vl [string length $v] set chunks [expr {$vl / $chunkLen}] for {set i 0} {$i < $chunks} {incr i} { -- 2.48.1