From: Sergey Matveev Date: Fri, 10 Jan 2025 14:11:17 +0000 (+0300) Subject: Less stateful BLOB decoding X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=89d27be72922dc79c5c385ef537ecce1d8a5c0025eb1ddbab86cf8dac7e2a0f8;p=keks.git Less stateful BLOB decoding Replace NIL-prefixed BLOB's chunks encoding with ordinary BIN-encoding. That way the whole file can be decoded by only calling atom decode functions, without the need of keeping special inside-blob state. --- diff --git a/c/cmd/print-itered/print-itered.c b/c/cmd/print-itered/print-itered.c index 80520ef..3954f56 100644 --- a/c/cmd/print-itered/print-itered.c +++ b/c/cmd/print-itered/print-itered.c @@ -13,15 +13,12 @@ // You should have received a copy of the GNU Lesser General Public // License along with this program. If not, see . -#include -#include #include #include #include #include #include -#include #include "../lib/hex.h" #include "../lib/mmap.h" @@ -30,132 +27,6 @@ static const size_t maxStrLen = 40; -struct CbState { - int indent; -}; - -static enum KEKSErr -myCb( - const unsigned char *key, - const size_t keyLen, - const bool inList, - const size_t idx, - void *cbState, - struct KEKSAtom *atom, - size_t *off, - const unsigned char *buf, - const size_t len) -{ - struct CbState *state = (struct CbState *)(cbState); - if ((atom->typ) == KEKSItemEOC) { - state->indent--; - assert(state->indent >= 0); - } - printf("%04zu ", *off); - for (int i = 0; i < state->indent; i++) { - fputs(" ", stdout); - } - if (key != NULL) { - fwrite(key, keyLen, 1, stdout); - fputs(": ", stdout); - } else if (inList) { - fprintf(stdout, "%zu: ", idx); - } - - char *hex = NULL; - enum KEKSErr err = KEKSErrInvalid; - switch (atom->typ) { - case KEKSItemInvalid: - fputs("INVALID\n", stdout); - break; - case KEKSItemEOC: - break; - case KEKSItemNIL: - fputs("NIL\n", stdout); - break; - case KEKSItemFalse: - fputs("FALSE\n", stdout); - break; - case KEKSItemTrue: - fputs("TRUE\n", stdout); - break; - case KEKSItemUUID: - UUIDPrint(atom->v.uuid); - fputs("\n", stdout); - break; - case KEKSItemPint: - fprintf(stdout, "%zu\n", atom->v.pint); - break; - case KEKSItemNint: - fprintf(stdout, "%zd\n", atom->v.nint); - break; - case KEKSItemList: - fputs("[\n", stdout); - state->indent++; - err = KEKSIterList(cbState, atom, off, buf, len, myCb); - if (err != KEKSErrNo) { - return err; - } - fputs("]\n", stdout); - break; - case KEKSItemMap: - fputs("{\n", stdout); - state->indent++; - err = KEKSIterMap(cbState, atom, off, buf, len, myCb); - if (err != KEKSErrNo) { - return err; - } - fputs("}\n", stdout); - break; - case KEKSItemBlob: - printf("BLOB(l=%zu\n", atom->v.blob.chunkLen); - state->indent++; - err = KEKSIterBlob(cbState, atom, off, buf, len, myCb); - if (err != KEKSErrNo) { - return err; - } - fputs(")\n", stdout); - break; - case KEKSItemFloat: - fputs("FLOAT: TODO\n", stdout); - break; - case KEKSItemTAI64: - err = PrintTAI64(atom->v.str.ptr, atom->v.str.len); - if (err != KEKSErrNo) { - return err; - } - break; - case KEKSItemBin: { - const size_t l = (atom->v.str.len > maxStrLen) ? maxStrLen : atom->v.str.len; - hex = HexEnc(atom->v.str.ptr, l); - fprintf( - stdout, - "%zu:%s%s\n", - atom->v.str.len, - hex, - (atom->v.str.len > maxStrLen) ? "..." : ""); - free(hex); - break; - } - case KEKSItemStr: { - const size_t l = (atom->v.str.len > maxStrLen) ? maxStrLen : atom->v.str.len; - hex = strndup((const char *)(atom->v.str.ptr), l); - fprintf(stdout, "\"%s%s\"\n", hex, (atom->v.str.len > maxStrLen) ? "..." : ""); - free(hex); - break; - } - case KEKSItemRaw: - hex = HexEnc(atom->v.str.ptr, atom->v.str.len); - fprintf(stdout, "(l=%zu v=%s)\n", atom->v.str.len, hex); - free(hex); - break; - default: - fprintf(stderr, "unknown atom\n"); - return EXIT_FAILURE; - } - return KEKSErrNo; -} - int main(int argc, char **argv) { @@ -172,18 +43,99 @@ main(int argc, char **argv) memset(&atom, 0, sizeof(struct KEKSAtom)); size_t off = 0; size_t got = 0; - enum KEKSErr err = KEKSAtomDecode(&got, &atom, buf, len); - if (err != KEKSErrNo) { - fprintf(stderr, "err: %s\n", KEKSErr2Str(err)); - return EXIT_FAILURE; - } - off += got; - struct CbState cbState = {.indent = 0}; - err = myCb(NULL, 0, false, 0, &cbState, &atom, &off, buf, len); - if (err != KEKSErrNo) { - fprintf(stderr, "err: %s\n", KEKSErr2Str(err)); - return EXIT_FAILURE; + int indent = 0; + enum KEKSErr err = KEKSErrInvalid; + while (off < len) { + err = KEKSAtomDecode(&got, &atom, buf + off, len - off); + if (err != KEKSErrNo) { + fprintf(stderr, "err: %s\n", KEKSErr2Str(err)); + return EXIT_FAILURE; + } + off += got; + printf("%d %zu\t", indent, off); + if (atom.typ == KEKSItemEOC) { + indent--; + } + for (int i = 0; i < indent; i++) { + fputs(" ", stdout); + } + char *hex = NULL; + switch (atom.typ) { + case KEKSItemInvalid: + fputs("INVALID\n", stdout); + break; + case KEKSItemEOC: + fputs("EOC\n", stdout); + break; + case KEKSItemNIL: + fputs("NIL\n", stdout); + break; + case KEKSItemFalse: + fputs("FALSE\n", stdout); + break; + case KEKSItemTrue: + fputs("TRUE\n", stdout); + break; + case KEKSItemUUID: + UUIDPrint(atom.v.uuid); + fputs("\n", stdout); + break; + case KEKSItemPint: + fprintf(stdout, "%zu\n", atom.v.pint); + break; + case KEKSItemNint: + fprintf(stdout, "%zd\n", atom.v.nint); + break; + case KEKSItemList: + fputs("LIST\n", stdout); + indent++; + break; + case KEKSItemMap: + fputs("MAP\n", stdout); + indent++; + break; + case KEKSItemBlob: + printf("BLOB(l=%zu\n", atom.v.blob.chunkLen); + break; + case KEKSItemFloat: + fputs("FLOAT: TODO\n", stdout); + break; + case KEKSItemTAI64: + err = PrintTAI64(atom.v.str.ptr, atom.v.str.len); + if (err != KEKSErrNo) { + fprintf(stderr, "err: %s\n", KEKSErr2Str(err)); + return EXIT_FAILURE; + } + break; + case KEKSItemBin: { + const size_t l = (atom.v.str.len > maxStrLen) ? maxStrLen : atom.v.str.len; + hex = HexEnc(atom.v.str.ptr, l); + fprintf( + stdout, + "%zu:%s%s\n", + atom.v.str.len, + hex, + (atom.v.str.len > maxStrLen) ? "..." : ""); + free(hex); + break; + } + case KEKSItemStr: { + const size_t l = (atom.v.str.len > maxStrLen) ? maxStrLen : atom.v.str.len; + hex = strndup((const char *)(atom.v.str.ptr), l); + fprintf( + stdout, "\"%s%s\"\n", hex, (atom.v.str.len > maxStrLen) ? "..." : ""); + free(hex); + break; + } + case KEKSItemRaw: + hex = HexEnc(atom.v.str.ptr, atom.v.str.len); + fprintf(stdout, "(l=%zu v=%s)\n", atom.v.str.len, hex); + free(hex); + break; + default: + fprintf(stderr, "unknown atom\n"); + return EXIT_FAILURE; + } } - assert(cbState.indent == 0); return EXIT_SUCCESS; } diff --git a/c/cmd/test-vector/test-vector.c b/c/cmd/test-vector/test-vector.c index 790378c..b436968 100644 --- a/c/cmd/test-vector/test-vector.c +++ b/c/cmd/test-vector/test-vector.c @@ -75,18 +75,18 @@ main(void) adder(KEKSAtomBlobEncode(&Got, buf + Off, len - Off, 12)); // .blob.1 memset(bin, '6', 12); - adder(KEKSAtomChunkEncode(&Got, buf + Off, len - Off, bin, 12)); + adder(KEKSAtomBinEncode(&Got, buf + Off, len - Off, bin, 12)); adder(KEKSAtomBinEncode(&Got, buf + Off, len - Off, NULL, 0)); adder(KEKSAtomBlobEncode(&Got, buf + Off, len - Off, 12)); // .blob.2 memset(bin, '7', 12); - adder(KEKSAtomChunkEncode(&Got, buf + Off, len - Off, bin, 12)); + adder(KEKSAtomBinEncode(&Got, buf + Off, len - Off, bin, 12)); adder(KEKSAtomBinEncode(&Got, buf + Off, len - Off, bin, 1)); adder(KEKSAtomBlobEncode(&Got, buf + Off, len - Off, 5)); // .blob.3 - adder(KEKSAtomChunkEncode( + adder(KEKSAtomBinEncode( &Got, buf + Off, len - Off, (const unsigned char *)"12345", 5)); - adder(KEKSAtomChunkEncode( + adder(KEKSAtomBinEncode( &Got, buf + Off, len - Off, (const unsigned char *)"67890", 5)); adder(KEKSAtomBinEncode(&Got, buf + Off, len - Off, (const unsigned char *)"-", 1)); diff --git a/c/doc/atom.texi b/c/doc/atom.texi index c502f68..22718fa 100644 --- a/c/doc/atom.texi +++ b/c/doc/atom.texi @@ -23,6 +23,4 @@ @DOCSTRING KEKSAtomStrEncode@ @anchor{KEKSAtomBinEncode} @DOCSTRING KEKSAtomBinEncode@ -@anchor{KEKSAtomChunkEncode} -@DOCSTRING KEKSAtomChunkEncode@ @DOCSTRING KEKSAtomTAI64Encode@ diff --git a/c/lib/enc.c b/c/lib/enc.c index 4febb50..bf8e418 100644 --- a/c/lib/enc.c +++ b/c/lib/enc.c @@ -225,27 +225,6 @@ KEKSAtomBinEncode( return keksAtomStrEncode(len, buf, cap, src, srcLen, false); } -bool -KEKSAtomChunkEncode( - size_t *len, - unsigned char *buf, - const size_t cap, - const unsigned char *src, - const size_t srcLen) -{ - (*len) = 1 + srcLen; - if ((*len) <= srcLen) { - (*len) = 0; - return false; - } - if (cap < (*len)) { - return false; - } - buf[0] = KEKSAtomNIL; - memcpy(buf + 1, src, srcLen); - return true; -} - bool KEKSAtomTAI64Encode( size_t *len, diff --git a/c/lib/enc.h b/c/lib/enc.h index c0bd9ca..3527c8f 100644 --- a/c/lib/enc.h +++ b/c/lib/enc.h @@ -97,9 +97,8 @@ KEKSAtomMapEncode(size_t *len, unsigned char *buf, const size_t cap); // (size_t *len, unsigned char *buf, const size_t cap, const size_t chunkLen) // Encode BLOB atom in provided @var{buf} with capacity of @var{cap}. // In case of success, true is returned and @var{len} will hold how many -// bytes were written to buffer. You must call @ref{KEKSAtomChunkEncode} -// functions for subsequent chunks, and terminate the blob with -// @ref{KEKSAtomBinEncode}. +// bytes were written to buffer. You must call @ref{KEKSAtomBinEncode} +// functions for subsequent chunks, and terminator. // @end deftypefun bool KEKSAtomBlobEncode( @@ -140,23 +139,6 @@ KEKSAtomBinEncode( const unsigned char *src, const size_t srcLen); -// TEXINFO: KEKSAtomChunkEncode -// @deftypefun bool KEKSAtomChunkEncode @ -// (size_t *len, unsigned char *buf, const size_t cap, @ -// const unsigned char *src, const size_t srcLen) -// Encode the chunk in provided @var{buf} with capacity of @var{cap}. -// In case of success, true is returned and @var{len} will hold how many -// bytes were written to buffer. It is just a convenient wrapper instead -// of using @ref{KEKSAtomNILEncode} followed by @var{srcLen} bytes. -// @end deftypefun -bool -KEKSAtomChunkEncode( - size_t *len, - unsigned char *buf, - const size_t cap, - const unsigned char *src, - const size_t srcLen); - // TEXINFO: KEKSAtomTAI64Encode // @deftypefun bool KEKSAtomTAI64Encode @ // (size_t *len, unsigned char *buf, const size_t cap, @ diff --git a/c/lib/err.c b/c/lib/err.c index 5455700..51fe649 100644 --- a/c/lib/err.c +++ b/c/lib/err.c @@ -22,8 +22,8 @@ KEKSErr2Str(const enum KEKSErr err) return "IntNonMinimal"; case KEKSErrBlobBadAtom: return "BlobBadAtom"; - case KEKSErrBlobBadTerm: - return "BlobBadTerm"; + case KEKSErrBlobBadChunkLen: + return "BlobBadChunkLen"; case KEKSErrTAI64TooBig: return "TAI64TooBig"; case KEKSErrTAI64BadNsec: diff --git a/c/lib/err.h b/c/lib/err.h index 3bc111c..c6d59e6 100644 --- a/c/lib/err.h +++ b/c/lib/err.h @@ -21,8 +21,8 @@ // Non minimal integer encoding. // @item KEKSErrBlobBadAtom // Blob contains unexpected atom. -// @item KEKSErrBlobBadTerm -// Blob contains invalid terminator. +// @item KEKSErrBlobBadChunkLen +// Blob contains chunk with invalid length. // @item KEKSErrTAI64TooBig // Too large TAI64 value, out-of-bounds. // @item KEKSErrTAI64BadNsec @@ -53,7 +53,7 @@ enum KEKSErr { KEKSErrIntNonBin, KEKSErrIntNonMinimal, KEKSErrBlobBadAtom, - KEKSErrBlobBadTerm, + KEKSErrBlobBadChunkLen, KEKSErrTAI64TooBig, KEKSErrTAI64BadNsec, KEKSErrTAI64BadAsec, diff --git a/c/lib/items.c b/c/lib/items.c index 1427249..763b944 100644 --- a/c/lib/items.c +++ b/c/lib/items.c @@ -242,32 +242,15 @@ keksItemsParse( // NOLINT(misc-no-recursion) } cur = idx + 1; struct KEKSAtom *atom = &(items->list[cur].atom); -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wswitch-enum" - switch (atom->typ) { -#pragma clang diagnostic pop - case KEKSItemNIL: - atom->v.str.len = chunkLen; - assert(len >= (*off)); - if ((len - (*off)) <= chunkLen) { - return KEKSErrNotEnough; - } - atom->typ = KEKSItemBin; - atom->v.str.ptr = buf + *off; - if ((SIZE_MAX - chunkLen) < (*off)) { - return KEKSErrLenTooBig; - } - (*off) += chunkLen; - break; - case KEKSItemBin: - if (atom->v.str.len >= chunkLen) { - return KEKSErrBlobBadTerm; - } - eoc = true; - break; - default: + if (atom->typ != KEKSItemBin) { return KEKSErrBlobBadAtom; } + if (atom->v.str.len == chunkLen) { + } else if (atom->v.str.len < chunkLen) { + eoc = true; + } else { + return KEKSErrBlobBadChunkLen; + } if (prev != 0) { items->list[prev].next = cur; } @@ -387,7 +370,7 @@ KEKSItemsEncode( // NOLINT(misc-no-recursion) break; } assert(cap >= (*off)); - ok = KEKSAtomChunkEncode( + ok = KEKSAtomBinEncode( &got, buf + *off, cap - (*off), diff --git a/c/lib/iter.c b/c/lib/iter.c deleted file mode 100644 index 258d96e..0000000 --- a/c/lib/iter.c +++ /dev/null @@ -1,183 +0,0 @@ -// ckeks -- C KEKS encoder implementation -// Copyright (C) 2024-2025 Sergey Matveev -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation, version 3 of the License. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this program. If not, see . - -#include -#include -#include -#include -#include - -#include "dec.h" -#include "err.h" -#include "iter.h" - -enum KEKSErr -KEKSIterList( - void *cbState, - struct KEKSAtom *atom, - size_t *off, - const unsigned char *buf, - const size_t len, - KEKSIterCb cb) -{ - size_t got = 0; - enum KEKSErr err = KEKSErrInvalid; - bool eoc = false; - for (size_t n = 0;; n++) { - assert(len >= (*off)); - err = KEKSAtomDecode(&got, atom, buf + *off, len - (*off)); - if (err != KEKSErrNo) { - return err; - } - if ((SIZE_MAX - got) < (*off)) { - return KEKSErrLenTooBig; - } - (*off) += got; - eoc = atom->typ == KEKSItemEOC; - err = cb(NULL, 0, !eoc, n, cbState, atom, off, buf, len); - if (err != KEKSErrNo) { - return err; - } - if (eoc) { - break; - } - } - return KEKSErrNo; -} - -enum KEKSErr -KEKSIterMap( - void *cbState, - struct KEKSAtom *atom, - size_t *off, - const unsigned char *buf, - const size_t len, - KEKSIterCb cb) -{ - enum KEKSErr err = KEKSErrInvalid; - size_t got = 0; - const unsigned char *key = NULL; - size_t keyLen = 0; - for (;;) { - assert(len >= (*off)); - err = KEKSAtomDecode(&got, atom, buf + *off, len - (*off)); - if (err != KEKSErrNo) { - return err; - } - if ((SIZE_MAX - got) < (*off)) { - return KEKSErrLenTooBig; - } - (*off) += got; - if (atom->typ == KEKSItemEOC) { - err = cb(NULL, 0, false, 0, cbState, atom, off, buf, len); - if (err != KEKSErrNo) { - return err; - } - break; - } - if (atom->typ != KEKSItemStr) { - return KEKSErrMapBadKey; - } - if (atom->v.str.len == 0) { - return KEKSErrMapBadKey; - } - if (atom->v.str.len < keyLen) { - return KEKSErrMapUnordered; - } - if ((atom->v.str.len == keyLen) && - (memcmp(key, atom->v.str.ptr, keyLen) >= 0)) { - return KEKSErrMapUnordered; - } - keyLen = atom->v.str.len; - key = atom->v.str.ptr; - assert(len >= (*off)); - err = KEKSAtomDecode(&got, atom, buf + *off, len - (*off)); - if (err != KEKSErrNo) { - return err; - } - if ((SIZE_MAX - got) < (*off)) { - return KEKSErrLenTooBig; - } - (*off) += got; - if (atom->typ == KEKSItemEOC) { - return KEKSErrUnexpectedEOC; - } - err = cb(key, keyLen, false, 0, cbState, atom, off, buf, len); - if (err != KEKSErrNo) { - return err; - } - } - return KEKSErrNo; -} - -enum KEKSErr -KEKSIterBlob( - void *cbState, - struct KEKSAtom *atom, - size_t *off, - const unsigned char *buf, - const size_t len, - KEKSIterCb cb) -{ - const size_t chunkLen = atom->v.blob.chunkLen; - enum KEKSErr err = KEKSErrInvalid; - size_t got = 0; - bool eoc = false; - for (size_t n = 0; !eoc; n++) { - assert(len >= (*off)); - err = KEKSAtomDecode(&got, atom, buf + *off, len - (*off)); - if (err != KEKSErrNo) { - return err; - } - if ((SIZE_MAX - got) < (*off)) { - return KEKSErrLenTooBig; - } - (*off) += got; -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wswitch-enum" - switch (atom->typ) { -#pragma clang diagnostic pop - case KEKSItemNIL: - assert(len >= (*off)); - if ((len - *off) <= chunkLen) { - atom->v.str.len = chunkLen; - return KEKSErrNotEnough; - } - atom->typ = KEKSItemBin; - atom->v.str.ptr = buf + *off; - atom->v.str.len = chunkLen; - if ((SIZE_MAX - chunkLen) < (*off)) { - return KEKSErrLenTooBig; - } - (*off) += chunkLen; - break; - case KEKSItemBin: - if ((atom->v.str.len) >= chunkLen) { - return KEKSErrBlobBadTerm; - } - eoc = true; - break; - default: - return KEKSErrBlobBadAtom; - } - err = cb(NULL, 0, true, n, cbState, atom, off, buf, len); - if (err != KEKSErrNo) { - return err; - } - } - atom->typ = KEKSItemEOC; - err = cb(NULL, 0, false, 0, cbState, atom, off, buf, len); - return err; -} diff --git a/c/lib/iter.h b/c/lib/iter.h deleted file mode 100644 index 9499812..0000000 --- a/c/lib/iter.h +++ /dev/null @@ -1,48 +0,0 @@ -#ifndef KEKS_ITER_H -#define KEKS_ITER_H - -#include -#include - -#include "dec.h" -#include "err.h" - -typedef enum KEKSErr (*KEKSIterCb)( - const unsigned char *key, - const size_t keyLen, - const bool inList, - const size_t idx, - void *cbState, - struct KEKSAtom *atom, - size_t *off, - const unsigned char *buf, - const size_t len); - -enum KEKSErr -KEKSIterList( - void *cbState, - struct KEKSAtom *atom, - size_t *off, - const unsigned char *buf, - const size_t len, - KEKSIterCb cb); - -enum KEKSErr -KEKSIterMap( - void *cbState, - struct KEKSAtom *atom, - size_t *off, - const unsigned char *buf, - const size_t len, - KEKSIterCb cb); - -enum KEKSErr -KEKSIterBlob( - void *cbState, - struct KEKSAtom *atom, - size_t *off, - const unsigned char *buf, - const size_t len, - KEKSIterCb cb); - -#endif // KEKS_ITER_H diff --git a/c/lib/o.list b/c/lib/o.list index 3aaa683..52a1ee0 100644 --- a/c/lib/o.list +++ b/c/lib/o.list @@ -5,7 +5,6 @@ enctai.o err.o frombe.o items.o -iter.o leapsecs.o tobe.o utf8.o diff --git a/go/atom-encode.go b/go/atom-encode.go index baaa5d1..485d9b4 100644 --- a/go/atom-encode.go +++ b/go/atom-encode.go @@ -114,13 +114,7 @@ func BigIntEncode(w io.Writer, v *big.Int) (written int64, err error) { } // Write an encoded BLOB atom. -// You have to manually provide necessary chunks and -// properly terminate it with BinEncode. -func BlobEncode( - w io.Writer, - chunkLen int64, - r io.Reader, -) (written int64, err error) { +func BlobEncode(w io.Writer, chunkLen int64, r io.Reader) (written int64, err error) { { l := make([]byte, 9) l[0] = byte(AtomBLOB) @@ -142,16 +136,11 @@ func BlobEncode( } return } - n64, err = ByteEncode(w, byte(AtomNIL)) + n64, err = BinEncode(w, chunk) if err != nil { return } written += n64 - n64, err = io.Copy(w, bytes.NewReader(chunk)) - written += n64 - if err != nil { - return - } } n64, err = BinEncode(w, chunk) written += n64 diff --git a/go/blob.go b/go/blob.go index ed27cdf..0d52c15 100644 --- a/go/blob.go +++ b/go/blob.go @@ -51,5 +51,5 @@ type BlobReader struct { } func (blob *BlobReader) String() string { - return fmt.Sprintf("BLOB(%d, ~)", blob.ChunkLen) + return fmt.Sprintf("BLOB(%d, ?)", blob.ChunkLen) } diff --git a/go/parse.go b/go/parse.go index 769fb5a..5a7daee 100644 --- a/go/parse.go +++ b/go/parse.go @@ -22,12 +22,12 @@ import ( ) var ( - ErrMapBadKey = errors.New("map bad key") - ErrMapUnordered = errors.New("map unordered") - ErrBlobBadAtom = errors.New("blob unexpected atom") - ErrBlobBadTerm = errors.New("blob bad terminator") - ErrUnexpectedEOC = errors.New("unexpected EOC") - ErrTooDeep = errors.New("too deep structure") + ErrMapBadKey = errors.New("map bad key") + ErrMapUnordered = errors.New("map unordered") + ErrBlobBadAtom = errors.New("blob unexpected atom") + ErrBlobBadChunkLen = errors.New("blob bad chunk len") + ErrUnexpectedEOC = errors.New("unexpected EOC") + ErrTooDeep = errors.New("too deep structure") ) func (ctx *Decoder) deTail() { @@ -133,34 +133,27 @@ func (ctx *Decoder) parse() (t types.Type, err error) { var chunks []string var sub types.Type var s string - BlobCycle: for { sub, err = ctx.DecodeAtom() if err != nil { return } - switch sub { - case types.NIL: - ctx.deTail() - s, err = ctx.getBytes(int(chunkLen)) - if err != nil { - return - } + if sub != types.Bin { + err = ErrBlobBadAtom + return + } + s = ctx.strs[len(ctx.strs)-1] + ctx.deTail() + ctx.strs = ctx.strs[:len(ctx.strs)-1] + if int64(len(s)) == chunkLen { chunks = append(chunks, s) - case types.Bin: - s = ctx.strs[len(ctx.strs)-1] - if int64(len(s)) >= chunkLen { - err = ErrBlobBadTerm - return - } + } else if int64(len(s)) < chunkLen { if len(s) != 0 { chunks = append(chunks, s) } - ctx.deTail() - ctx.strs = ctx.strs[:len(ctx.strs)-1] - break BlobCycle - default: - err = ErrBlobBadAtom + break + } else { + err = ErrBlobBadChunkLen return } } diff --git a/py3/keks.py b/py3/keks.py index 72e3163..c8dc7eb 100755 --- a/py3/keks.py +++ b/py3/keks.py @@ -235,8 +235,7 @@ def dumps(v): append = raws.append chunks = len(v) // l for i in range(chunks): - append(dumps(None)) - append(v[i*l:(i+1)*l]) + append(dumps(v[i*l:(i+1)*l])) left = len(v) - chunks*l assert left < l append(dumps(b"") if (left == 0) else dumps(v[-left:])) @@ -427,18 +426,15 @@ def _loads(v, sets=False, leapsecUTCAllow=False, _allowContainers=True): raws = [] while True: i, v = _loads(v, _allowContainers=False) - if i is None: - if len(v) < l: - raise NotEnoughData(l-len(v)+1) - raws.append(v[:l]) - v = v[l:] - elif isinstance(i, bytes): - if len(i) >= l: - raise DecodeError("wrong terminator len") + if not isinstance(i, bytes): + raise DecodeError("unexpected tag") + if len(i) == l: + raws.append(i) + elif len(i) < l: raws.append(i) break else: - raise DecodeError("unexpected tag") + raise DecodeError("wrong chunk len") return Blob(l, b"".join(raws)), v raise DecodeError("unknown tag") diff --git a/py3/tests/test_blob.py b/py3/tests/test_blob.py index 2c7ae19..c39ae6a 100644 --- a/py3/tests/test_blob.py +++ b/py3/tests/test_blob.py @@ -37,8 +37,8 @@ class TestBlob(TestCase): encoded, b"".join(( bytes.fromhex("0B0000000000000003"), - bytes.fromhex("01"), b"test", - bytes.fromhex("01"), b"data", + bytes.fromhex("84"), b"test", + bytes.fromhex("84"), b"data", bytes.fromhex("80"), )), ) @@ -55,8 +55,8 @@ class TestBlob(TestCase): encoded, b"".join(( bytes.fromhex("0B0000000000000003"), - bytes.fromhex("01"), b"test", - bytes.fromhex("01"), b"data", + bytes.fromhex("84"), b"test", + bytes.fromhex("84"), b"data", bytes.fromhex("81"), b"2", )), ) @@ -91,7 +91,7 @@ class TestBlob(TestCase): encoded = b"".join(( b"\x0b", (chunkLen-1).to_bytes(8, "big"), - b"".join((b"\x01" + chunk) for chunk in chunks), + b"".join(dumps(chunk) for chunk in chunks), b"\x80", junk, )) @@ -103,12 +103,12 @@ class TestBlob(TestCase): def test_throws_when_not_enough_data(self) -> None: encoded = b"".join(( bytes.fromhex("0B0000000000000003"), - bytes.fromhex("01"), b"test", - bytes.fromhex("01"), b"da", + bytes.fromhex("84"), b"test", + bytes.fromhex("84"), b"da", )) with self.assertRaises(NotEnoughData) as err: loads(encoded) - self.assertEqual(err.exception.n, 3) + self.assertEqual(err.exception.n, 2) def test_throws_when_not_enough_data_for_length(self) -> None: encoded = bytes.fromhex("0B00000000") @@ -119,19 +119,19 @@ class TestBlob(TestCase): def test_throws_when_wrong_terminator_length(self) -> None: encoded = b"".join(( bytes.fromhex("0B0000000000000003"), - bytes.fromhex("01"), b"test", - bytes.fromhex("01"), b"data", + bytes.fromhex("84"), b"test", + bytes.fromhex("84"), b"data", bytes.fromhex("8A"), b"terminator", )) with self.assertRaises(DecodeError) as err: loads(encoded) - self.assertEqual(str(err.exception), "wrong terminator len") + self.assertEqual(str(err.exception), "wrong chunk len") def test_throws_when_wrong_terminator_tag(self) -> None: encoded = b"".join(( bytes.fromhex("0B0000000000000003"), - bytes.fromhex("01"), b"test", - bytes.fromhex("01"), b"data", + bytes.fromhex("84"), b"test", + bytes.fromhex("84"), b"data", bytes.fromhex("04"), b"that was a wrong tag", )) with self.assertRaises(DecodeError) as err: diff --git a/spec/encoding/blob.texi b/spec/encoding/blob.texi index 72c5ff4..f425931 100644 --- a/spec/encoding/blob.texi +++ b/spec/encoding/blob.texi @@ -8,23 +8,24 @@ Blob (binary large object) allows you to transfer binary data in chunks, in a streaming way, when data may not fit in memory. 64-bit big-endian integer follows the BLOB tag, setting the following -chunks payload size (+1). Then come zero or more NIL tags, each followed -by fixed-length payload. Blob is terminated by @ref{Strings, BIN}, -probably having zero length. +chunks payload size (+1). Then come one or more @ref{BIN} strings with +the chunk-length payload. All of them, except for the last one, must +have fixed chunk length payload. Last terminating string's payload must +be shorter. Data format definition must specify exact chunk size expected to be used, if it needs deterministic encoding. @verbatim -BLOB len [NIL || payload0 || NIL || payload1 || ...] BIN +BLOB chunk-len [BIN(len=chunk-len) || ...] BIN(len