]> Cypherpunks repositories - keks.git/commitdiff
Less stateful BLOB decoding
authorSergey Matveev <stargrave@stargrave.org>
Fri, 10 Jan 2025 14:11:17 +0000 (17:11 +0300)
committerSergey Matveev <stargrave@stargrave.org>
Fri, 10 Jan 2025 14:20:29 +0000 (17:20 +0300)
Replace the NIL-prefixed encoding of BLOB chunks with ordinary BIN encoding.
That way the whole file can be decoded by calling only the atom decode
functions, without the need to keep any special inside-BLOB state.

18 files changed:
c/cmd/print-itered/print-itered.c
c/cmd/test-vector/test-vector.c
c/doc/atom.texi
c/lib/enc.c
c/lib/enc.h
c/lib/err.c
c/lib/err.h
c/lib/items.c
c/lib/iter.c [deleted file]
c/lib/iter.h [deleted file]
c/lib/o.list
go/atom-encode.go
go/blob.go
go/parse.go
py3/keks.py
py3/tests/test_blob.py
spec/encoding/blob.texi
tcl/keks.tcl

index 80520efdd04ee76d529be6ba47a2fcecc0342224c58d806554eca9e91b729cb4..3954f5666255d223b0d19ba5ec4373fe0db1d60cef69087afbf36889477e4089 100644 (file)
 // You should have received a copy of the GNU Lesser General Public
 // License along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-#include <assert.h>
-#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include <keks/dec.h>
 #include <keks/err.h>
-#include <keks/iter.h>
 
 #include "../lib/hex.h"
 #include "../lib/mmap.h"
 
 static const size_t maxStrLen = 40;
 
-struct CbState {
-    int indent;
-};
-
-static enum KEKSErr
-myCb(
-    const unsigned char *key,
-    const size_t keyLen,
-    const bool inList,
-    const size_t idx,
-    void *cbState,
-    struct KEKSAtom *atom,
-    size_t *off,
-    const unsigned char *buf,
-    const size_t len)
-{
-    struct CbState *state = (struct CbState *)(cbState);
-    if ((atom->typ) == KEKSItemEOC) {
-        state->indent--;
-        assert(state->indent >= 0);
-    }
-    printf("%04zu ", *off);
-    for (int i = 0; i < state->indent; i++) {
-        fputs("  ", stdout);
-    }
-    if (key != NULL) {
-        fwrite(key, keyLen, 1, stdout);
-        fputs(": ", stdout);
-    } else if (inList) {
-        fprintf(stdout, "%zu: ", idx);
-    }
-
-    char *hex = NULL;
-    enum KEKSErr err = KEKSErrInvalid;
-    switch (atom->typ) {
-    case KEKSItemInvalid:
-        fputs("INVALID\n", stdout);
-        break;
-    case KEKSItemEOC:
-        break;
-    case KEKSItemNIL:
-        fputs("NIL\n", stdout);
-        break;
-    case KEKSItemFalse:
-        fputs("FALSE\n", stdout);
-        break;
-    case KEKSItemTrue:
-        fputs("TRUE\n", stdout);
-        break;
-    case KEKSItemUUID:
-        UUIDPrint(atom->v.uuid);
-        fputs("\n", stdout);
-        break;
-    case KEKSItemPint:
-        fprintf(stdout, "%zu\n", atom->v.pint);
-        break;
-    case KEKSItemNint:
-        fprintf(stdout, "%zd\n", atom->v.nint);
-        break;
-    case KEKSItemList:
-        fputs("[\n", stdout);
-        state->indent++;
-        err = KEKSIterList(cbState, atom, off, buf, len, myCb);
-        if (err != KEKSErrNo) {
-            return err;
-        }
-        fputs("]\n", stdout);
-        break;
-    case KEKSItemMap:
-        fputs("{\n", stdout);
-        state->indent++;
-        err = KEKSIterMap(cbState, atom, off, buf, len, myCb);
-        if (err != KEKSErrNo) {
-            return err;
-        }
-        fputs("}\n", stdout);
-        break;
-    case KEKSItemBlob:
-        printf("BLOB(l=%zu\n", atom->v.blob.chunkLen);
-        state->indent++;
-        err = KEKSIterBlob(cbState, atom, off, buf, len, myCb);
-        if (err != KEKSErrNo) {
-            return err;
-        }
-        fputs(")\n", stdout);
-        break;
-    case KEKSItemFloat:
-        fputs("FLOAT: TODO\n", stdout);
-        break;
-    case KEKSItemTAI64:
-        err = PrintTAI64(atom->v.str.ptr, atom->v.str.len);
-        if (err != KEKSErrNo) {
-            return err;
-        }
-        break;
-    case KEKSItemBin: {
-        const size_t l = (atom->v.str.len > maxStrLen) ? maxStrLen : atom->v.str.len;
-        hex = HexEnc(atom->v.str.ptr, l);
-        fprintf(
-            stdout,
-            "%zu:%s%s\n",
-            atom->v.str.len,
-            hex,
-            (atom->v.str.len > maxStrLen) ? "..." : "");
-        free(hex);
-        break;
-    }
-    case KEKSItemStr: {
-        const size_t l = (atom->v.str.len > maxStrLen) ? maxStrLen : atom->v.str.len;
-        hex = strndup((const char *)(atom->v.str.ptr), l);
-        fprintf(stdout, "\"%s%s\"\n", hex, (atom->v.str.len > maxStrLen) ? "..." : "");
-        free(hex);
-        break;
-    }
-    case KEKSItemRaw:
-        hex = HexEnc(atom->v.str.ptr, atom->v.str.len);
-        fprintf(stdout, "(l=%zu v=%s)\n", atom->v.str.len, hex);
-        free(hex);
-        break;
-    default:
-        fprintf(stderr, "unknown atom\n");
-        return EXIT_FAILURE;
-    }
-    return KEKSErrNo;
-}
-
 int
 main(int argc, char **argv)
 {
@@ -172,18 +43,99 @@ main(int argc, char **argv)
     memset(&atom, 0, sizeof(struct KEKSAtom));
     size_t off = 0;
     size_t got = 0;
-    enum KEKSErr err = KEKSAtomDecode(&got, &atom, buf, len);
-    if (err != KEKSErrNo) {
-        fprintf(stderr, "err: %s\n", KEKSErr2Str(err));
-        return EXIT_FAILURE;
-    }
-    off += got;
-    struct CbState cbState = {.indent = 0};
-    err = myCb(NULL, 0, false, 0, &cbState, &atom, &off, buf, len);
-    if (err != KEKSErrNo) {
-        fprintf(stderr, "err: %s\n", KEKSErr2Str(err));
-        return EXIT_FAILURE;
+    int indent = 0;
+    enum KEKSErr err = KEKSErrInvalid;
+    while (off < len) {
+        err = KEKSAtomDecode(&got, &atom, buf + off, len - off);
+        if (err != KEKSErrNo) {
+            fprintf(stderr, "err: %s\n", KEKSErr2Str(err));
+            return EXIT_FAILURE;
+        }
+        off += got;
+        printf("%d %zu\t", indent, off);
+        if (atom.typ == KEKSItemEOC) {
+            indent--;
+        }
+        for (int i = 0; i < indent; i++) {
+            fputs("  ", stdout);
+        }
+        char *hex = NULL;
+        switch (atom.typ) {
+        case KEKSItemInvalid:
+            fputs("INVALID\n", stdout);
+            break;
+        case KEKSItemEOC:
+            fputs("EOC\n", stdout);
+            break;
+        case KEKSItemNIL:
+            fputs("NIL\n", stdout);
+            break;
+        case KEKSItemFalse:
+            fputs("FALSE\n", stdout);
+            break;
+        case KEKSItemTrue:
+            fputs("TRUE\n", stdout);
+            break;
+        case KEKSItemUUID:
+            UUIDPrint(atom.v.uuid);
+            fputs("\n", stdout);
+            break;
+        case KEKSItemPint:
+            fprintf(stdout, "%zu\n", atom.v.pint);
+            break;
+        case KEKSItemNint:
+            fprintf(stdout, "%zd\n", atom.v.nint);
+            break;
+        case KEKSItemList:
+            fputs("LIST\n", stdout);
+            indent++;
+            break;
+        case KEKSItemMap:
+            fputs("MAP\n", stdout);
+            indent++;
+            break;
+        case KEKSItemBlob:
+            printf("BLOB(l=%zu\n", atom.v.blob.chunkLen);
+            break;
+        case KEKSItemFloat:
+            fputs("FLOAT: TODO\n", stdout);
+            break;
+        case KEKSItemTAI64:
+            err = PrintTAI64(atom.v.str.ptr, atom.v.str.len);
+            if (err != KEKSErrNo) {
+                fprintf(stderr, "err: %s\n", KEKSErr2Str(err));
+                return EXIT_FAILURE;
+            }
+            break;
+        case KEKSItemBin: {
+            const size_t l = (atom.v.str.len > maxStrLen) ? maxStrLen : atom.v.str.len;
+            hex = HexEnc(atom.v.str.ptr, l);
+            fprintf(
+                stdout,
+                "%zu:%s%s\n",
+                atom.v.str.len,
+                hex,
+                (atom.v.str.len > maxStrLen) ? "..." : "");
+            free(hex);
+            break;
+        }
+        case KEKSItemStr: {
+            const size_t l = (atom.v.str.len > maxStrLen) ? maxStrLen : atom.v.str.len;
+            hex = strndup((const char *)(atom.v.str.ptr), l);
+            fprintf(
+                stdout, "\"%s%s\"\n", hex, (atom.v.str.len > maxStrLen) ? "..." : "");
+            free(hex);
+            break;
+        }
+        case KEKSItemRaw:
+            hex = HexEnc(atom.v.str.ptr, atom.v.str.len);
+            fprintf(stdout, "(l=%zu v=%s)\n", atom.v.str.len, hex);
+            free(hex);
+            break;
+        default:
+            fprintf(stderr, "unknown atom\n");
+            return EXIT_FAILURE;
+        }
     }
-    assert(cbState.indent == 0);
     return EXIT_SUCCESS;
 }
index 790378c20cbade05e7c2a8f1ad885c91a81eaa32ea70d4c6c437771964d7ed5b..b436968dc579d7270c90337192db990fe2b6cca45e08ca59edb6df5f611135ba 100644 (file)
@@ -75,18 +75,18 @@ main(void)
 
     adder(KEKSAtomBlobEncode(&Got, buf + Off, len - Off, 12)); // .blob.1
     memset(bin, '6', 12);
-    adder(KEKSAtomChunkEncode(&Got, buf + Off, len - Off, bin, 12));
+    adder(KEKSAtomBinEncode(&Got, buf + Off, len - Off, bin, 12));
     adder(KEKSAtomBinEncode(&Got, buf + Off, len - Off, NULL, 0));
 
     adder(KEKSAtomBlobEncode(&Got, buf + Off, len - Off, 12)); // .blob.2
     memset(bin, '7', 12);
-    adder(KEKSAtomChunkEncode(&Got, buf + Off, len - Off, bin, 12));
+    adder(KEKSAtomBinEncode(&Got, buf + Off, len - Off, bin, 12));
     adder(KEKSAtomBinEncode(&Got, buf + Off, len - Off, bin, 1));
 
     adder(KEKSAtomBlobEncode(&Got, buf + Off, len - Off, 5)); // .blob.3
-    adder(KEKSAtomChunkEncode(
+    adder(KEKSAtomBinEncode(
         &Got, buf + Off, len - Off, (const unsigned char *)"12345", 5));
-    adder(KEKSAtomChunkEncode(
+    adder(KEKSAtomBinEncode(
         &Got, buf + Off, len - Off, (const unsigned char *)"67890", 5));
     adder(KEKSAtomBinEncode(&Got, buf + Off, len - Off, (const unsigned char *)"-", 1));
 
index c502f688f2966b92107614466a4321c13c4dd26956ce81520aec30b43ca0dab5..22718faf8439881781ef763662c63996f27a6097d3a70b2cc9b58fb6c2004d9f 100644 (file)
@@ -23,6 +23,4 @@
 @DOCSTRING KEKSAtomStrEncode@
 @anchor{KEKSAtomBinEncode}
 @DOCSTRING KEKSAtomBinEncode@
-@anchor{KEKSAtomChunkEncode}
-@DOCSTRING KEKSAtomChunkEncode@
 @DOCSTRING KEKSAtomTAI64Encode@
index 4febb507a4e8de80bf92c477b47a3340e47dfbe5219274ca5579152ae26f76f8..bf8e41843141113d3c199173b462e1c125e969ca5b037f6e3ab94345c35d2ecd 100644 (file)
@@ -225,27 +225,6 @@ KEKSAtomBinEncode(
     return keksAtomStrEncode(len, buf, cap, src, srcLen, false);
 }
 
-bool
-KEKSAtomChunkEncode(
-    size_t *len,
-    unsigned char *buf,
-    const size_t cap,
-    const unsigned char *src,
-    const size_t srcLen)
-{
-    (*len) = 1 + srcLen;
-    if ((*len) <= srcLen) {
-        (*len) = 0;
-        return false;
-    }
-    if (cap < (*len)) {
-        return false;
-    }
-    buf[0] = KEKSAtomNIL;
-    memcpy(buf + 1, src, srcLen);
-    return true;
-}
-
 bool
 KEKSAtomTAI64Encode(
     size_t *len,
index c0bd9cad7cf1027b5375d5399a32efc83f58be8074426276565ef344405aed96..3527c8f93bfbc02bac1998eca9805e56eaba3416c9f5f1e4eac0ca22d2647f4a 100644 (file)
@@ -97,9 +97,8 @@ KEKSAtomMapEncode(size_t *len, unsigned char *buf, const size_t cap);
 //     (size_t *len, unsigned char *buf, const size_t cap, const size_t chunkLen)
 // Encode BLOB atom in provided @var{buf} with capacity of @var{cap}.
 // In case of success, true is returned and @var{len} will hold how many
-// bytes were written to buffer. You must call @ref{KEKSAtomChunkEncode}
-// functions for subsequent chunks, and terminate the blob with
-// @ref{KEKSAtomBinEncode}.
+// bytes were written to buffer. You must then call @ref{KEKSAtomBinEncode}
+// for each subsequent chunk and for the terminator.
 // @end deftypefun
 bool
 KEKSAtomBlobEncode(
@@ -140,23 +139,6 @@ KEKSAtomBinEncode(
     const unsigned char *src,
     const size_t srcLen);
 
-// TEXINFO: KEKSAtomChunkEncode
-// @deftypefun bool KEKSAtomChunkEncode @
-//     (size_t *len, unsigned char *buf, const size_t cap, @
-//     const unsigned char *src, const size_t srcLen)
-// Encode the chunk in provided @var{buf} with capacity of @var{cap}.
-// In case of success, true is returned and @var{len} will hold how many
-// bytes were written to buffer. It is just a convenient wrapper instead
-// of using @ref{KEKSAtomNILEncode} followed by @var{srcLen} bytes.
-// @end deftypefun
-bool
-KEKSAtomChunkEncode(
-    size_t *len,
-    unsigned char *buf,
-    const size_t cap,
-    const unsigned char *src,
-    const size_t srcLen);
-
 // TEXINFO: KEKSAtomTAI64Encode
 // @deftypefun bool KEKSAtomTAI64Encode @
 //     (size_t *len, unsigned char *buf, const size_t cap, @
index 54557002ee994d876100f41ee12b83815770376bb07b4e0e93ff0256a4209b0f..51fe6493db6ab76fcb29f20b43bdfa70b0231f2b4da6e08116f24a61d80dbdf5 100644 (file)
@@ -22,8 +22,8 @@ KEKSErr2Str(const enum KEKSErr err)
         return "IntNonMinimal";
     case KEKSErrBlobBadAtom:
         return "BlobBadAtom";
-    case KEKSErrBlobBadTerm:
-        return "BlobBadTerm";
+    case KEKSErrBlobBadChunkLen:
+        return "BlobBadChunkLen";
     case KEKSErrTAI64TooBig:
         return "TAI64TooBig";
     case KEKSErrTAI64BadNsec:
index 3bc111c73319f7e4c547d2a085c9bd480af10bf0dad8cfd2b0fedd9bbfa87fd1..c6d59e6e2f1b5dc18e4248f8e1f003e7da5484786e6bdd5090d9ee46acc7e78d 100644 (file)
@@ -21,8 +21,8 @@
 //     Non minimal integer encoding.
 // @item KEKSErrBlobBadAtom
 //     Blob contains unexpected atom.
-// @item KEKSErrBlobBadTerm
-//     Blob contains invalid terminator.
+// @item KEKSErrBlobBadChunkLen
+//     Blob contains chunk with invalid length.
 // @item KEKSErrTAI64TooBig
 //     Too large TAI64 value, out-of-bounds.
 // @item KEKSErrTAI64BadNsec
@@ -53,7 +53,7 @@ enum KEKSErr {
     KEKSErrIntNonBin,
     KEKSErrIntNonMinimal,
     KEKSErrBlobBadAtom,
-    KEKSErrBlobBadTerm,
+    KEKSErrBlobBadChunkLen,
     KEKSErrTAI64TooBig,
     KEKSErrTAI64BadNsec,
     KEKSErrTAI64BadAsec,
index 1427249ce5b4c8f3459434f08b3f27d70adac9736fa3510c5374450969f38292..763b94428d5cd0cdd0788e2a630664fac103c5fcc5b4b7ff46d0708884618641 100644 (file)
@@ -242,32 +242,15 @@ keksItemsParse( // NOLINT(misc-no-recursion)
             }
             cur = idx + 1;
             struct KEKSAtom *atom = &(items->list[cur].atom);
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wswitch-enum"
-            switch (atom->typ) {
-#pragma clang diagnostic pop
-            case KEKSItemNIL:
-                atom->v.str.len = chunkLen;
-                assert(len >= (*off));
-                if ((len - (*off)) <= chunkLen) {
-                    return KEKSErrNotEnough;
-                }
-                atom->typ = KEKSItemBin;
-                atom->v.str.ptr = buf + *off;
-                if ((SIZE_MAX - chunkLen) < (*off)) {
-                    return KEKSErrLenTooBig;
-                }
-                (*off) += chunkLen;
-                break;
-            case KEKSItemBin:
-                if (atom->v.str.len >= chunkLen) {
-                    return KEKSErrBlobBadTerm;
-                }
-                eoc = true;
-                break;
-            default:
+            if (atom->typ != KEKSItemBin) {
                 return KEKSErrBlobBadAtom;
             }
+            if (atom->v.str.len == chunkLen) {
+            } else if (atom->v.str.len < chunkLen) {
+                eoc = true;
+            } else {
+                return KEKSErrBlobBadChunkLen;
+            }
             if (prev != 0) {
                 items->list[prev].next = cur;
             }
@@ -387,7 +370,7 @@ KEKSItemsEncode( // NOLINT(misc-no-recursion)
                 break;
             }
             assert(cap >= (*off));
-            ok = KEKSAtomChunkEncode(
+            ok = KEKSAtomBinEncode(
                 &got,
                 buf + *off,
                 cap - (*off),
diff --git a/c/lib/iter.c b/c/lib/iter.c
deleted file mode 100644 (file)
index 258d96e..0000000
+++ /dev/null
@@ -1,183 +0,0 @@
-// ckeks -- C KEKS encoder implementation
-// Copyright (C) 2024-2025 Sergey Matveev <stargrave@stargrave.org>
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Lesser General Public License as
-// published by the Free Software Foundation, version 3 of the License.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <string.h>
-
-#include "dec.h"
-#include "err.h"
-#include "iter.h"
-
-enum KEKSErr
-KEKSIterList(
-    void *cbState,
-    struct KEKSAtom *atom,
-    size_t *off,
-    const unsigned char *buf,
-    const size_t len,
-    KEKSIterCb cb)
-{
-    size_t got = 0;
-    enum KEKSErr err = KEKSErrInvalid;
-    bool eoc = false;
-    for (size_t n = 0;; n++) {
-        assert(len >= (*off));
-        err = KEKSAtomDecode(&got, atom, buf + *off, len - (*off));
-        if (err != KEKSErrNo) {
-            return err;
-        }
-        if ((SIZE_MAX - got) < (*off)) {
-            return KEKSErrLenTooBig;
-        }
-        (*off) += got;
-        eoc = atom->typ == KEKSItemEOC;
-        err = cb(NULL, 0, !eoc, n, cbState, atom, off, buf, len);
-        if (err != KEKSErrNo) {
-            return err;
-        }
-        if (eoc) {
-            break;
-        }
-    }
-    return KEKSErrNo;
-}
-
-enum KEKSErr
-KEKSIterMap(
-    void *cbState,
-    struct KEKSAtom *atom,
-    size_t *off,
-    const unsigned char *buf,
-    const size_t len,
-    KEKSIterCb cb)
-{
-    enum KEKSErr err = KEKSErrInvalid;
-    size_t got = 0;
-    const unsigned char *key = NULL;
-    size_t keyLen = 0;
-    for (;;) {
-        assert(len >= (*off));
-        err = KEKSAtomDecode(&got, atom, buf + *off, len - (*off));
-        if (err != KEKSErrNo) {
-            return err;
-        }
-        if ((SIZE_MAX - got) < (*off)) {
-            return KEKSErrLenTooBig;
-        }
-        (*off) += got;
-        if (atom->typ == KEKSItemEOC) {
-            err = cb(NULL, 0, false, 0, cbState, atom, off, buf, len);
-            if (err != KEKSErrNo) {
-                return err;
-            }
-            break;
-        }
-        if (atom->typ != KEKSItemStr) {
-            return KEKSErrMapBadKey;
-        }
-        if (atom->v.str.len == 0) {
-            return KEKSErrMapBadKey;
-        }
-        if (atom->v.str.len < keyLen) {
-            return KEKSErrMapUnordered;
-        }
-        if ((atom->v.str.len == keyLen) &&
-            (memcmp(key, atom->v.str.ptr, keyLen) >= 0)) {
-            return KEKSErrMapUnordered;
-        }
-        keyLen = atom->v.str.len;
-        key = atom->v.str.ptr;
-        assert(len >= (*off));
-        err = KEKSAtomDecode(&got, atom, buf + *off, len - (*off));
-        if (err != KEKSErrNo) {
-            return err;
-        }
-        if ((SIZE_MAX - got) < (*off)) {
-            return KEKSErrLenTooBig;
-        }
-        (*off) += got;
-        if (atom->typ == KEKSItemEOC) {
-            return KEKSErrUnexpectedEOC;
-        }
-        err = cb(key, keyLen, false, 0, cbState, atom, off, buf, len);
-        if (err != KEKSErrNo) {
-            return err;
-        }
-    }
-    return KEKSErrNo;
-}
-
-enum KEKSErr
-KEKSIterBlob(
-    void *cbState,
-    struct KEKSAtom *atom,
-    size_t *off,
-    const unsigned char *buf,
-    const size_t len,
-    KEKSIterCb cb)
-{
-    const size_t chunkLen = atom->v.blob.chunkLen;
-    enum KEKSErr err = KEKSErrInvalid;
-    size_t got = 0;
-    bool eoc = false;
-    for (size_t n = 0; !eoc; n++) {
-        assert(len >= (*off));
-        err = KEKSAtomDecode(&got, atom, buf + *off, len - (*off));
-        if (err != KEKSErrNo) {
-            return err;
-        }
-        if ((SIZE_MAX - got) < (*off)) {
-            return KEKSErrLenTooBig;
-        }
-        (*off) += got;
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wswitch-enum"
-        switch (atom->typ) {
-#pragma clang diagnostic pop
-        case KEKSItemNIL:
-            assert(len >= (*off));
-            if ((len - *off) <= chunkLen) {
-                atom->v.str.len = chunkLen;
-                return KEKSErrNotEnough;
-            }
-            atom->typ = KEKSItemBin;
-            atom->v.str.ptr = buf + *off;
-            atom->v.str.len = chunkLen;
-            if ((SIZE_MAX - chunkLen) < (*off)) {
-                return KEKSErrLenTooBig;
-            }
-            (*off) += chunkLen;
-            break;
-        case KEKSItemBin:
-            if ((atom->v.str.len) >= chunkLen) {
-                return KEKSErrBlobBadTerm;
-            }
-            eoc = true;
-            break;
-        default:
-            return KEKSErrBlobBadAtom;
-        }
-        err = cb(NULL, 0, true, n, cbState, atom, off, buf, len);
-        if (err != KEKSErrNo) {
-            return err;
-        }
-    }
-    atom->typ = KEKSItemEOC;
-    err = cb(NULL, 0, false, 0, cbState, atom, off, buf, len);
-    return err;
-}
diff --git a/c/lib/iter.h b/c/lib/iter.h
deleted file mode 100644 (file)
index 9499812..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-#ifndef KEKS_ITER_H
-#define KEKS_ITER_H
-
-#include <stdbool.h>
-#include <stddef.h>
-
-#include "dec.h"
-#include "err.h"
-
-typedef enum KEKSErr (*KEKSIterCb)(
-    const unsigned char *key,
-    const size_t keyLen,
-    const bool inList,
-    const size_t idx,
-    void *cbState,
-    struct KEKSAtom *atom,
-    size_t *off,
-    const unsigned char *buf,
-    const size_t len);
-
-enum KEKSErr
-KEKSIterList(
-    void *cbState,
-    struct KEKSAtom *atom,
-    size_t *off,
-    const unsigned char *buf,
-    const size_t len,
-    KEKSIterCb cb);
-
-enum KEKSErr
-KEKSIterMap(
-    void *cbState,
-    struct KEKSAtom *atom,
-    size_t *off,
-    const unsigned char *buf,
-    const size_t len,
-    KEKSIterCb cb);
-
-enum KEKSErr
-KEKSIterBlob(
-    void *cbState,
-    struct KEKSAtom *atom,
-    size_t *off,
-    const unsigned char *buf,
-    const size_t len,
-    KEKSIterCb cb);
-
-#endif // KEKS_ITER_H
index 3aaa683b60728dcbb0a97fee91eb83952f6480bb7097049d1bae540b3e979c2f..52a1ee006e15039370f14e46e1783cbe831d7d30e14fac50f301e44751a83557 100644 (file)
@@ -5,7 +5,6 @@ enctai.o
 err.o
 frombe.o
 items.o
-iter.o
 leapsecs.o
 tobe.o
 utf8.o
index baaa5d1e3af6c4e9ba35e05cb79489ba6345b4ea97c70b892cca27acd0686d20..485d9b499d6e139cb187dcd34dfcbf7a02c7b252e6493c94f881096d4167d489 100644 (file)
@@ -114,13 +114,7 @@ func BigIntEncode(w io.Writer, v *big.Int) (written int64, err error) {
 }
 
 // Write an encoded BLOB atom.
-// You have to manually provide necessary chunks and
-// properly terminate it with BinEncode.
-func BlobEncode(
-       w io.Writer,
-       chunkLen int64,
-       r io.Reader,
-) (written int64, err error) {
+func BlobEncode(w io.Writer, chunkLen int64, r io.Reader) (written int64, err error) {
        {
                l := make([]byte, 9)
                l[0] = byte(AtomBLOB)
@@ -142,16 +136,11 @@ func BlobEncode(
                        }
                        return
                }
-               n64, err = ByteEncode(w, byte(AtomNIL))
+               n64, err = BinEncode(w, chunk)
                if err != nil {
                        return
                }
                written += n64
-               n64, err = io.Copy(w, bytes.NewReader(chunk))
-               written += n64
-               if err != nil {
-                       return
-               }
        }
        n64, err = BinEncode(w, chunk)
        written += n64
index ed27cdf5b048fa23c15994f7303512e6bb037a787e9eca92f343e65ae242e951..0d52c15873e1c9b0332fbe0b729f69c271f8a12b3ffcc5ae9a62d020b497c763 100644 (file)
@@ -51,5 +51,5 @@ type BlobReader struct {
 }
 
 func (blob *BlobReader) String() string {
-       return fmt.Sprintf("BLOB(%d, ~)", blob.ChunkLen)
+       return fmt.Sprintf("BLOB(%d, ?)", blob.ChunkLen)
 }
index 769fb5aef10c35776ee359f01232869f06b15fe82048fc7e48f0c188081eeb40..5a7daeeb5b657d357946aab4522803e70bb338b7253aa95b26ff4de4d7096bf4 100644 (file)
@@ -22,12 +22,12 @@ import (
 )
 
 var (
-       ErrMapBadKey     = errors.New("map bad key")
-       ErrMapUnordered  = errors.New("map unordered")
-       ErrBlobBadAtom   = errors.New("blob unexpected atom")
-       ErrBlobBadTerm   = errors.New("blob bad terminator")
-       ErrUnexpectedEOC = errors.New("unexpected EOC")
-       ErrTooDeep       = errors.New("too deep structure")
+       ErrMapBadKey       = errors.New("map bad key")
+       ErrMapUnordered    = errors.New("map unordered")
+       ErrBlobBadAtom     = errors.New("blob unexpected atom")
+       ErrBlobBadChunkLen = errors.New("blob bad chunk len")
+       ErrUnexpectedEOC   = errors.New("unexpected EOC")
+       ErrTooDeep         = errors.New("too deep structure")
 )
 
 func (ctx *Decoder) deTail() {
@@ -133,34 +133,27 @@ func (ctx *Decoder) parse() (t types.Type, err error) {
                var chunks []string
                var sub types.Type
                var s string
-       BlobCycle:
                for {
                        sub, err = ctx.DecodeAtom()
                        if err != nil {
                                return
                        }
-                       switch sub {
-                       case types.NIL:
-                               ctx.deTail()
-                               s, err = ctx.getBytes(int(chunkLen))
-                               if err != nil {
-                                       return
-                               }
+                       if sub != types.Bin {
+                               err = ErrBlobBadAtom
+                               return
+                       }
+                       s = ctx.strs[len(ctx.strs)-1]
+                       ctx.deTail()
+                       ctx.strs = ctx.strs[:len(ctx.strs)-1]
+                       if int64(len(s)) == chunkLen {
                                chunks = append(chunks, s)
-                       case types.Bin:
-                               s = ctx.strs[len(ctx.strs)-1]
-                               if int64(len(s)) >= chunkLen {
-                                       err = ErrBlobBadTerm
-                                       return
-                               }
+                       } else if int64(len(s)) < chunkLen {
                                if len(s) != 0 {
                                        chunks = append(chunks, s)
                                }
-                               ctx.deTail()
-                               ctx.strs = ctx.strs[:len(ctx.strs)-1]
-                               break BlobCycle
-                       default:
-                               err = ErrBlobBadAtom
+                               break
+                       } else {
+                               err = ErrBlobBadChunkLen
                                return
                        }
                }
index 72e3163a6ed290789d49087c1f72f4871b1dfa4b6036354c9a16bc363a100b50..c8dc7eb8ecfaa87cf0f5b1e1d81b218679906791f37b16c9ee19c6ac782f6eba 100755 (executable)
@@ -235,8 +235,7 @@ def dumps(v):
         append = raws.append
         chunks = len(v) // l
         for i in range(chunks):
-            append(dumps(None))
-            append(v[i*l:(i+1)*l])
+            append(dumps(v[i*l:(i+1)*l]))
         left = len(v) - chunks*l
         assert left < l
         append(dumps(b"") if (left == 0) else dumps(v[-left:]))
@@ -427,18 +426,15 @@ def _loads(v, sets=False, leapsecUTCAllow=False, _allowContainers=True):
         raws = []
         while True:
             i, v = _loads(v, _allowContainers=False)
-            if i is None:
-                if len(v) < l:
-                    raise NotEnoughData(l-len(v)+1)
-                raws.append(v[:l])
-                v = v[l:]
-            elif isinstance(i, bytes):
-                if len(i) >= l:
-                    raise DecodeError("wrong terminator len")
+            if not isinstance(i, bytes):
+                raise DecodeError("unexpected tag")
+            if len(i) == l:
+                raws.append(i)
+            elif len(i) < l:
                 raws.append(i)
                 break
             else:
-                raise DecodeError("unexpected tag")
+                raise DecodeError("wrong chunk len")
         return Blob(l, b"".join(raws)), v
     raise DecodeError("unknown tag")
 
index 2c7ae19a57abd37006b1853797a46f01ffc069a599983578232788424f0a590f..c39ae6a5be879d75791e6afbdd15ba356b51b4261d6030f524158e5a102ed791 100644 (file)
@@ -37,8 +37,8 @@ class TestBlob(TestCase):
             encoded,
             b"".join((
                 bytes.fromhex("0B0000000000000003"),
-                bytes.fromhex("01"), b"test",
-                bytes.fromhex("01"), b"data",
+                bytes.fromhex("84"), b"test",
+                bytes.fromhex("84"), b"data",
                 bytes.fromhex("80"),
             )),
         )
@@ -55,8 +55,8 @@ class TestBlob(TestCase):
             encoded,
             b"".join((
                 bytes.fromhex("0B0000000000000003"),
-                bytes.fromhex("01"), b"test",
-                bytes.fromhex("01"), b"data",
+                bytes.fromhex("84"), b"test",
+                bytes.fromhex("84"), b"data",
                 bytes.fromhex("81"), b"2",
             )),
         )
@@ -91,7 +91,7 @@ class TestBlob(TestCase):
         encoded = b"".join((
             b"\x0b",
             (chunkLen-1).to_bytes(8, "big"),
-            b"".join((b"\x01" + chunk) for chunk in chunks),
+            b"".join(dumps(chunk) for chunk in chunks),
             b"\x80",
             junk,
         ))
@@ -103,12 +103,12 @@ class TestBlob(TestCase):
     def test_throws_when_not_enough_data(self) -> None:
         encoded = b"".join((
             bytes.fromhex("0B0000000000000003"),
-            bytes.fromhex("01"), b"test",
-            bytes.fromhex("01"), b"da",
+            bytes.fromhex("84"), b"test",
+            bytes.fromhex("84"), b"da",
         ))
         with self.assertRaises(NotEnoughData) as err:
             loads(encoded)
-        self.assertEqual(err.exception.n, 3)
+        self.assertEqual(err.exception.n, 2)
 
     def test_throws_when_not_enough_data_for_length(self) -> None:
         encoded = bytes.fromhex("0B00000000")
@@ -119,19 +119,19 @@ class TestBlob(TestCase):
     def test_throws_when_wrong_terminator_length(self) -> None:
         encoded = b"".join((
             bytes.fromhex("0B0000000000000003"),
-            bytes.fromhex("01"), b"test",
-            bytes.fromhex("01"), b"data",
+            bytes.fromhex("84"), b"test",
+            bytes.fromhex("84"), b"data",
             bytes.fromhex("8A"), b"terminator",
         ))
         with self.assertRaises(DecodeError) as err:
             loads(encoded)
-        self.assertEqual(str(err.exception), "wrong terminator len")
+        self.assertEqual(str(err.exception), "wrong chunk len")
 
     def test_throws_when_wrong_terminator_tag(self) -> None:
         encoded = b"".join((
             bytes.fromhex("0B0000000000000003"),
-            bytes.fromhex("01"), b"test",
-            bytes.fromhex("01"), b"data",
+            bytes.fromhex("84"), b"test",
+            bytes.fromhex("84"), b"data",
             bytes.fromhex("04"), b"that was a wrong tag",
         ))
         with self.assertRaises(DecodeError) as err:
index 72c5ff47015c8414b80b280eff7ffd59f81cb58ed2ecd0bd19f888264cc336c3..f425931febeeb9a39e2fabd30eef6699c63efca1a7be7eeaa946bd71f40afa1b 100644 (file)
@@ -8,23 +8,24 @@ Blob (binary large object) allows you to transfer binary data in chunks,
 in a streaming way, when data may not fit in memory.
 
 64-bit big-endian integer follows the BLOB tag, setting the following
-chunks payload size (+1). Then come zero or more NIL tags, each followed
-by fixed-length payload. Blob is terminated by @ref{Strings, BIN},
-probably having zero length.
+chunks payload size (+1). Then come one or more @ref{BIN} strings. All
+of them, except for the last one, must have a payload of exactly the
+chunk length. The last, terminating string's payload must be shorter
+(possibly empty).
 
 Data format definition must specify exact chunk size expected to be
 used, if it needs deterministic encoding.
 
 @verbatim
-BLOB len [NIL || payload0 || NIL || payload1 || ...] BIN
+BLOB chunk-len [BIN(len=chunk-len) || ...] BIN(len<chunk-len)
 @end verbatim
 
 @multitable @columnfractions .5 .5
 
 @item BLOB(5, "") @tab @code{0B 0000000000000004 80}
-@item BLOB(5, "12345") @tab @code{0B 0000000000000004 01 3132333435 80}
-@item BLOB(5, "123456") @tab @code{0B 0000000000000004 01 3132333435 81 36}
+@item BLOB(5, "12345") @tab @code{0B 0000000000000004 85 3132333435 80}
+@item BLOB(5, "123456") @tab @code{0B 0000000000000004 85 3132333435 81 36}
 @item BLOB(500, "123") @tab @code{0B 00000000000001F3 83 313233}
-@item BLOB(2, "12345") @tab @code{0B 0000000000000001 01 3132 01 3334 81 35}
+@item BLOB(2, "12345") @tab @code{0B 0000000000000001 82 3132 82 3334 81 35}
 
 @end multitable
index 417eb96e5ee10e57d2c30383ea1c4449309c43e8a4e370851bd222e82688f06a..ea9b572f8a2385bfd5e2377548cfdadbb96b066139f2f0cef81352ccbd848642 100644 (file)
@@ -143,8 +143,7 @@ proc BLOB {chunkLen v} {
     set vl [string length $v]
     set chunks [expr {$vl / $chunkLen}]
     for {set i 0} {$i < $chunks} {incr i} {
-        NIL
-        add [string range $v \
+        BIN [string range $v \
             [expr {$i * $chunkLen}] \
             [expr {(($i + 1) * $chunkLen) - 1}]]
     }