Replace the NIL-prefixed encoding of BLOB chunks with ordinary BIN encoding.
That way the whole file can be decoded by calling only the atom decode
functions, without the need to keep special inside-blob state.
// You should have received a copy of the GNU Lesser General Public
// License along with this program. If not, see <http://www.gnu.org/licenses/>.
-#include <assert.h>
-#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <keks/dec.h>
#include <keks/err.h>
-#include <keks/iter.h>
#include "../lib/hex.h"
#include "../lib/mmap.h"
static const size_t maxStrLen = 40;
-struct CbState {
- int indent;
-};
-
-static enum KEKSErr
-myCb(
- const unsigned char *key,
- const size_t keyLen,
- const bool inList,
- const size_t idx,
- void *cbState,
- struct KEKSAtom *atom,
- size_t *off,
- const unsigned char *buf,
- const size_t len)
-{
- struct CbState *state = (struct CbState *)(cbState);
- if ((atom->typ) == KEKSItemEOC) {
- state->indent--;
- assert(state->indent >= 0);
- }
- printf("%04zu ", *off);
- for (int i = 0; i < state->indent; i++) {
- fputs(" ", stdout);
- }
- if (key != NULL) {
- fwrite(key, keyLen, 1, stdout);
- fputs(": ", stdout);
- } else if (inList) {
- fprintf(stdout, "%zu: ", idx);
- }
-
- char *hex = NULL;
- enum KEKSErr err = KEKSErrInvalid;
- switch (atom->typ) {
- case KEKSItemInvalid:
- fputs("INVALID\n", stdout);
- break;
- case KEKSItemEOC:
- break;
- case KEKSItemNIL:
- fputs("NIL\n", stdout);
- break;
- case KEKSItemFalse:
- fputs("FALSE\n", stdout);
- break;
- case KEKSItemTrue:
- fputs("TRUE\n", stdout);
- break;
- case KEKSItemUUID:
- UUIDPrint(atom->v.uuid);
- fputs("\n", stdout);
- break;
- case KEKSItemPint:
- fprintf(stdout, "%zu\n", atom->v.pint);
- break;
- case KEKSItemNint:
- fprintf(stdout, "%zd\n", atom->v.nint);
- break;
- case KEKSItemList:
- fputs("[\n", stdout);
- state->indent++;
- err = KEKSIterList(cbState, atom, off, buf, len, myCb);
- if (err != KEKSErrNo) {
- return err;
- }
- fputs("]\n", stdout);
- break;
- case KEKSItemMap:
- fputs("{\n", stdout);
- state->indent++;
- err = KEKSIterMap(cbState, atom, off, buf, len, myCb);
- if (err != KEKSErrNo) {
- return err;
- }
- fputs("}\n", stdout);
- break;
- case KEKSItemBlob:
- printf("BLOB(l=%zu\n", atom->v.blob.chunkLen);
- state->indent++;
- err = KEKSIterBlob(cbState, atom, off, buf, len, myCb);
- if (err != KEKSErrNo) {
- return err;
- }
- fputs(")\n", stdout);
- break;
- case KEKSItemFloat:
- fputs("FLOAT: TODO\n", stdout);
- break;
- case KEKSItemTAI64:
- err = PrintTAI64(atom->v.str.ptr, atom->v.str.len);
- if (err != KEKSErrNo) {
- return err;
- }
- break;
- case KEKSItemBin: {
- const size_t l = (atom->v.str.len > maxStrLen) ? maxStrLen : atom->v.str.len;
- hex = HexEnc(atom->v.str.ptr, l);
- fprintf(
- stdout,
- "%zu:%s%s\n",
- atom->v.str.len,
- hex,
- (atom->v.str.len > maxStrLen) ? "..." : "");
- free(hex);
- break;
- }
- case KEKSItemStr: {
- const size_t l = (atom->v.str.len > maxStrLen) ? maxStrLen : atom->v.str.len;
- hex = strndup((const char *)(atom->v.str.ptr), l);
- fprintf(stdout, "\"%s%s\"\n", hex, (atom->v.str.len > maxStrLen) ? "..." : "");
- free(hex);
- break;
- }
- case KEKSItemRaw:
- hex = HexEnc(atom->v.str.ptr, atom->v.str.len);
- fprintf(stdout, "(l=%zu v=%s)\n", atom->v.str.len, hex);
- free(hex);
- break;
- default:
- fprintf(stderr, "unknown atom\n");
- return EXIT_FAILURE;
- }
- return KEKSErrNo;
-}
-
int
main(int argc, char **argv)
{
memset(&atom, 0, sizeof(struct KEKSAtom));
size_t off = 0;
size_t got = 0;
- enum KEKSErr err = KEKSAtomDecode(&got, &atom, buf, len);
- if (err != KEKSErrNo) {
- fprintf(stderr, "err: %s\n", KEKSErr2Str(err));
- return EXIT_FAILURE;
- }
- off += got;
- struct CbState cbState = {.indent = 0};
- err = myCb(NULL, 0, false, 0, &cbState, &atom, &off, buf, len);
- if (err != KEKSErrNo) {
- fprintf(stderr, "err: %s\n", KEKSErr2Str(err));
- return EXIT_FAILURE;
+ int indent = 0;
+ enum KEKSErr err = KEKSErrInvalid;
+ while (off < len) {
+ err = KEKSAtomDecode(&got, &atom, buf + off, len - off);
+ if (err != KEKSErrNo) {
+ fprintf(stderr, "err: %s\n", KEKSErr2Str(err));
+ return EXIT_FAILURE;
+ }
+ off += got;
+ printf("%d %zu\t", indent, off);
+ if (atom.typ == KEKSItemEOC) {
+ indent--;
+ }
+ for (int i = 0; i < indent; i++) {
+ fputs(" ", stdout);
+ }
+ char *hex = NULL;
+ switch (atom.typ) {
+ case KEKSItemInvalid:
+ fputs("INVALID\n", stdout);
+ break;
+ case KEKSItemEOC:
+ fputs("EOC\n", stdout);
+ break;
+ case KEKSItemNIL:
+ fputs("NIL\n", stdout);
+ break;
+ case KEKSItemFalse:
+ fputs("FALSE\n", stdout);
+ break;
+ case KEKSItemTrue:
+ fputs("TRUE\n", stdout);
+ break;
+ case KEKSItemUUID:
+ UUIDPrint(atom.v.uuid);
+ fputs("\n", stdout);
+ break;
+ case KEKSItemPint:
+ fprintf(stdout, "%zu\n", atom.v.pint);
+ break;
+ case KEKSItemNint:
+ fprintf(stdout, "%zd\n", atom.v.nint);
+ break;
+ case KEKSItemList:
+ fputs("LIST\n", stdout);
+ indent++;
+ break;
+ case KEKSItemMap:
+ fputs("MAP\n", stdout);
+ indent++;
+ break;
+ case KEKSItemBlob:
+ printf("BLOB(l=%zu\n", atom.v.blob.chunkLen);
+ break;
+ case KEKSItemFloat:
+ fputs("FLOAT: TODO\n", stdout);
+ break;
+ case KEKSItemTAI64:
+ err = PrintTAI64(atom.v.str.ptr, atom.v.str.len);
+ if (err != KEKSErrNo) {
+ fprintf(stderr, "err: %s\n", KEKSErr2Str(err));
+ return EXIT_FAILURE;
+ }
+ break;
+ case KEKSItemBin: {
+ const size_t l = (atom.v.str.len > maxStrLen) ? maxStrLen : atom.v.str.len;
+ hex = HexEnc(atom.v.str.ptr, l);
+ fprintf(
+ stdout,
+ "%zu:%s%s\n",
+ atom.v.str.len,
+ hex,
+ (atom.v.str.len > maxStrLen) ? "..." : "");
+ free(hex);
+ break;
+ }
+ case KEKSItemStr: {
+ const size_t l = (atom.v.str.len > maxStrLen) ? maxStrLen : atom.v.str.len;
+ hex = strndup((const char *)(atom.v.str.ptr), l);
+ fprintf(
+ stdout, "\"%s%s\"\n", hex, (atom.v.str.len > maxStrLen) ? "..." : "");
+ free(hex);
+ break;
+ }
+ case KEKSItemRaw:
+ hex = HexEnc(atom.v.str.ptr, atom.v.str.len);
+ fprintf(stdout, "(l=%zu v=%s)\n", atom.v.str.len, hex);
+ free(hex);
+ break;
+ default:
+ fprintf(stderr, "unknown atom\n");
+ return EXIT_FAILURE;
+ }
}
- assert(cbState.indent == 0);
return EXIT_SUCCESS;
}
adder(KEKSAtomBlobEncode(&Got, buf + Off, len - Off, 12)); // .blob.1
memset(bin, '6', 12);
- adder(KEKSAtomChunkEncode(&Got, buf + Off, len - Off, bin, 12));
+ adder(KEKSAtomBinEncode(&Got, buf + Off, len - Off, bin, 12));
adder(KEKSAtomBinEncode(&Got, buf + Off, len - Off, NULL, 0));
adder(KEKSAtomBlobEncode(&Got, buf + Off, len - Off, 12)); // .blob.2
memset(bin, '7', 12);
- adder(KEKSAtomChunkEncode(&Got, buf + Off, len - Off, bin, 12));
+ adder(KEKSAtomBinEncode(&Got, buf + Off, len - Off, bin, 12));
adder(KEKSAtomBinEncode(&Got, buf + Off, len - Off, bin, 1));
adder(KEKSAtomBlobEncode(&Got, buf + Off, len - Off, 5)); // .blob.3
- adder(KEKSAtomChunkEncode(
+ adder(KEKSAtomBinEncode(
&Got, buf + Off, len - Off, (const unsigned char *)"12345", 5));
- adder(KEKSAtomChunkEncode(
+ adder(KEKSAtomBinEncode(
&Got, buf + Off, len - Off, (const unsigned char *)"67890", 5));
adder(KEKSAtomBinEncode(&Got, buf + Off, len - Off, (const unsigned char *)"-", 1));
@DOCSTRING KEKSAtomStrEncode@
@anchor{KEKSAtomBinEncode}
@DOCSTRING KEKSAtomBinEncode@
-@anchor{KEKSAtomChunkEncode}
-@DOCSTRING KEKSAtomChunkEncode@
@DOCSTRING KEKSAtomTAI64Encode@
return keksAtomStrEncode(len, buf, cap, src, srcLen, false);
}
-bool
-KEKSAtomChunkEncode(
- size_t *len,
- unsigned char *buf,
- const size_t cap,
- const unsigned char *src,
- const size_t srcLen)
-{
- (*len) = 1 + srcLen;
- if ((*len) <= srcLen) {
- (*len) = 0;
- return false;
- }
- if (cap < (*len)) {
- return false;
- }
- buf[0] = KEKSAtomNIL;
- memcpy(buf + 1, src, srcLen);
- return true;
-}
-
bool
KEKSAtomTAI64Encode(
size_t *len,
// (size_t *len, unsigned char *buf, const size_t cap, const size_t chunkLen)
// Encode BLOB atom in provided @var{buf} with capacity of @var{cap}.
// In case of success, true is returned and @var{len} will hold how many
-// bytes were written to buffer. You must call @ref{KEKSAtomChunkEncode}
-// functions for subsequent chunks, and terminate the blob with
-// @ref{KEKSAtomBinEncode}.
+// bytes were written to buffer. You must call @ref{KEKSAtomBinEncode}
+// for each subsequent chunk and for the terminator.
// @end deftypefun
bool
KEKSAtomBlobEncode(
const unsigned char *src,
const size_t srcLen);
-// TEXINFO: KEKSAtomChunkEncode
-// @deftypefun bool KEKSAtomChunkEncode @
-// (size_t *len, unsigned char *buf, const size_t cap, @
-// const unsigned char *src, const size_t srcLen)
-// Encode the chunk in provided @var{buf} with capacity of @var{cap}.
-// In case of success, true is returned and @var{len} will hold how many
-// bytes were written to buffer. It is just a convenient wrapper instead
-// of using @ref{KEKSAtomNILEncode} followed by @var{srcLen} bytes.
-// @end deftypefun
-bool
-KEKSAtomChunkEncode(
- size_t *len,
- unsigned char *buf,
- const size_t cap,
- const unsigned char *src,
- const size_t srcLen);
-
// TEXINFO: KEKSAtomTAI64Encode
// @deftypefun bool KEKSAtomTAI64Encode @
// (size_t *len, unsigned char *buf, const size_t cap, @
return "IntNonMinimal";
case KEKSErrBlobBadAtom:
return "BlobBadAtom";
- case KEKSErrBlobBadTerm:
- return "BlobBadTerm";
+ case KEKSErrBlobBadChunkLen:
+ return "BlobBadChunkLen";
case KEKSErrTAI64TooBig:
return "TAI64TooBig";
case KEKSErrTAI64BadNsec:
// Non minimal integer encoding.
// @item KEKSErrBlobBadAtom
// Blob contains unexpected atom.
-// @item KEKSErrBlobBadTerm
-// Blob contains invalid terminator.
+// @item KEKSErrBlobBadChunkLen
+// Blob contains chunk with invalid length.
// @item KEKSErrTAI64TooBig
// Too large TAI64 value, out-of-bounds.
// @item KEKSErrTAI64BadNsec
KEKSErrIntNonBin,
KEKSErrIntNonMinimal,
KEKSErrBlobBadAtom,
- KEKSErrBlobBadTerm,
+ KEKSErrBlobBadChunkLen,
KEKSErrTAI64TooBig,
KEKSErrTAI64BadNsec,
KEKSErrTAI64BadAsec,
}
cur = idx + 1;
struct KEKSAtom *atom = &(items->list[cur].atom);
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wswitch-enum"
- switch (atom->typ) {
-#pragma clang diagnostic pop
- case KEKSItemNIL:
- atom->v.str.len = chunkLen;
- assert(len >= (*off));
- if ((len - (*off)) <= chunkLen) {
- return KEKSErrNotEnough;
- }
- atom->typ = KEKSItemBin;
- atom->v.str.ptr = buf + *off;
- if ((SIZE_MAX - chunkLen) < (*off)) {
- return KEKSErrLenTooBig;
- }
- (*off) += chunkLen;
- break;
- case KEKSItemBin:
- if (atom->v.str.len >= chunkLen) {
- return KEKSErrBlobBadTerm;
- }
- eoc = true;
- break;
- default:
+ if (atom->typ != KEKSItemBin) {
return KEKSErrBlobBadAtom;
}
+ if (atom->v.str.len == chunkLen) {
+ } else if (atom->v.str.len < chunkLen) {
+ eoc = true;
+ } else {
+ return KEKSErrBlobBadChunkLen;
+ }
if (prev != 0) {
items->list[prev].next = cur;
}
break;
}
assert(cap >= (*off));
- ok = KEKSAtomChunkEncode(
+ ok = KEKSAtomBinEncode(
&got,
buf + *off,
cap - (*off),
+++ /dev/null
-// ckeks -- C KEKS encoder implementation
-// Copyright (C) 2024-2025 Sergey Matveev <stargrave@stargrave.org>
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Lesser General Public License as
-// published by the Free Software Foundation, version 3 of the License.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <string.h>
-
-#include "dec.h"
-#include "err.h"
-#include "iter.h"
-
-enum KEKSErr
-KEKSIterList(
- void *cbState,
- struct KEKSAtom *atom,
- size_t *off,
- const unsigned char *buf,
- const size_t len,
- KEKSIterCb cb)
-{
- size_t got = 0;
- enum KEKSErr err = KEKSErrInvalid;
- bool eoc = false;
- for (size_t n = 0;; n++) {
- assert(len >= (*off));
- err = KEKSAtomDecode(&got, atom, buf + *off, len - (*off));
- if (err != KEKSErrNo) {
- return err;
- }
- if ((SIZE_MAX - got) < (*off)) {
- return KEKSErrLenTooBig;
- }
- (*off) += got;
- eoc = atom->typ == KEKSItemEOC;
- err = cb(NULL, 0, !eoc, n, cbState, atom, off, buf, len);
- if (err != KEKSErrNo) {
- return err;
- }
- if (eoc) {
- break;
- }
- }
- return KEKSErrNo;
-}
-
-enum KEKSErr
-KEKSIterMap(
- void *cbState,
- struct KEKSAtom *atom,
- size_t *off,
- const unsigned char *buf,
- const size_t len,
- KEKSIterCb cb)
-{
- enum KEKSErr err = KEKSErrInvalid;
- size_t got = 0;
- const unsigned char *key = NULL;
- size_t keyLen = 0;
- for (;;) {
- assert(len >= (*off));
- err = KEKSAtomDecode(&got, atom, buf + *off, len - (*off));
- if (err != KEKSErrNo) {
- return err;
- }
- if ((SIZE_MAX - got) < (*off)) {
- return KEKSErrLenTooBig;
- }
- (*off) += got;
- if (atom->typ == KEKSItemEOC) {
- err = cb(NULL, 0, false, 0, cbState, atom, off, buf, len);
- if (err != KEKSErrNo) {
- return err;
- }
- break;
- }
- if (atom->typ != KEKSItemStr) {
- return KEKSErrMapBadKey;
- }
- if (atom->v.str.len == 0) {
- return KEKSErrMapBadKey;
- }
- if (atom->v.str.len < keyLen) {
- return KEKSErrMapUnordered;
- }
- if ((atom->v.str.len == keyLen) &&
- (memcmp(key, atom->v.str.ptr, keyLen) >= 0)) {
- return KEKSErrMapUnordered;
- }
- keyLen = atom->v.str.len;
- key = atom->v.str.ptr;
- assert(len >= (*off));
- err = KEKSAtomDecode(&got, atom, buf + *off, len - (*off));
- if (err != KEKSErrNo) {
- return err;
- }
- if ((SIZE_MAX - got) < (*off)) {
- return KEKSErrLenTooBig;
- }
- (*off) += got;
- if (atom->typ == KEKSItemEOC) {
- return KEKSErrUnexpectedEOC;
- }
- err = cb(key, keyLen, false, 0, cbState, atom, off, buf, len);
- if (err != KEKSErrNo) {
- return err;
- }
- }
- return KEKSErrNo;
-}
-
-enum KEKSErr
-KEKSIterBlob(
- void *cbState,
- struct KEKSAtom *atom,
- size_t *off,
- const unsigned char *buf,
- const size_t len,
- KEKSIterCb cb)
-{
- const size_t chunkLen = atom->v.blob.chunkLen;
- enum KEKSErr err = KEKSErrInvalid;
- size_t got = 0;
- bool eoc = false;
- for (size_t n = 0; !eoc; n++) {
- assert(len >= (*off));
- err = KEKSAtomDecode(&got, atom, buf + *off, len - (*off));
- if (err != KEKSErrNo) {
- return err;
- }
- if ((SIZE_MAX - got) < (*off)) {
- return KEKSErrLenTooBig;
- }
- (*off) += got;
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wswitch-enum"
- switch (atom->typ) {
-#pragma clang diagnostic pop
- case KEKSItemNIL:
- assert(len >= (*off));
- if ((len - *off) <= chunkLen) {
- atom->v.str.len = chunkLen;
- return KEKSErrNotEnough;
- }
- atom->typ = KEKSItemBin;
- atom->v.str.ptr = buf + *off;
- atom->v.str.len = chunkLen;
- if ((SIZE_MAX - chunkLen) < (*off)) {
- return KEKSErrLenTooBig;
- }
- (*off) += chunkLen;
- break;
- case KEKSItemBin:
- if ((atom->v.str.len) >= chunkLen) {
- return KEKSErrBlobBadTerm;
- }
- eoc = true;
- break;
- default:
- return KEKSErrBlobBadAtom;
- }
- err = cb(NULL, 0, true, n, cbState, atom, off, buf, len);
- if (err != KEKSErrNo) {
- return err;
- }
- }
- atom->typ = KEKSItemEOC;
- err = cb(NULL, 0, false, 0, cbState, atom, off, buf, len);
- return err;
-}
+++ /dev/null
-#ifndef KEKS_ITER_H
-#define KEKS_ITER_H
-
-#include <stdbool.h>
-#include <stddef.h>
-
-#include "dec.h"
-#include "err.h"
-
-typedef enum KEKSErr (*KEKSIterCb)(
- const unsigned char *key,
- const size_t keyLen,
- const bool inList,
- const size_t idx,
- void *cbState,
- struct KEKSAtom *atom,
- size_t *off,
- const unsigned char *buf,
- const size_t len);
-
-enum KEKSErr
-KEKSIterList(
- void *cbState,
- struct KEKSAtom *atom,
- size_t *off,
- const unsigned char *buf,
- const size_t len,
- KEKSIterCb cb);
-
-enum KEKSErr
-KEKSIterMap(
- void *cbState,
- struct KEKSAtom *atom,
- size_t *off,
- const unsigned char *buf,
- const size_t len,
- KEKSIterCb cb);
-
-enum KEKSErr
-KEKSIterBlob(
- void *cbState,
- struct KEKSAtom *atom,
- size_t *off,
- const unsigned char *buf,
- const size_t len,
- KEKSIterCb cb);
-
-#endif // KEKS_ITER_H
err.o
frombe.o
items.o
-iter.o
leapsecs.o
tobe.o
utf8.o
}
// Write an encoded BLOB atom.
-// You have to manually provide necessary chunks and
-// properly terminate it with BinEncode.
-func BlobEncode(
- w io.Writer,
- chunkLen int64,
- r io.Reader,
-) (written int64, err error) {
+func BlobEncode(w io.Writer, chunkLen int64, r io.Reader) (written int64, err error) {
{
l := make([]byte, 9)
l[0] = byte(AtomBLOB)
}
return
}
- n64, err = ByteEncode(w, byte(AtomNIL))
+ n64, err = BinEncode(w, chunk)
if err != nil {
return
}
written += n64
- n64, err = io.Copy(w, bytes.NewReader(chunk))
- written += n64
- if err != nil {
- return
- }
}
n64, err = BinEncode(w, chunk)
written += n64
}
func (blob *BlobReader) String() string {
- return fmt.Sprintf("BLOB(%d, ~)", blob.ChunkLen)
+ return fmt.Sprintf("BLOB(%d, ?)", blob.ChunkLen)
}
)
var (
- ErrMapBadKey = errors.New("map bad key")
- ErrMapUnordered = errors.New("map unordered")
- ErrBlobBadAtom = errors.New("blob unexpected atom")
- ErrBlobBadTerm = errors.New("blob bad terminator")
- ErrUnexpectedEOC = errors.New("unexpected EOC")
- ErrTooDeep = errors.New("too deep structure")
+ ErrMapBadKey = errors.New("map bad key")
+ ErrMapUnordered = errors.New("map unordered")
+ ErrBlobBadAtom = errors.New("blob unexpected atom")
+ ErrBlobBadChunkLen = errors.New("blob bad chunk len")
+ ErrUnexpectedEOC = errors.New("unexpected EOC")
+ ErrTooDeep = errors.New("too deep structure")
)
func (ctx *Decoder) deTail() {
var chunks []string
var sub types.Type
var s string
- BlobCycle:
for {
sub, err = ctx.DecodeAtom()
if err != nil {
return
}
- switch sub {
- case types.NIL:
- ctx.deTail()
- s, err = ctx.getBytes(int(chunkLen))
- if err != nil {
- return
- }
+ if sub != types.Bin {
+ err = ErrBlobBadAtom
+ return
+ }
+ s = ctx.strs[len(ctx.strs)-1]
+ ctx.deTail()
+ ctx.strs = ctx.strs[:len(ctx.strs)-1]
+ if int64(len(s)) == chunkLen {
chunks = append(chunks, s)
- case types.Bin:
- s = ctx.strs[len(ctx.strs)-1]
- if int64(len(s)) >= chunkLen {
- err = ErrBlobBadTerm
- return
- }
+ } else if int64(len(s)) < chunkLen {
if len(s) != 0 {
chunks = append(chunks, s)
}
- ctx.deTail()
- ctx.strs = ctx.strs[:len(ctx.strs)-1]
- break BlobCycle
- default:
- err = ErrBlobBadAtom
+ break
+ } else {
+ err = ErrBlobBadChunkLen
return
}
}
append = raws.append
chunks = len(v) // l
for i in range(chunks):
- append(dumps(None))
- append(v[i*l:(i+1)*l])
+ append(dumps(v[i*l:(i+1)*l]))
left = len(v) - chunks*l
assert left < l
append(dumps(b"") if (left == 0) else dumps(v[-left:]))
raws = []
while True:
i, v = _loads(v, _allowContainers=False)
- if i is None:
- if len(v) < l:
- raise NotEnoughData(l-len(v)+1)
- raws.append(v[:l])
- v = v[l:]
- elif isinstance(i, bytes):
- if len(i) >= l:
- raise DecodeError("wrong terminator len")
+ if not isinstance(i, bytes):
+ raise DecodeError("unexpected tag")
+ if len(i) == l:
+ raws.append(i)
+ elif len(i) < l:
raws.append(i)
break
else:
- raise DecodeError("unexpected tag")
+ raise DecodeError("wrong chunk len")
return Blob(l, b"".join(raws)), v
raise DecodeError("unknown tag")
encoded,
b"".join((
bytes.fromhex("0B0000000000000003"),
- bytes.fromhex("01"), b"test",
- bytes.fromhex("01"), b"data",
+ bytes.fromhex("84"), b"test",
+ bytes.fromhex("84"), b"data",
bytes.fromhex("80"),
)),
)
encoded,
b"".join((
bytes.fromhex("0B0000000000000003"),
- bytes.fromhex("01"), b"test",
- bytes.fromhex("01"), b"data",
+ bytes.fromhex("84"), b"test",
+ bytes.fromhex("84"), b"data",
bytes.fromhex("81"), b"2",
)),
)
encoded = b"".join((
b"\x0b",
(chunkLen-1).to_bytes(8, "big"),
- b"".join((b"\x01" + chunk) for chunk in chunks),
+ b"".join(dumps(chunk) for chunk in chunks),
b"\x80",
junk,
))
def test_throws_when_not_enough_data(self) -> None:
encoded = b"".join((
bytes.fromhex("0B0000000000000003"),
- bytes.fromhex("01"), b"test",
- bytes.fromhex("01"), b"da",
+ bytes.fromhex("84"), b"test",
+ bytes.fromhex("84"), b"da",
))
with self.assertRaises(NotEnoughData) as err:
loads(encoded)
- self.assertEqual(err.exception.n, 3)
+ self.assertEqual(err.exception.n, 2)
def test_throws_when_not_enough_data_for_length(self) -> None:
encoded = bytes.fromhex("0B00000000")
def test_throws_when_wrong_terminator_length(self) -> None:
encoded = b"".join((
bytes.fromhex("0B0000000000000003"),
- bytes.fromhex("01"), b"test",
- bytes.fromhex("01"), b"data",
+ bytes.fromhex("84"), b"test",
+ bytes.fromhex("84"), b"data",
bytes.fromhex("8A"), b"terminator",
))
with self.assertRaises(DecodeError) as err:
loads(encoded)
- self.assertEqual(str(err.exception), "wrong terminator len")
+ self.assertEqual(str(err.exception), "wrong chunk len")
def test_throws_when_wrong_terminator_tag(self) -> None:
encoded = b"".join((
bytes.fromhex("0B0000000000000003"),
- bytes.fromhex("01"), b"test",
- bytes.fromhex("01"), b"data",
+ bytes.fromhex("84"), b"test",
+ bytes.fromhex("84"), b"data",
bytes.fromhex("04"), b"that was a wrong tag",
))
with self.assertRaises(DecodeError) as err:
in a streaming way, when data may not fit in memory.
64-bit big-endian integer follows the BLOB tag, setting the following
-chunks payload size (+1). Then come zero or more NIL tags, each followed
-by fixed-length payload. Blob is terminated by @ref{Strings, BIN},
-probably having zero length.
+chunks payload size (+1). Then come one or more @ref{BIN} strings.
+All of them, except for the last one, must have a payload of exactly
+the chunk length. The last, terminating string's payload must be
+shorter (possibly empty).
Data format definition must specify exact chunk size expected to be
used, if it needs deterministic encoding.
@verbatim
-BLOB len [NIL || payload0 || NIL || payload1 || ...] BIN
+BLOB chunk-len [BIN(len=chunk-len) || ...] BIN(len<chunk-len)
@end verbatim
@multitable @columnfractions .5 .5
@item BLOB(5, "") @tab @code{0B 0000000000000004 80}
-@item BLOB(5, "12345") @tab @code{0B 0000000000000004 01 3132333435 80}
-@item BLOB(5, "123456") @tab @code{0B 0000000000000004 01 3132333435 81 36}
+@item BLOB(5, "12345") @tab @code{0B 0000000000000004 85 3132333435 80}
+@item BLOB(5, "123456") @tab @code{0B 0000000000000004 85 3132333435 81 36}
@item BLOB(500, "123") @tab @code{0B 00000000000001F3 83 313233}
-@item BLOB(2, "12345") @tab @code{0B 0000000000000001 01 3132 01 3334 81 35}
+@item BLOB(2, "12345") @tab @code{0B 0000000000000001 82 3132 82 3334 81 35}
@end multitable
set vl [string length $v]
set chunks [expr {$vl / $chunkLen}]
for {set i 0} {$i < $chunks} {incr i} {
- NIL
- add [string range $v \
+ BIN [string range $v \
[expr {$i * $chunkLen}] \
[expr {(($i + 1) * $chunkLen) - 1}]]
}