From 40a619316e173bf8e06e546c55ac1edfea9e5e09666f50fbd330045943a6c507 Mon Sep 17 00:00:00 2001 From: Sergey Matveev Date: Sun, 13 Oct 2024 20:36:51 +0300 Subject: [PATCH] Non-streamable parser --- cyac/README | 1 + cyac/cmd/.gitignore | 3 +- cyac/cmd/all.do | 2 +- cyac/cmd/clean | 2 +- cyac/cmd/hex.c.in | 22 +++ cyac/cmd/{print.c => print-itered.c} | 29 +-- cyac/cmd/print-pooled.c | 274 +++++++++++++++++++++++++++ cyac/dec.c | 3 +- cyac/dec.h | 115 +++++++---- cyac/dectai.c | 2 +- cyac/dectai.h | 2 +- cyac/default.o.do | 2 +- cyac/err.h | 66 +++++++ cyac/h-extract.pl | 11 ++ cyac/iter.c | 4 +- cyac/iter.h | 1 + cyac/o.list | 1 + cyac/pool.c | 265 ++++++++++++++++++++++++++ cyac/pool.h | 54 ++++++ 19 files changed, 789 insertions(+), 70 deletions(-) create mode 100644 cyac/cmd/hex.c.in rename cyac/cmd/{print.c => print-itered.c} (91%) create mode 100644 cyac/cmd/print-pooled.c create mode 100644 cyac/err.h create mode 100755 cyac/h-extract.pl create mode 100644 cyac/pool.c create mode 100644 cyac/pool.h diff --git a/cyac/README b/cyac/README index c12c3fb..db5310b 100644 --- a/cyac/README +++ b/cyac/README @@ -12,6 +12,7 @@ decoded atom. dectai.* contains converter from TAI64 to UTC. leapsecs.* contains the leap seconds database itself. iter.* contains helpers that may pass over the iterables. +pool.* contains a non-streamable parser. enc.* contains encoders for various atoms. 
Containers and blobs must be made manually, by finishing them with proper EOC/BIN and sorting the diff --git a/cyac/cmd/.gitignore b/cyac/cmd/.gitignore index 141e7b8..f5062f2 100644 --- a/cyac/cmd/.gitignore +++ b/cyac/cmd/.gitignore @@ -1,2 +1,3 @@ -/print +/print-itered +/print-pooled /test-vector diff --git a/cyac/cmd/all.do b/cyac/cmd/all.do index f4beb87..f14d17c 100644 --- a/cyac/cmd/all.do +++ b/cyac/cmd/all.do @@ -1 +1 @@ -redo-ifchange print test-vector +redo-ifchange print-itered print-pooled test-vector diff --git a/cyac/cmd/clean b/cyac/cmd/clean index f59dba6..dcefe26 100755 --- a/cyac/cmd/clean +++ b/cyac/cmd/clean @@ -1,3 +1,3 @@ #!/bin/sh -e -exec rm -f print test-vector +exec rm -f print-itered print-pooled test-vector diff --git a/cyac/cmd/hex.c.in b/cyac/cmd/hex.c.in new file mode 100644 index 0000000..b941ccf --- /dev/null +++ b/cyac/cmd/hex.c.in @@ -0,0 +1,22 @@ +#include + +static const size_t maxStrLen = 40; + +static const char hexdigits[] = "0123456789ABCDEF"; + +static char * +HexEnc(const unsigned char *src, const size_t srcLen) +{ + // it was based on libressl/crypto/x509v3/v3_utl.c:hex_to_string + char *dst = (char *)malloc(1 + srcLen * 2); + if (dst == NULL) { + return NULL; + } + size_t i = 0; + for (; i < srcLen; i++) { + dst[(i * 2) + 0] = hexdigits[(src[i] >> 4) & 0x0F]; + dst[(i * 2) + 1] = hexdigits[src[i] & 0x0F]; + } + dst[srcLen * 2] = 0; + return dst; +} diff --git a/cyac/cmd/print.c b/cyac/cmd/print-itered.c similarity index 91% rename from cyac/cmd/print.c rename to cyac/cmd/print-itered.c index 9302717..f8a52d7 100644 --- a/cyac/cmd/print.c +++ b/cyac/cmd/print-itered.c @@ -27,28 +27,10 @@ #include #include +#include #include -static const size_t maxStrLen = 40; - -static const char hexdigits[] = "0123456789ABCDEF"; - -static char * -HexEnc(const unsigned char *src, const size_t srcLen) -{ - // it was based on libressl/crypto/x509v3/v3_utl.c:hex_to_string - char *dst = (char *)malloc(1 + srcLen * 2); - if (dst == NULL) { 
- return NULL; - } - size_t i = 0; - for (; i < srcLen; i++) { - dst[(i * 2) + 0] = hexdigits[(src[i] >> 4) & 0x0F]; - dst[(i * 2) + 1] = hexdigits[src[i] & 0x0F]; - } - dst[srcLen * 2] = 0; - return dst; -} +#include "hex.c.in" struct CbState { ptrdiff_t indent; @@ -68,9 +50,7 @@ myCb( struct CbState *state = (struct CbState *)(cbState); if ((atom->typ) == YACItemEOC) { state->indent--; - if (state->indent < 0) { - return YACErrUnexpectedEOC; - } + assert(state->indent >= 0); } printf("%04zd ", *off); for (ptrdiff_t i = 0; i < state->indent; i++) { @@ -216,7 +196,8 @@ myCb( free(hex); break; default: - fprintf(stdout, "unknown atom\n"); + fprintf(stderr, "unknown atom\n"); + return EXIT_FAILURE; } return YACErrNo; } diff --git a/cyac/cmd/print-pooled.c b/cyac/cmd/print-pooled.c new file mode 100644 index 0000000..dc273aa --- /dev/null +++ b/cyac/cmd/print-pooled.c @@ -0,0 +1,274 @@ +// cyac -- C YAC encoder implementation +// Copyright (C) 2024 Sergey Matveev +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation, version 3 of the License. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this program. If not, see . 
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "hex.c.in" + +static void +printIndent(const ptrdiff_t indent) +{ + for (ptrdiff_t i = 0; i < indent; i++) { + fputs(" ", stdout); + } +} + +static enum YACErr +printer( + const struct YACItemPool *pool, + ptrdiff_t idx, + ptrdiff_t indent, + ptrdiff_t listIdx, + const char *mapKey) +{ + struct YACItem *item = &(pool->list[idx]); + printf("%04zd ", item->off); + if (item->atom.typ == YACItemEOC) { + indent--; + assert(indent >= 0); + } + printIndent(indent); + if (listIdx >= 0) { + fprintf(stdout, "%zu: ", listIdx); + } + if (mapKey != NULL) { + fprintf(stdout, "%s: ", mapKey); + } + char *str = NULL; + enum YACErr err = YACErrInvalid; + switch (item->atom.typ) { + case YACItemNIL: + fputs("NIL\n", stdout); + break; + case YACItemFalse: + fputs("FALSE\n", stdout); + break; + case YACItemTrue: + fputs("TRUE\n", stdout); + break; + case YACItemUUID: + printf( + "UUID(%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x)\n", + item->atom.val.buf[0], + item->atom.val.buf[1], + item->atom.val.buf[2], + item->atom.val.buf[3], + item->atom.val.buf[4], + item->atom.val.buf[5], + item->atom.val.buf[6], + item->atom.val.buf[7], + item->atom.val.buf[8], + item->atom.val.buf[9], + item->atom.val.buf[10], + item->atom.val.buf[11], + item->atom.val.buf[12], + item->atom.val.buf[13], + item->atom.val.buf[14], + item->atom.val.buf[15]); + break; + case YACItemUint: + fprintf(stdout, "%zu\n", item->atom.val.uint); + break; + case YACItemSint: + fprintf(stdout, "%zd\n", item->atom.val.sint); + break; + case YACItemList: { + printf("[ %zd\n", item->atom.len); + indent++; + idx = item->atom.val.first; + listIdx = 0; + while (idx != -1) { + err = printer(pool, idx, indent, listIdx, NULL); + if (err != YACErrNo) { + return err; + } + idx = pool->list[idx].next; + listIdx++; + } + fputs(" ", stdout); + 
indent--; + printIndent(indent); + fputs("]\n", stdout); + break; + } + case YACItemMap: { + printf("{ %zd\n", item->atom.len); + indent++; + idx = item->atom.val.first; + while (idx != -1) { + str = strndup( + (const char *)(pool->list[idx].atom.val.buf), pool->list[idx].atom.len); + idx = pool->list[idx].next; + err = printer(pool, idx, indent, -1, str); + free(str); + if (err != YACErrNo) { + return err; + } + idx = pool->list[idx].next; + } + fputs(" ", stdout); + indent--; + printIndent(indent); + fputs("}\n", stdout); + break; + } + case YACItemBlob: + printf("BLOB[ %zu l=%zu\n", item->atom.len, item->atom.val.uint); + indent++; + idx++; + listIdx = 0; + while (idx != -1) { + err = printer(pool, idx, indent, listIdx, NULL); + if (err != YACErrNo) { + return err; + } + idx = pool->list[idx].next; + listIdx++; + } + fputs(" ", stdout); + indent--; + printIndent(indent); + fputs("]\n", stdout); + break; + case YACItemFloat: + fputs("FLOAT: TODO\n", stdout); + break; + case YACItemTAI64: { + if ((item->atom.len) == 16) { + str = HexEnc(item->atom.val.buf, item->atom.len); + fprintf(stdout, "TAI64NA(%s)\n", str); + free(str); + break; + } + switch (item->atom.len) { + case 8: + fputs("TAI64(", stdout); + break; + case 12: + fputs("TAI64N(", stdout); + break; + } + struct timeval tv; + err = YACTAI64ToTimeval(&tv, item->atom.val.buf, item->atom.len); + if (err == YACErrTAI64BadNsec) { + str = HexEnc(item->atom.val.buf, item->atom.len); + fprintf(stdout, "unrepresentable: %s)\n", str); + free(str); + break; + } + if (err != YACErrNo) { + return err; + } + time_t t = tv.tv_sec; + struct tm *tm = localtime(&t); + if (tm == NULL) { + str = HexEnc(item->atom.val.buf, item->atom.len); + fprintf(stdout, "unrepresentable: %s)\n", str); + free(str); + break; + } + char human[20] = {0}; + strftime(human, sizeof human, "%Y-%m-%d %H:%M:%S", tm); + fputs(human, stdout); + if ((item->atom.len) == 12) { + fprintf(stdout, ".%zu", tv.tv_usec); + } + fputs(")\n", stdout); + break; + 
} + case YACItemBin: { + const size_t l = (item->atom.len > maxStrLen) ? maxStrLen : item->atom.len; + str = HexEnc(item->atom.val.buf, l); + fprintf( + stdout, + "%zu:%s%s\n", + item->atom.len, + str, + (item->atom.len > maxStrLen) ? "..." : ""); + free(str); + break; + } + case YACItemStr: { + const size_t l = (item->atom.len > maxStrLen) ? maxStrLen : item->atom.len; + str = strndup((const char *)item->atom.val.buf, l); + fprintf(stdout, "\"%s%s\"\n", str, (item->atom.len > maxStrLen) ? "..." : ""); + free(str); + break; + } + case YACItemRaw: + str = HexEnc(item->atom.val.buf, item->atom.len); + fprintf(stdout, "(t=0x%X l=%zu v=%s)\n", item->atom.tag, item->atom.len, str); + free(str); + break; + case YACItemEOC: + default: + fprintf(stderr, "unknown atom\n"); + return EXIT_FAILURE; + } + return YACErrNo; +} + +int +main(int argc, char **argv) +{ + size_t len = 0; + unsigned char *buf = NULL; + { + int fd = open(argv[1], O_RDONLY | O_CLOEXEC); + if (fd == -1) { + fprintf(stderr, "%s\n", strerror(errno)); + return EXIT_FAILURE; + } + struct stat sb; + memset(&sb, 0, sizeof(struct stat)); + if (fstat(fd, &sb) != 0) { + fprintf(stderr, "%s\n", strerror(errno)); + return EXIT_FAILURE; + } + len = (size_t)sb.st_size; + buf = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0); + } + + struct YACItemPool pool; + YACItemPoolInit(&pool); + ptrdiff_t off = 0; + enum YACErr err = YACItemPoolParse(&pool, &off, buf, len); + if (err != YACErrNo) { + fprintf(stderr, "err: %d\n", err); + return EXIT_FAILURE; + } + err = printer(&pool, 0, 0, -1, NULL); + if (err != YACErrNo) { + fprintf(stderr, "err: %d\n", err); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/cyac/dec.c b/cyac/dec.c index 88650d9..b2cb770 100644 --- a/cyac/dec.c +++ b/cyac/dec.c @@ -18,6 +18,7 @@ #include "atoms.h" #include "dec.h" +#include "err.h" #include "frombe.h" #include "utf8.h" @@ -103,7 +104,7 @@ YACAtomDecode(struct YACAtom *atom, const unsigned char *buf, const size_t len) return 
YACErrNotEnough; } if (buf[1] == 0) { - return YACErrIntZeroByte; + return YACErrIntNonMinimal; } if (l > 8) { atom->typ = YACItemRaw; diff --git a/cyac/dec.h b/cyac/dec.h index 0fa7ba0..e8cb550 100644 --- a/cyac/dec.h +++ b/cyac/dec.h @@ -4,57 +4,96 @@ #include #include +#include "err.h" + enum YACItemType { YACItemEOC = 0, YACItemNIL = 1, YACItemFalse, YACItemTrue, - YACItemUUID, // atom.val.buf - YACItemUint, // atom.val.uint - YACItemSint, // atom.val.sint + YACItemUUID, + YACItemUint, + YACItemSint, YACItemList, YACItemMap, YACItemBlob, - YACItemFloat, // atom.val.flt - YACItemTAI64, // atom.val.buf, atom.len - YACItemBin, // atom.val.buf, atom.len - YACItemStr, // atom.val.buf, atom.len - YACItemRaw, // atom.tag, atom.val.buf, atom.len -}; - -enum YACErr { - YACErrInvalid = 0, // unset error - YACErrNo = 1, // everything is good - YACErrNotEnough, // not enough data (atom.off is how much) - YACErrUnknownType, // unknown atom's type - YACErrLenNonMinimal, // non-minimal string length coding - YACErrLenTooBig, // string length >1<<60 - YACErrBadUTF8, // invalid UTF-8 codepoint - YACErrIntZeroByte, // non-minimal integer coding - YACErrIntNonMinimal, // non-minimal integer coding - YACErrBlobBadLen, // absent or invalid chunk length - YACErrBlobBadAtom, // unexpected atom inside - YACErrBlobBadTerm, // invalid termination atom - YACErrBlobShortChunk, // not enough data - YACErrTAI64TooBig, // use of reserved values - YACErrTAI64BadNsec, // invalid nanoseconds value - YACErrTAI64BadAsec, // invalid attoseconds value - YACErrMapBadKey, // bad type of a key - YACErrMapNoVal, // missing value - YACErrMapUnordered, // unordered keys - YACErrUnexpectedEOC, // unexpected EOC caught + YACItemFloat, + YACItemTAI64, + YACItemBin, + YACItemStr, + YACItemRaw, }; +// @deftypevar struct YACAtom +// @code{.off}set is the length of the whole atom. +// @code{.tag} contains the real type of the atom, its first byte. +// @code{.typ} contains high-level atom type. 
+// All other fields are interpreted according to the type: +// @table @code +// @item YACItemEOC +// No additional fields are used. +// @item YACItemNIL +// No additional fields are used. +// @item YACItemFalse +// No additional fields are used. +// @item YACItemTrue +// No additional fields are used. +// @item YACItemUUID +// @code{.val.buf} contains the 16-byte UUID value. +// @item YACItemUint +// @code{.val.uint} contains unsigned integer's value. +// @item YACItemSint +// @code{.val.sint} contains negative integer's value. +// @item YACItemList +// No additional fields are used, if parsed through +// @code{YACAtomDecode()}. +// If parsed through the @code{YACItemPoolParse()}, then +// @code{.len} contains the number of elements in a list, +// @code{.val.first} is the pool index of the first element. +// It equals -1 if the list is empty. +// @item YACItemMap +// No additional fields are used, if parsed through +// @code{YACAtomDecode()}. +// If parsed through the @code{YACItemPoolParse()}, then +// @code{.len} contains the number of elements in a map, +// @code{.val.first} is the pool index of the first element's key. +// Key's item @code{.next} points to the value, which points to the +// next key, and so on. +// @code{.val.first} equals -1 if the map is empty. +// @item YACItemBlob +// @code{.val.uint} contains the length of the chunk. +// If parsed through the @code{YACItemPoolParse()}, then +// @code{.len} contains the number of chunks, including the +// terminating binary string, that may be empty. +// First chunk is the next pool item; @code{.val.first} is unused. +// @item YACItemFloat +// @code{.val.TODO} contains float's value. +// @item YACItemTAI64 +// @code{.len} contains the length of the TAI64, that is either 8, +// 12, or 16 bytes long. @code{.val.buf} points to the value itself. +// @item YACItemBin +// @code{.len} contains the length of the string. +// @code{.val.buf} points to the value itself.
+// @item YACItemStr +// @code{.len} contains the length of the string. +// @code{.val.buf} points to the value itself. +// @item YACItemRaw +// @code{.tag} is the raw value's tag, its first byte. +// @code{.len} contains the length of its value. +// @code{.val.buf} points to its value. +// @end table +// @end deftypevar struct YACAtom { - ptrdiff_t off; // length of the whole atom - size_t len; // length of the strings, TAI64, raw values + ptrdiff_t off; + size_t len; union { - uint64_t uint; // unsigned integer's value, blob's chunk len - int64_t sint; // negative signed integer's value - const unsigned char *buf; // strings, TAI64, UUID value + uint64_t uint; + int64_t sint; + const unsigned char *buf; + ptrdiff_t first; } val; - enum YACItemType typ; // type of the item, consolidated - unsigned char tag; // real type of the atom + enum YACItemType typ; + unsigned char tag; char _pad[3]; }; diff --git a/cyac/dectai.c b/cyac/dectai.c index 4e459e2..3f3fcff 100644 --- a/cyac/dectai.c +++ b/cyac/dectai.c @@ -17,8 +17,8 @@ #include #include -#include "dec.h" #include "dectai.h" +#include "err.h" #include "frombe.h" #include "leapsecs.h" diff --git a/cyac/dectai.h b/cyac/dectai.h index 2341bd7..a12a82f 100644 --- a/cyac/dectai.h +++ b/cyac/dectai.h @@ -5,7 +5,7 @@ #include #include -#include "dec.h" +#include "err.h" enum YACErr YACTAI64ToTimeval(struct timeval *tv, const unsigned char *buf, const size_t len); diff --git a/cyac/default.o.do b/cyac/default.o.do index 013ecf6..fe9918a 100644 --- a/cyac/default.o.do +++ b/cyac/default.o.do @@ -1,4 +1,4 @@ -redo-ifchange $2.c $2.h conf/cc conf/cflags +redo-ifchange $2.c $(./h-extract.pl $2.[ch]) conf/cc conf/cflags read CC 1<<60), can not be decoded. +// @item YACErrBadUTF8 +// Invalid UTF-8 codepoint or zero byte met. +// @item YACErrIntNonMinimal +// Non minimal integer encoding. +// @item YACErrBlobBadLen +// Blob with invalid chunk length. +// @item YACErrBlobBadAtom +// Blob contains unexpected atom. 
+// @item YACErrBlobBadTerm +// Blob contains invalid terminator. +// @item YACErrTAI64TooBig +// Too large TAI64 value, out-of-bounds. +// @item YACErrTAI64BadNsec +// Invalid TAI64 nanoseconds value. +// @item YACErrTAI64BadAsec +// Invalid TAI64 attoseconds value. +// @item YACErrMapBadKey +// Either bad type of map's key, or it is empty. +// @item YACErrMapNoVal +// Missing value in a map. +// @item YACErrMapUnordered +// Unordered map keys. +// @item YACErrNoMem +// Not enough memory for allocation. +// @end table +// @end deftypevar +enum YACErr { + YACErrInvalid = 0, + YACErrNo = 1, + YACErrNotEnough, + YACErrUnknownType, + YACErrLenNonMinimal, + YACErrLenTooBig, + YACErrBadUTF8, + YACErrIntZeroByte, + YACErrIntNonMinimal, + YACErrBlobBadLen, + YACErrBlobBadAtom, + YACErrBlobBadTerm, + YACErrTAI64TooBig, + YACErrTAI64BadNsec, + YACErrTAI64BadAsec, + YACErrMapBadKey, + YACErrMapNoVal, + YACErrMapUnordered, + YACErrNoMem, +}; + +#endif // YAC_ERR_H diff --git a/cyac/h-extract.pl b/cyac/h-extract.pl new file mode 100755 index 0000000..52c770c --- /dev/null +++ b/cyac/h-extract.pl @@ -0,0 +1,11 @@ +#!/usr/bin/env perl +# Extract all locally included header files (not <>-ones, but "") + +# hack, to badly exit if there is unexistent file +$SIG{__WARN__} = sub { die @_ }; + +map { $inc{$_} = 1 } @ARGV; +while (<>) { + /^#include "([^\/]+)"$/ and ($1 !~ /\.in$/) and $inc{$1} = 1; +}; +print join " ", sort keys %inc; diff --git a/cyac/iter.c b/cyac/iter.c index 6724aa2..17ce6a6 100644 --- a/cyac/iter.c +++ b/cyac/iter.c @@ -18,6 +18,7 @@ #include #include "dec.h" +#include "err.h" #include "iter.h" enum YACErr @@ -125,7 +126,8 @@ YACIterBlob( switch (atom->typ) { case YACItemNIL: if (((ptrdiff_t)len - *off) <= (ptrdiff_t)chunkLen) { - return YACErrBlobShortChunk; + atom->off = (ptrdiff_t)chunkLen; + return YACErrNotEnough; } atom->typ = YACItemBin; atom->val.buf = buf + *off; diff --git a/cyac/iter.h b/cyac/iter.h index 081603d..a310fc5 100644 --- a/cyac/iter.h +++ 
b/cyac/iter.h @@ -4,6 +4,7 @@ #include #include "dec.h" +#include "err.h" typedef enum YACErr (*YACIterCb)( const unsigned char *key, diff --git a/cyac/o.list b/cyac/o.list index 97f1fd7..078f659 100644 --- a/cyac/o.list +++ b/cyac/o.list @@ -5,5 +5,6 @@ enctai.o frombe.o iter.o leapsecs.o +pool.o tobe.o utf8.o diff --git a/cyac/pool.c b/cyac/pool.c new file mode 100644 index 0000000..7464ef9 --- /dev/null +++ b/cyac/pool.c @@ -0,0 +1,265 @@ +// cyac -- C YAC encoder implementation +// Copyright (C) 2024 Sergey Matveev +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation, version 3 of the License. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this program. If not, see . 
+ +#include +#include +#include +#include + +#include "dec.h" +#include "err.h" +#include "pool.h" + +static const ptrdiff_t yacPoolChunkLen = 32; + +enum YACErr +YACItemPoolInit(struct YACItemPool *pool) +{ + pool->len = 0; + pool->cap = yacPoolChunkLen; + pool->list = calloc((size_t)(pool->cap), sizeof(struct YACItem)); + if (pool->list == NULL) { + return YACErrNoMem; + } + return YACErrNo; +} + +enum YACErr +YACItemPoolGrow(struct YACItemPool *pool) +{ + pool->cap += yacPoolChunkLen; + if (pool->cap <= 0) { + return YACErrNoMem; + } + const size_t size = (size_t)(pool->cap) * sizeof(struct YACItem); + pool->list = realloc(pool->list, size); + if (pool->list == NULL) { + return YACErrNoMem; + } + const size_t begin = (size_t)(pool->len) * sizeof(struct YACItem); + memset((unsigned char *)(pool->list) + begin, 0, size - begin); + return YACErrNo; +} + +static enum YACErr +yacItemPoolAdd( + struct YACItemPool *pool, + ptrdiff_t *off, + const unsigned char *buf, + const size_t len) +{ + enum YACErr err = YACErrInvalid; + if (pool->len == pool->cap) { + err = YACItemPoolGrow(pool); + if (err != YACErrNo) { + return err; + } + } + struct YACItem *item = &(pool->list[pool->len]); + item->next = -1; + item->off = *off; + err = YACAtomDecode(&(item->atom), buf + *off, len - (size_t)(*off)); + if (err != YACErrNo) { + return err; + } + (*off) += item->atom.off; + pool->len++; + if (pool->len <= 0) { + return YACErrNoMem; + } + return YACErrNo; +} + +enum YACErr +YACItemPoolParse( + struct YACItemPool *pool, + ptrdiff_t *off, + const unsigned char *buf, + const size_t len) +{ + ptrdiff_t item = pool->len; + enum YACErr err = yacItemPoolAdd(pool, off, buf, len); + if (err != YACErrNo) { + return err; + } + switch (pool->list[item].atom.typ) { + case YACItemList: { + pool->list[item].atom.val.first = item + 1; + pool->list[item].atom.len = 0; + ptrdiff_t prev = -1; + ptrdiff_t cur = -1; + ptrdiff_t idx = item; + for (;;) { + err = YACItemPoolParse(pool, off, buf, len); + if 
(err != YACErrNo) { + return err; + } + cur = idx + 1; + if (pool->list[cur].atom.typ == YACItemEOC) { + if (pool->list[item].atom.len == 0) { + pool->list[item].atom.val.first = -1; + } + return YACErrNo; + } + if (prev != -1) { + pool->list[prev].next = cur; + } + prev = cur; + idx = (pool->len) - 1; + pool->list[item].atom.len++; + } + } + case YACItemMap: { + pool->list[item].atom.val.first = item + 1; + pool->list[item].atom.len = 0; + ptrdiff_t idx = item; + ptrdiff_t prev = -1; + ptrdiff_t cur = -1; + size_t prevKeyLen = 0; + const unsigned char *prevKey = NULL; + for (;;) { + err = YACItemPoolParse(pool, off, buf, len); + if (err != YACErrNo) { + return err; + } + cur = idx + 1; + switch (pool->list[cur].atom.typ) { + case YACItemEOC: + if (pool->list[item].atom.len == 0) { + pool->list[item].atom.val.first = -1; + } + return YACErrNo; + case YACItemStr: + break; + case YACItemNIL: + case YACItemFalse: + case YACItemTrue: + case YACItemUUID: + case YACItemUint: + case YACItemSint: + case YACItemList: + case YACItemMap: + case YACItemBlob: + case YACItemFloat: + case YACItemTAI64: + case YACItemBin: + case YACItemRaw: + default: + return YACErrMapBadKey; + } + if (pool->list[cur].atom.len == 0) { + return YACErrMapBadKey; + } + if (pool->list[cur].atom.len < prevKeyLen) { + return YACErrMapUnordered; + } + if ((pool->list[cur].atom.len == prevKeyLen) && + (memcmp(prevKey, pool->list[cur].atom.val.buf, prevKeyLen) >= 0)) { + return YACErrMapUnordered; + } + prevKeyLen = pool->list[cur].atom.len; + prevKey = pool->list[cur].atom.val.buf; + if (prev != -1) { + pool->list[prev].next = cur; + } + prev = cur; + idx = (pool->len) - 1; + err = YACItemPoolParse(pool, off, buf, len); + if (err != YACErrNo) { + return err; + } + cur = idx + 1; + if (pool->list[cur].atom.typ == YACItemEOC) { + return YACErrMapNoVal; + } + pool->list[prev].next = cur; + prev = cur; + idx = (pool->len) - 1; + pool->list[item].atom.len++; + } + } + case YACItemBlob: { + 
pool->list[item].atom.len = 0; + const ptrdiff_t chunkLen = (ptrdiff_t)(pool->list[item].atom.val.uint); + ptrdiff_t idx = item; + ptrdiff_t prev = -1; + ptrdiff_t cur = -1; + bool eoc = false; + while (!eoc) { + err = YACItemPoolParse(pool, off, buf, len); + if (err != YACErrNo) { + return err; + } + cur = idx + 1; + struct YACAtom *atom = &(pool->list[cur].atom); + switch (atom->typ) { + case YACItemNIL: + if (((ptrdiff_t)len - *off) <= chunkLen) { + atom->off = chunkLen; + return YACErrNotEnough; + } + atom->typ = YACItemBin; + atom->val.buf = buf + *off; + atom->len = (size_t)chunkLen; + (*off) += chunkLen; + break; + case YACItemBin: + if ((ptrdiff_t)(atom->len) >= chunkLen) { + return YACErrBlobBadTerm; + } + eoc = true; + break; + case YACItemEOC: + case YACItemFalse: + case YACItemTrue: + case YACItemUUID: + case YACItemUint: + case YACItemSint: + case YACItemList: + case YACItemMap: + case YACItemBlob: + case YACItemFloat: + case YACItemTAI64: + case YACItemStr: + case YACItemRaw: + default: + return YACErrBlobBadAtom; + } + if (prev != -1) { + pool->list[prev].next = cur; + } + prev = cur; + idx = (pool->len) - 1; + pool->list[item].atom.len++; + } + break; + } + case YACItemEOC: + case YACItemNIL: + case YACItemFalse: + case YACItemTrue: + case YACItemUUID: + case YACItemUint: + case YACItemSint: + case YACItemFloat: + case YACItemTAI64: + case YACItemBin: + case YACItemStr: + case YACItemRaw: + default: + break; + } + return YACErrNo; +} diff --git a/cyac/pool.h b/cyac/pool.h new file mode 100644 index 0000000..04b3927 --- /dev/null +++ b/cyac/pool.h @@ -0,0 +1,54 @@ +#ifndef YAC_POOL_H +#define YAC_POOL_H + +#include + +#include "dec.h" +#include "err.h" + +// @deftypevar struct YACItem +// Each item contains the atom structure. But item can be a part of the +// list or map. @code{.next} contains the pool index value to the next +// element of the list or map, following current one. It equals to -1, +// then it is the last one. 
+// Map is a list of pairs: first value is always a UTF-8 string with the +// key name, next one is its value. +// @code{.off} is the offset of item in the previously provided buffer. +// +// Remember that @code{.next} of the list/map/blob is the (possible) +// element after the whole list/map/blob. @code{.atom.val.first} is the +// (possible) first element inside those containers. +// @end deftypevar +struct YACItem { + ptrdiff_t next; + ptrdiff_t off; + struct YACAtom atom; +}; + +// @deftypevar struct YACItemPool +// Pool contains concatenated @code{YACItem}s. Item's @{.next} can be +// used as an index in that pool: @code{pool->list[item.next]}. +// @strong{Remember} that if there is not enough room for the next item, +// then @code{.list} is reallocated, so previous pointers to the items +// may become invalid! Using their indices will be safer. +// @end deftypevar +struct YACItemPool { + struct YACItem *list; + ptrdiff_t len; + ptrdiff_t cap; +}; + +enum YACErr +YACItemPoolInit(struct YACItemPool *); + +enum YACErr +YACItemPoolGrow(struct YACItemPool *); + +enum YACErr +YACItemPoolParse( + struct YACItemPool *, + ptrdiff_t *off, + const unsigned char *buf, + const size_t len); + +#endif // YAC_POOL_H -- 2.50.0