From 0f0e4004a96f8ac20e300aee29cd274bd447797b9ae101eecb9d8c2fd787c56f Mon Sep 17 00:00:00 2001 From: Sergey Matveev Date: Thu, 27 Mar 2025 11:54:40 +0300 Subject: [PATCH] Data validation against schemas --- c/cmd/clean | 1 + c/cmd/pp/pp.c | 7 +- c/cmd/schema-validate/.gitignore | 1 + c/cmd/schema-validate/all.do | 1 + c/cmd/schema-validate/clean | 4 + c/cmd/schema-validate/schema-validate.c | 100 ++++ c/cmd/schema-validate/schema-validate.do | 8 + c/lib/items.c | 69 +-- c/lib/items.h | 51 +- c/lib/o.list | 1 + c/lib/schema.c | 572 +++++++++++++++++++++++ c/lib/schema.h | 67 +++ go/cmd/schema-validate/main.go | 67 +++ go/schema/check.go | 228 +++++++++ go/utils/mk-bin | 2 + spec/index.texi | 1 + spec/schema/cmds.texi | 144 ++++++ spec/schema/index.texi | 34 ++ spec/schema/tcl.texi | 62 +++ tcl/keks.tcl | 74 +-- tcl/schema2bin | 114 +++++ tcl/schemas/pub.tcl | 86 ++++ 22 files changed, 1580 insertions(+), 114 deletions(-) create mode 100644 c/cmd/schema-validate/.gitignore create mode 100644 c/cmd/schema-validate/all.do create mode 100755 c/cmd/schema-validate/clean create mode 100644 c/cmd/schema-validate/schema-validate.c create mode 100644 c/cmd/schema-validate/schema-validate.do create mode 100644 c/lib/schema.c create mode 100644 c/lib/schema.h create mode 100644 go/cmd/schema-validate/main.go create mode 100644 go/schema/check.go create mode 100644 spec/schema/cmds.texi create mode 100644 spec/schema/index.texi create mode 100644 spec/schema/tcl.texi create mode 100755 tcl/schema2bin create mode 100644 tcl/schemas/pub.tcl diff --git a/c/cmd/clean b/c/cmd/clean index ca5d904..7e3f4e0 100755 --- a/c/cmd/clean +++ b/c/cmd/clean @@ -6,4 +6,5 @@ deatomiser/clean for-fuzz/clean lib/clean pp/clean +schema-validate/clean test-vector/clean diff --git a/c/cmd/pp/pp.c b/c/cmd/pp/pp.c index e276988..5cba845 100644 --- a/c/cmd/pp/pp.c +++ b/c/cmd/pp/pp.c @@ -109,8 +109,8 @@ printer( // NOLINT(misc-no-recursion) case KEKSItemTrue: fputs("TRUE\n", stdout); break; - case 
KEKSItemUUID: - UUIDPrint(item->atom.v.uuid); + case KEKSItemHexlet: + UUIDPrint(item->atom.v.hexlet); fputs("\n", stdout); break; case KEKSItemMagic: @@ -369,8 +369,7 @@ main(int argc, char **argv) return EXIT_FAILURE; } if (NoOffsets) { - free(items.offsets); - items.offsets = NULL; + KEKSItemsNoOffsets(&items); } errno = 0; if (clock_gettime(CLOCK_MONOTONIC_PRECISE, &started) != 0) { diff --git a/c/cmd/schema-validate/.gitignore b/c/cmd/schema-validate/.gitignore new file mode 100644 index 0000000..6e88972 --- /dev/null +++ b/c/cmd/schema-validate/.gitignore @@ -0,0 +1 @@ +/schema-validate diff --git a/c/cmd/schema-validate/all.do b/c/cmd/schema-validate/all.do new file mode 100644 index 0000000..c2f6764 --- /dev/null +++ b/c/cmd/schema-validate/all.do @@ -0,0 +1 @@ +redo-ifchange schema-validate diff --git a/c/cmd/schema-validate/clean b/c/cmd/schema-validate/clean new file mode 100755 index 0000000..a6cca59 --- /dev/null +++ b/c/cmd/schema-validate/clean @@ -0,0 +1,4 @@ +#!/bin/sh -e + +cd "$(dirname "$(realpath -- "$0")")" +exec rm -f schema-validate diff --git a/c/cmd/schema-validate/schema-validate.c b/c/cmd/schema-validate/schema-validate.c new file mode 100644 index 0000000..0a0b463 --- /dev/null +++ b/c/cmd/schema-validate/schema-validate.c @@ -0,0 +1,100 @@ +// schema-validate -- KEKS data structures validator +// Copyright (C) 2024-2025 Sergey Matveev +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation, version 3 of the License. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this program. If not, see . 
+
+// NOTE(review): the header names below were reconstructed; the operands
+// of these directives are missing from the patch text. Confirm against
+// the installed header layout.
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <keks/atom.h>
+#include <keks/err.h>
+#include <keks/items.h>
+#include <keks/schema.h>
+
+#include "../lib/mmap.h"
+
+// Map the file fn and decode its first non-MAGIC item into items.
+// Leading MAGIC items are skipped: decoding is simply retried from the
+// offset reached so far. Returns false after printing the decoder error
+// to stderr, or when Mmap itself fails. On success the caller owns
+// items.
+static bool
+parse(struct KEKSItems *items, const char *fn)
+{
+    const ptrdiff_t itemsInitialLen = 2048;
+    size_t len = 0;
+    unsigned char *buf = NULL;
+    if (!Mmap(&buf, &len, fn)) {
+        return false;
+    }
+    enum KEKSErr err = KEKSErrInvalid;
+    size_t off = 0; // kept across retries, so each pass resumes after the magic
+RetryAfterMagic:
+    err = KEKSItemsInit(items, itemsInitialLen);
+    if (err != KEKSErrNo) {
+        fprintf(stderr, "err: %s\n", KEKSErr2Str(err));
+        return false;
+    }
+    err = KEKSItemsParse(items, &off, buf, len);
+    if (err != KEKSErrNo) {
+        fprintf(stderr, "err: %s\n", KEKSErr2Str(err));
+        KEKSItemsFree(items);
+        return false;
+    }
+    struct KEKSItem *item = &(items->list[0]);
+    if (item->atom.typ == KEKSItemMagic) {
+        KEKSItemsFree(items);
+        goto RetryAfterMagic;
+    }
+    return true;
+}
+
+// Usage: schema-validate SCHEMA.keks SCHEMA-NAME DATA.keks
+// Decodes both files, looks SCHEMA-NAME up in the schema file's root map
+// and validates the data's root item against it. Exits with EXIT_SUCCESS
+// only when validation reports KEKSSchemaErrNo.
+int
+main(int argc, char **argv)
+{
+    if (argc < 4) {
+        fputs("Usage: schema-validate SCHEMA.keks SCHEMA-NAME DATA.keks\n", stderr);
+        return EXIT_FAILURE;
+    }
+    struct KEKSItems data;
+    struct KEKSItems schema;
+    if (!parse(&schema, argv[1])) {
+        return EXIT_FAILURE;
+    }
+    if (!parse(&data, argv[3])) {
+        return EXIT_FAILURE;
+    }
+    // The key lookup below walks the root item as a map, so refuse
+    // anything else instead of interpreting an unrelated atom as one.
+    if (schema.list[0].atom.typ != KEKSItemMap) {
+        fputs("schema file root is not a map\n", stderr);
+        return EXIT_FAILURE;
+    }
+    size_t idxSchema = KEKSItemsGetByKey(&schema, 0, argv[2]);
+    if (idxSchema == 0) {
+        fputs("can not find specified schema name\n", stderr);
+        return EXIT_FAILURE;
+    }
+    struct KEKSSchemaErr err = KEKSSchemaValidate(&schema, &data, idxSchema, 0);
+    if (err.code != KEKSSchemaErrNo) {
+        switch (err.code) {
+        case KEKSSchemaErrInvalidSchema:
+            fputs("invalid schema: ", stderr);
+            break;
+        case KEKSSchemaErrUnexpectedState:
+            fputs("unexpected state: ", stderr);
+            break;
+        case KEKSSchemaErrInvalidData:
+            fputs("invalid data: ", stderr);
+            break;
+        case KEKSSchemaErrInvalid:
+        case KEKSSchemaErrNo:
+        default:
+            break;
+        }
+        fprintf(stderr, "schema:%zu data:%zu: %s\n", err.offSchema, err.offData, err.msg);
+        return EXIT_FAILURE;
+    }
+    return EXIT_SUCCESS;
+}
diff --git
a/c/cmd/schema-validate/schema-validate.do b/c/cmd/schema-validate/schema-validate.do new file mode 100644 index 0000000..659fe5d --- /dev/null +++ b/c/cmd/schema-validate/schema-validate.do @@ -0,0 +1,8 @@ +deps="../lib/mmap.o" +redo-ifchange $1.c $deps \ + ../../conf/cc ../../conf/cflags ../../conf/ldflags ../../conf/prefix +read CC <../../conf/cc +CFLAGS=$(cat ../../conf/cflags) +LDFLAGS=$(cat ../../conf/ldflags) +read PREFIX <../../conf/prefix +$CC $CFLAGS -I$PREFIX/include -o $3 $2.c $deps $LDFLAGS -L$PREFIX/lib -lkeks -lm -static diff --git a/c/lib/items.c b/c/lib/items.c index d19501d..e742ab4 100644 --- a/c/lib/items.c +++ b/c/lib/items.c @@ -43,6 +43,28 @@ KEKSItemsInit(struct KEKSItems *items, const ptrdiff_t initialLen) return KEKSErrNo; } +void +KEKSItemsNoOffsets(struct KEKSItems *items) +{ + if (items->offsets == NULL) { + return; + } + free(items->offsets); + items->offsets = NULL; +} + +void +KEKSItemsFree(struct KEKSItems *items) +{ + items->len = 0; + items->cap = 0; + if (items->list != NULL) { + free(items->list); + items->list = NULL; + } + KEKSItemsNoOffsets(items); +} + enum KEKSErr KEKSItemsGrow(struct KEKSItems *items) { @@ -458,56 +480,9 @@ KEKSItemsGetByKey(const struct KEKSItems *items, const size_t itemIdx, const cha return KEKSItemsGetByKeyLen(items, itemIdx, key, strlen(key)); } -size_t -KEKSItemsGetByKeyAndType( - const struct KEKSItems *items, - const size_t itemIdx, - const char *key, - const enum KEKSItemType typ) -{ - const size_t idx = KEKSItemsGetByKey(items, itemIdx, key); - if ((idx == 0) || (items->list[idx].atom.typ != typ)) { - return 0; - } - return idx; -} - bool KEKSStrEqual(const struct KEKSAtom *atom, const char *s) { return (atom->v.str.len == strlen(s)) && (memcmp(atom->v.str.ptr, s, atom->v.str.len) == 0); } - -bool -KEKSListHasOnlyType( - const struct KEKSItems *items, - size_t idx, - const enum KEKSItemType typ) -{ - idx = items->list[idx].atom.v.list.head; - while (idx != 0) { - if (items->list[idx].atom.typ != 
typ) { - return false; - } - idx = items->list[idx].next; - } - return true; -} - -bool -KEKSMapHasOnlyType( - const struct KEKSItems *items, - size_t idx, - const enum KEKSItemType typ) -{ - idx = items->list[idx].atom.v.list.head; - while (idx != 0) { - idx = items->list[idx].next; - if (items->list[idx].atom.typ != typ) { - return false; - } - idx = items->list[idx].next; - } - return true; -} diff --git a/c/lib/items.h b/c/lib/items.h index 6f9b1bb..7828c38 100644 --- a/c/lib/items.h +++ b/c/lib/items.h @@ -72,6 +72,20 @@ struct KEKSItems { enum KEKSErr KEKSItemsInit(struct KEKSItems *, const ptrdiff_t initialLen); +// TEXINFO: KEKSItemsFree +// @deftypefun void KEKSItemsFree (struct KEKSItems *items) +// Free the @ref{KEKSItems} structure occupied memory. +// @end deftypefun +void +KEKSItemsFree(struct KEKSItems *); + +// TEXINFO: KEKSItemsNoOffsets +// @deftypefun void KEKSItemsNoOffsets (struct KEKSItems *items) +// Disable offsets storage during decoding. +// @end deftypefun +void +KEKSItemsNoOffsets(struct KEKSItems *); + // TEXINFO: KEKSItemsGrow // @deftypefun {enum KEKSErr} KEKSItemsGrow (struct KEKSItems *items) // Enlarge underlying storage of items, increasing its capacity. If @@ -150,21 +164,6 @@ KEKSItemsGetByKeyLen( size_t KEKSItemsGetByKey(const struct KEKSItems *, const size_t itemIdx, const char *key); -// TEXINFO: KEKSItemsGetByKeyAndType -// @deftypefun size_t KEKSItemsGetByKeyAndType ( @ -// const struct KEKSItems *items, @ -// const size_t itemIdx, @ -// const char *key, @ -// const enum KEKSItemType typ) -// Same as @ref{KEKSItemsGetByKey}, but also check that value's type is @var{typ}. -// @end deftypefun -size_t -KEKSItemsGetByKeyAndType( - const struct KEKSItems *, - const size_t itemIdx, - const char *key, - const enum KEKSItemType typ); - // TEXINFO: KEKSStrEqual // @deftypefun bool KEKSStrEqual (const struct KEKSAtom *atom, const char *s) // Returns true if string atom's value equal to null-terminated @var{s}. 
@@ -172,26 +171,4 @@ KEKSItemsGetByKeyAndType( bool KEKSStrEqual(const struct KEKSAtom *, const char *s); -// TEXINFO: KEKSListHasOnlyType -// @deftypefun bool KEKSListHasOnlyType ( @ -// const struct KEKSItems *items, @ -// const size_t idx, @ -// const enum KEKSItemType typ) -// Returns true if @var{idx} list in @var{items} contains only values -// with the @var{typ} type. -// @end deftypefun -bool -KEKSListHasOnlyType(const struct KEKSItems *, size_t idx, const enum KEKSItemType typ); - -// TEXINFO: KEKSMapHasOnlyType -// @deftypefun bool KEKSMapHasOnlyType ( @ -// const struct KEKSItems *items, @ -// const size_t idx, @ -// const enum KEKSItemType typ) -// Returns true if @var{idx} map in @var{items} contains only values -// with the @var{typ} type. -// @end deftypefun -bool -KEKSMapHasOnlyType(const struct KEKSItems *, size_t idx, const enum KEKSItemType typ); - #endif // KEKS_POOL_H diff --git a/c/lib/o.list b/c/lib/o.list index 52a1ee0..2d87f0c 100644 --- a/c/lib/o.list +++ b/c/lib/o.list @@ -6,5 +6,6 @@ err.o frombe.o items.o leapsecs.o +schema.o tobe.o utf8.o diff --git a/c/lib/schema.c b/c/lib/schema.c new file mode 100644 index 0000000..b9045d2 --- /dev/null +++ b/c/lib/schema.c @@ -0,0 +1,572 @@ +// ckeks -- C KEKS encoder implementation +// Copyright (C) 2024-2025 Sergey Matveev +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation, version 3 of the License. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this program. If not, see . 
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "atom.h"
+#include "frombe.h"
+#include "items.h"
+#include "schema.h"
+
+// Command names accepted as the first element of a schema command list.
+static const char CmdEach[] = "EACH";
+static const char CmdExists[] = "EXISTS";
+static const char CmdGT[] = "GT";
+static const char CmdLT[] = "LT";
+static const char CmdNotExists[] = "!EXISTS";
+static const char CmdSchema[] = "SCHEMA";
+static const char CmdTake[] = "TAKE";
+static const char CmdTimeMaxPrec[] = "TIMEMAXPREC";
+static const char CmdType[] = "TYPE";
+
+// Type names accepted as arguments of the TYPE command.
+static const char TypeBin[] = "BIN";
+static const char TypeBlob[] = "BLOB";
+static const char TypeBool[] = "BOOL";
+static const char TypeHexlet[] = "HEXLET";
+static const char TypeInt[] = "INT";
+static const char TypeList[] = "LIST";
+static const char TypeMagic[] = "MAGIC";
+static const char TypeMap[] = "MAP";
+static const char TypeNIL[] = "NIL";
+static const char TypeStr[] = "STR";
+static const char TypeTAI64[] = "TAI64";
+
+static struct KEKSSchemaErr
+keksSchemaCmd(
+    size_t *taken,
+    bool *eachInList,
+    bool *eachInMap,
+    const struct KEKSItems *schema,
+    struct KEKSItems *data,
+    size_t idxSchema,
+    size_t idxData);
+
+// Store in *out the number GT/LT compare for atom: the value itself for
+// integers, the .v.list.len field for lists and maps, the .v.str.len
+// field for strings and binaries. Returns false for every other type,
+// leaving *out untouched.
+static bool
+keksSchemaLen(int64_t *out, const struct KEKSAtom *atom)
+{
+    switch (atom->typ) {
+    case KEKSItemPint:
+        (*out) = (int64_t)(atom->v.pint);
+        return true;
+    case KEKSItemNint:
+        (*out) = atom->v.nint;
+        return true;
+    case KEKSItemList:
+    case KEKSItemMap:
+        (*out) = (int64_t)(atom->v.list.len);
+        return true;
+    case KEKSItemStr:
+    case KEKSItemBin:
+        (*out) = (int64_t)(atom->v.str.len);
+        return true;
+    case KEKSItemInvalid:
+    case KEKSItemEOC:
+    case KEKSItemNIL:
+    case KEKSItemFalse:
+    case KEKSItemTrue:
+    case KEKSItemHexlet:
+    case KEKSItemBlob:
+    case KEKSItemFloat:
+    case KEKSItemTAI64:
+    case KEKSItemMagic:
+    case KEKSItemRaw:
+    default:
+        return false;
+    }
+}
+
+// Fetch both operands of a GT/LT comparison: *our from the schema item
+// idxSchema, *their from the data item idxData. Fails with
+// KEKSSchemaErrUnexpectedState when either item has no comparable
+// magnitude (see keksSchemaLen).
+static struct KEKSSchemaErr
+keksSchemaLens(
+    int64_t *our,
+    int64_t *their,
+    const struct KEKSItems *schema,
+    struct KEKSItems *data,
+    size_t idxSchema,
+    size_t idxData)
+{
+    if (!keksSchemaLen(our, &(schema->list[idxSchema].atom)) ||
+        !keksSchemaLen(their, &(data->list[idxData].atom))) {
+        return (struct KEKSSchemaErr){
+            .offSchema = schema->offsets[idxSchema],
+            .offData = data->offsets[idxData],
+            .code = KEKSSchemaErrUnexpectedState,
+            .msg = "unsupported len type",
+        };
+    }
+    return (struct KEKSSchemaErr){.code = KEKSSchemaErrNo};
+}
+
+// Execute the single schema command found at idxSchema (a LIST whose
+// first element is the command name) against data.
+//
+// *taken is the index of the currently selected data item, SIZE_MAX when
+// nothing is selected. TAKE sets it relative to idxData. EACH replaces a
+// selected list/map with its first element (for maps: first value) and
+// raises *eachInList / *eachInMap, which makes the NEXT command run once
+// per element; the flags are lowered again when that command returns.
+// Checking commands (TYPE, GT, LT, SCHEMA, TIMEMAXPREC) succeed when
+// nothing is selected.
+//
+// Both schema->offsets and data->offsets are dereferenced, so neither
+// argument may have had its offsets dropped.
+static struct KEKSSchemaErr
+keksSchemaCmd( // NOLINT(misc-no-recursion)
+    size_t *taken,
+    bool *eachInList,
+    bool *eachInMap,
+    const struct KEKSItems *schema,
+    struct KEKSItems *data,
+    size_t idxSchema,
+    size_t idxData)
+{
+    size_t origIdxSchema = idxSchema;
+    struct KEKSSchemaErr err;
+Eached:
+    // Re-entered for every element while an EACH iteration is active.
+    idxSchema = origIdxSchema;
+    err.offSchema = schema->offsets[idxSchema];
+    err.offData = data->offsets[idxData];
+    err.code = KEKSSchemaErrInvalid;
+    if (schema->list[idxSchema].atom.v.list.len <= 0) {
+        err.code = KEKSSchemaErrInvalidSchema;
+        err.msg = "empty cmd list";
+        return err;
+    }
+    idxSchema = schema->list[idxSchema].atom.v.list.head;
+    if (schema->list[idxSchema].atom.typ != KEKSItemStr) {
+        err.code = KEKSSchemaErrInvalidSchema;
+        err.msg = "non-str cmd";
+        return err;
+    }
+    if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdExists)) {
+        err.msg = "EXISTS";
+        if ((*taken) == SIZE_MAX) {
+            err.code = KEKSSchemaErrInvalidData;
+            return err;
+        }
+        err.code = KEKSSchemaErrNo;
+    } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdNotExists)) {
+        err.msg = "!EXISTS";
+        if ((*taken) != SIZE_MAX) {
+            err.code = KEKSSchemaErrInvalidData;
+            return err;
+        }
+        err.code = KEKSSchemaErrNo;
+    } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdTake)) {
+        idxSchema = schema->list[idxSchema].next;
+        err.offSchema = schema->offsets[idxSchema];
+        switch (schema->list[idxSchema].atom.typ) {
+        case KEKSItemStr:
+            if ((schema->list[idxSchema].atom.v.str.len == 1) &&
+                (schema->list[idxSchema].atom.v.str.ptr[0] == '.')) {
+                // "." selects the validated item itself.
+                (*taken) = idxData;
+            } else {
+                if (data->list[idxData].atom.typ != KEKSItemMap) {
+                    err.code = KEKSSchemaErrUnexpectedState;
+                    err.msg = "non-map TAKE target";
+                    return err;
+                }
+                (*taken) = KEKSItemsGetByKeyLen(
+                    data,
+                    idxData,
+                    (const char *)schema->list[idxSchema].atom.v.str.ptr,
+                    schema->list[idxSchema].atom.v.str.len);
+                if ((*taken) == 0) {
+                    (*taken) = SIZE_MAX;
+                }
+            }
+            break;
+        case KEKSItemPint:
+            if (data->list[idxData].atom.typ != KEKSItemList) {
+                err.code = KEKSSchemaErrUnexpectedState;
+                err.msg = "non-list TAKE target";
+                return err;
+            }
+            // Walk the sibling chain to the requested position; running
+            // off the end leaves nothing selected.
+            (*taken) = data->list[idxData].atom.v.list.head;
+            for (uint64_t i = 0; i < schema->list[idxSchema].atom.v.pint; i++) {
+                if ((*taken) == 0) {
+                    break;
+                }
+                (*taken) = data->list[*taken].next;
+            }
+            if ((*taken) == 0) {
+                (*taken) = SIZE_MAX;
+            }
+            break;
+        case KEKSItemInvalid:
+        case KEKSItemEOC:
+        case KEKSItemNIL:
+        case KEKSItemFalse:
+        case KEKSItemTrue:
+        case KEKSItemHexlet:
+        case KEKSItemNint:
+        case KEKSItemList:
+        case KEKSItemMap:
+        case KEKSItemBlob:
+        case KEKSItemFloat:
+        case KEKSItemTAI64:
+        case KEKSItemMagic:
+        case KEKSItemBin:
+        case KEKSItemRaw:
+        default:
+            err.code = KEKSSchemaErrInvalidSchema;
+            err.msg = "bad TAKE target";
+            return err;
+        }
+        err.msg = "TAKE";
+        err.code = KEKSSchemaErrNo;
+    } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdEach)) {
+        err.msg = "EACH";
+        (*eachInList) = false;
+        (*eachInMap) = false;
+        if ((*taken) == SIZE_MAX) {
+            err.code = KEKSSchemaErrNo;
+            return err;
+        }
+        switch (data->list[*taken].atom.typ) {
+        case KEKSItemList:
+            (*eachInList) = true;
+            break;
+        case KEKSItemMap:
+            (*eachInMap) = true;
+            break;
+        case KEKSItemInvalid:
+        case KEKSItemEOC:
+        case KEKSItemNIL:
+        case KEKSItemFalse:
+        case KEKSItemTrue:
+        case KEKSItemHexlet:
+        case KEKSItemPint:
+        case KEKSItemNint:
+        case KEKSItemBlob:
+        case KEKSItemFloat:
+        case KEKSItemTAI64:
+        case KEKSItemMagic:
+        case KEKSItemBin:
+        case KEKSItemStr:
+        case KEKSItemRaw:
+        default:
+            err.code = KEKSSchemaErrUnexpectedState;
+            err.msg = "non-iterable EACH";
+            return err;
+        }
+        if (data->list[*taken].atom.v.list.len == 0) {
+            (*taken) = SIZE_MAX;
+        } else {
+            (*taken) = data->list[*taken].atom.v.list.head;
+            if (*eachInMap) {
+                // A map's head is its first key; the following command
+                // has to see values, so step over the key.
+                (*taken) = data->list[*taken].next;
+                if ((*taken) == 0) {
+                    (*taken) = SIZE_MAX;
+                }
+            }
+        }
+        err.code = KEKSSchemaErrNo;
+        // Returned directly: the iteration belongs to the next command.
+        return err;
+    } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdType)) {
+        const size_t expectedLen = 16;
+        enum KEKSItemType expected[16] = {KEKSItemInvalid};
+        size_t idxExpected = 0;
+        idxSchema = schema->list[idxSchema].next;
+        while (idxSchema != 0) {
+            err.offSchema = schema->offsets[idxSchema];
+            if (schema->list[idxSchema].atom.typ != KEKSItemStr) {
+                err.code = KEKSSchemaErrInvalidSchema;
+                err.msg = "non-str TYPE";
+                return err;
+            }
+            // One name appends at most two entries (BOOL, INT); repeated
+            // names must not be able to run past the end of expected[].
+            if ((idxExpected + 2) > expectedLen) {
+                err.code = KEKSSchemaErrInvalidSchema;
+                err.msg = "too many TYPEs";
+                return err;
+            }
+            if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeNIL)) {
+                expected[idxExpected] = KEKSItemNIL;
+                idxExpected++;
+            } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeBool)) {
+                expected[idxExpected] = KEKSItemFalse;
+                idxExpected++;
+                expected[idxExpected] = KEKSItemTrue;
+                idxExpected++;
+            } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeHexlet)) {
+                expected[idxExpected] = KEKSItemHexlet;
+                idxExpected++;
+            } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeInt)) {
+                expected[idxExpected] = KEKSItemPint;
+                idxExpected++;
+                expected[idxExpected] = KEKSItemNint;
+                idxExpected++;
+            } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeList)) {
+                expected[idxExpected] = KEKSItemList;
+                idxExpected++;
+            } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeMap)) {
+                expected[idxExpected] = KEKSItemMap;
+                idxExpected++;
+            } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeBlob)) {
+                expected[idxExpected] = KEKSItemBlob;
+                idxExpected++;
+            } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeTAI64)) {
+                expected[idxExpected] = KEKSItemTAI64;
+                idxExpected++;
+            } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeMagic)) {
+                expected[idxExpected] = KEKSItemMagic;
+                idxExpected++;
+            } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeBin)) {
+                expected[idxExpected] = KEKSItemBin;
+                idxExpected++;
+            } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeStr)) {
+                expected[idxExpected] = KEKSItemStr;
+                idxExpected++;
+            } else {
+                err.code = KEKSSchemaErrInvalidSchema;
+                err.msg = "unknown TYPE";
+                return err;
+            }
+            idxSchema = schema->list[idxSchema].next;
+        }
+        err.msg = "TYPE";
+        if ((*taken) == SIZE_MAX) {
+            err.code = KEKSSchemaErrNo;
+        } else {
+            bool found = false;
+            for (size_t i = 0; i < idxExpected; i++) {
+                if (expected[i] == data->list[*taken].atom.typ) {
+                    found = true;
+                    break;
+                }
+            }
+            err.code = found ? KEKSSchemaErrNo : KEKSSchemaErrInvalidData;
+        }
+    } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdGT)) {
+        err.msg = "GT";
+        if ((*taken) == SIZE_MAX) {
+            err.code = KEKSSchemaErrNo;
+        } else {
+            idxSchema = schema->list[idxSchema].next;
+            err.offSchema = schema->offsets[idxSchema];
+            err.offData = data->offsets[*taken];
+            int64_t our = 0;
+            int64_t their = 0;
+            struct KEKSSchemaErr errLens =
+                keksSchemaLens(&our, &their, schema, data, idxSchema, *taken);
+            if (errLens.code != KEKSSchemaErrNo) {
+                return errLens;
+            }
+            err.code = (their <= our) ? KEKSSchemaErrInvalidData : KEKSSchemaErrNo;
+        }
+    } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdLT)) {
+        err.msg = "LT";
+        if ((*taken) == SIZE_MAX) {
+            err.code = KEKSSchemaErrNo;
+        } else {
+            idxSchema = schema->list[idxSchema].next;
+            err.offSchema = schema->offsets[idxSchema];
+            err.offData = data->offsets[*taken];
+            int64_t our = 0;
+            int64_t their = 0;
+            struct KEKSSchemaErr errLens =
+                keksSchemaLens(&our, &their, schema, data, idxSchema, *taken);
+            if (errLens.code != KEKSSchemaErrNo) {
+                return errLens;
+            }
+            err.code = (their >= our) ? KEKSSchemaErrInvalidData : KEKSSchemaErrNo;
+        }
+    } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdSchema)) {
+        idxSchema = schema->list[idxSchema].next;
+        err.offSchema = schema->offsets[idxSchema];
+        if (schema->list[idxSchema].atom.typ != KEKSItemStr) {
+            err.code = KEKSSchemaErrInvalidSchema;
+            err.msg = "non-str SCHEMA";
+            return err;
+        }
+        // Named schemas are looked up in the root map of the schema file.
+        idxSchema = KEKSItemsGetByKeyLen(
+            schema,
+            0,
+            (const char *)schema->list[idxSchema].atom.v.str.ptr,
+            schema->list[idxSchema].atom.v.str.len);
+        if (idxSchema == 0) {
+            err.code = KEKSSchemaErrUnexpectedState;
+            err.msg = "unknown SCHEMA";
+            return err;
+        }
+        err.offSchema = schema->offsets[idxSchema];
+        err.msg = "SCHEMA";
+        if ((*taken) == SIZE_MAX) {
+            err.code = KEKSSchemaErrNo;
+        } else {
+            struct KEKSSchemaErr errSchema =
+                KEKSSchemaValidate(schema, data, idxSchema, *taken);
+            if (errSchema.code != KEKSSchemaErrNo) {
+                return errSchema;
+            }
+            err.code = KEKSSchemaErrNo;
+        }
+    } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdTimeMaxPrec)) {
+        idxSchema = schema->list[idxSchema].next;
+        err.offSchema = schema->offsets[idxSchema];
+        if (schema->list[idxSchema].atom.typ != KEKSItemPint) {
+            err.code = KEKSSchemaErrInvalidSchema;
+            err.msg = "non-int TIMEMAXPREC";
+            return err;
+        }
+        err.msg = "TIMEMAXPREC";
+        if ((*taken) == SIZE_MAX) {
+            err.code = KEKSSchemaErrNo;
+        } else {
+            if (data->list[*taken].atom.typ != KEKSItemTAI64) {
+                err.code = KEKSSchemaErrUnexpectedState;
+                err.msg = "non-TAI64 taken";
+                return err;
+            }
+            uint32_t v = 0;
+            // Assume success first: the cases below only ever downgrade
+            // the verdict. Assigning it after the switch would discard
+            // every violation they detect.
+            err.code = KEKSSchemaErrNo;
+            switch (schema->list[idxSchema].atom.v.pint) {
+            case 0: // s
+                if (data->list[*taken].atom.v.str.len > 8) {
+                    err.code = KEKSSchemaErrInvalidData;
+                    err.msg = ">TAI64";
+                }
+                break;
+            case 3: // ms
+                if (data->list[*taken].atom.v.str.len > 12) {
+                    err.code = KEKSSchemaErrInvalidData;
+                    err.msg = ">TAI64N";
+                }
+                if (data->list[*taken].atom.v.str.len > 8) {
+                    v = (uint32_t)keksFromBE(data->list[*taken].atom.v.str.ptr + 8, 4);
+                    if ((v % 1000000) != 0) {
+                        err.code = KEKSSchemaErrInvalidData;
+                        err.msg = ">ms";
+                    }
+                }
+                break;
+            case 6: // µs
+                if (data->list[*taken].atom.v.str.len > 12) {
+                    err.code = KEKSSchemaErrInvalidData;
+                    err.msg = ">TAI64N";
+                }
+                if (data->list[*taken].atom.v.str.len > 8) {
+                    v = (uint32_t)keksFromBE(data->list[*taken].atom.v.str.ptr + 8, 4);
+                    if ((v % 1000) != 0) {
+                        err.code = KEKSSchemaErrInvalidData;
+                        err.msg = ">µs";
+                    }
+                }
+                break;
+            case 9: // ns
+                if (data->list[*taken].atom.v.str.len > 12) {
+                    err.code = KEKSSchemaErrInvalidData;
+                    err.msg = ">TAI64N";
+                }
+                break;
+            case 12: // ps
+                if (data->list[*taken].atom.v.str.len > 12) {
+                    v = (uint32_t)keksFromBE(data->list[*taken].atom.v.str.ptr + 12, 4);
+                    if ((v % 1000000) != 0) {
+                        err.code = KEKSSchemaErrInvalidData;
+                        err.msg = ">ps";
+                    }
+                }
+                break;
+            case 15: // fs
+                if (data->list[*taken].atom.v.str.len > 12) {
+                    v = (uint32_t)keksFromBE(data->list[*taken].atom.v.str.ptr + 12, 4);
+                    if ((v % 1000) != 0) {
+                        err.code = KEKSSchemaErrInvalidData;
+                        err.msg = ">fs";
+                    }
+                }
+                break;
+            case 18: // as
+                break;
+            default:
+                err.code = KEKSSchemaErrInvalidSchema;
+                err.msg = "unknown TIMEMAXPREC value";
+                return err;
+            }
+        }
+    } else {
+        err.msg = "unknown cmd";
+        err.code = KEKSSchemaErrInvalidSchema;
+    }
+    if (err.code != KEKSSchemaErrNo) {
+        return err;
+    }
+    // Advance an active EACH iteration. When nothing is selected (EACH
+    // over an empty container) there is no element to step from, and
+    // SIZE_MAX must never be used as an index.
+    if ((*taken) != SIZE_MAX) {
+        if (*eachInList) {
+            (*taken) = data->list[*taken].next;
+            if ((*taken) == 0) {
+                (*taken) = SIZE_MAX;
+            } else {
+                goto Eached;
+            }
+        } else if (*eachInMap) {
+            (*taken) = data->list[*taken].next; // key
+            if ((*taken) == 0) {
+                (*taken) = SIZE_MAX;
+            } else {
+                (*taken) = data->list[*taken].next; // value
+                if ((*taken) == 0) {
+                    (*taken) = SIZE_MAX;
+                } else {
+                    goto Eached;
+                }
+            }
+        }
+    }
+    (*eachInList) = false;
+    (*eachInMap) = false;
+    return err;
+}
+
+// Run every command of the schema at idxSchema, in order, against the
+// data item idxData, stopping at the first failing command and returning
+// its error. idxSchema must index a LIST whose elements are themselves
+// LISTs (one per command); anything else is reported as
+// KEKSSchemaErrInvalidSchema. The selection state (taken item and EACH
+// flags) is local to one call, so nested SCHEMA commands do not disturb
+// the caller's selection.
+struct KEKSSchemaErr
+KEKSSchemaValidate( // NOLINT(misc-no-recursion)
+    const struct KEKSItems *schema,
+    struct KEKSItems *data,
+    size_t idxSchema,
+    size_t idxData)
+{
+    struct KEKSSchemaErr err = (struct KEKSSchemaErr){
+        .offSchema = schema->offsets[idxSchema],
+        .offData = data->offsets[idxData],
+        .code = KEKSSchemaErrInvalid,
+    };
+    if (schema->list[idxSchema].atom.typ != KEKSItemList) {
+        err.code = KEKSSchemaErrInvalidSchema;
+        err.msg = "non-list schema";
+        return err;
+    }
+    idxSchema = schema->list[idxSchema].atom.v.list.head;
+    err.offSchema = schema->offsets[idxSchema];
+    size_t taken = SIZE_MAX;
+    bool eachInList = false;
+    bool eachInMap = false;
+    struct KEKSSchemaErr errCmd;
+    while (idxSchema != 0) {
+        if (schema->list[idxSchema].atom.typ != KEKSItemList) {
+            err.code = KEKSSchemaErrInvalidSchema;
+            err.msg = "non-list cmds";
+            return err;
+        }
+        errCmd = keksSchemaCmd(
+            &taken, &eachInList, &eachInMap, schema, data, idxSchema, idxData);
+        if (errCmd.code != KEKSSchemaErrNo) {
+            return errCmd;
+        }
+        idxSchema = schema->list[idxSchema].next;
+        err.offSchema = schema->offsets[idxSchema];
+    }
+    err.code = KEKSSchemaErrNo;
+    return err;
+}
diff --git a/c/lib/schema.h b/c/lib/schema.h
new file mode 100644
index 0000000..1a280a5
--- /dev/null
+++ b/c/lib/schema.h
@@ -0,0 +1,67 @@
+#ifndef KEKS_SCHEMA_H
+#define KEKS_SCHEMA_H
+
+#include <stddef.h>
+
+#include "items.h"
+
+// TEXINFO: KEKSSchemaErrType
+// @deftp {Data type} {enum KEKSSchemaErrType}
+// Data validation against schema error type.
+// @itemize +// @item KEKSSchemaErrInvalid -- invalid item value, uninitialised +// @item KEKSSchemaErrNo -- no error +// @item KEKSSchemaErrInvalidSchema -- bad schema format itself +// @item KEKSSchemaErrUnexpectedState -- schema led to an unexpected state +// @item KEKSSchemaErrInvalidData -- data structure is invalid +// @end itemize +// @end deftp +enum KEKSSchemaErrType { + KEKSSchemaErrInvalid = 0, + KEKSSchemaErrNo = 1, + KEKSSchemaErrInvalidSchema, + KEKSSchemaErrUnexpectedState, + KEKSSchemaErrInvalidData, +}; + +// TEXINFO: KEKSSchemaErr +// @deftp {Data type} {struct KEKSSchemaErr} +// Data validation against schema error details. +// @table @code +// @item .code +// Error code. +// @item .msg +// Optional human readable message. +// @item .offSchema +// Offset of the failed command in the schema. +// @item .offData +// Offset of the failed element in the data. +// @end table +// @end deftp +struct KEKSSchemaErr { + size_t offSchema; + size_t offData; + const char *msg; + enum KEKSSchemaErrType code; + char _pad[4]; +}; + +// TEXINFO: KEKSSchemaValidate +// @deftypefun {struct KEKSSchemaErr} KEKSSchemaValidate @ +// (const struct KEKSItems *schema, @ +// struct KEKSItems *data, @ +// size_t idxSchema, @ +// size_t idxData) +// Validate decoded @var{data} against the decoded @var{schema}. +// @var{idxSchema} points to the schema you wish to check against: the +// LIST of commands stored under its name in the root map of @var{schema}. +// @var{idxData} points to the item of the data you are +// going to sanitise (0 in most cases).
+// @end deftypefun +struct KEKSSchemaErr +KEKSSchemaValidate( + const struct KEKSItems *schema, + struct KEKSItems *data, + size_t idxSchema, + size_t idxData); + +#endif // KEKS_SCHEMA_H diff --git a/go/cmd/schema-validate/main.go b/go/cmd/schema-validate/main.go new file mode 100644 index 0000000..c201e87 --- /dev/null +++ b/go/cmd/schema-validate/main.go @@ -0,0 +1,67 @@ +// schema-validate KEKS data structures validator +// Copyright (C) 2024-2025 Sergey Matveev +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation, version 3 of the License. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this program. If not, see . 
+
+package main
+
+import (
+	"flag"
+	"fmt"
+	"log"
+	"os"
+
+	"go.cypherpunks.su/keks"
+	"go.cypherpunks.su/keks/schema"
+)
+
+func main() { // validate DATA.keks against the schema SCHEMA-NAME taken from SCHEMA.keks
+	flag.Parse()
+	log.SetFlags(log.Lshortfile)
+	if flag.NArg() != 3 { // exactly: SCHEMA.keks SCHEMA-NAME DATA.keks
+		fmt.Fprintf(os.Stderr, "Usage: schema-validate SCHEMA.keks SCHEMA-NAME DATA.keks\n")
+		os.Exit(1)
+	}
+	schemasRaw, err := os.ReadFile(flag.Arg(0))
+	if err != nil {
+		log.Fatal(err)
+	}
+	dataRaw, err := os.ReadFile(flag.Arg(2))
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	magic, schemasRaw := keks.StripMagic(schemasRaw) // the schema file must start with the schema magic
+	if magic != schema.Magic {
+		log.Fatal("bad schema magic")
+	}
+
+	_, dataRaw = keks.StripMagic(dataRaw) // whatever magic the data carries is ignored
+	d := keks.NewDecoderFromBytes(schemasRaw, nil)
+	var schemas map[string][][]any // schema name -> list of commands, each command a list
+	err = d.DecodeStruct(&schemas)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	d = keks.NewDecoderFromBytes(dataRaw, &keks.DecodeOpts{LeaveTAI64: true}) // NOTE(review): presumably keeps TAI64 values as their own types for the TYPE check -- confirm
+	data, err := d.Decode()
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	err = schema.Check(flag.Arg(1), schemas, data) // any validation failure ends the program via log.Fatal
+	if err != nil {
+		log.Fatal(err)
+	}
+}
diff --git a/go/schema/check.go b/go/schema/check.go
new file mode 100644
index 0000000..9143eab
--- /dev/null
+++ b/go/schema/check.go
@@ -0,0 +1,228 @@
+// GoKEKS -- Go KEKS codec implementation
+// Copyright (C) 2024-2025 Sergey Matveev
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation, version 3 of the License.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this program. If not, see .
+ +package schema + +import ( + "errors" + "fmt" + "slices" + + "go.cypherpunks.su/tai64n/v4" + + "go.cypherpunks.su/keks" + "go.cypherpunks.su/keks/types" +) + +const ( + CmdTake = "TAKE" + CmdEach = "EACH" + CmdExists = "EXISTS" + CmdNotExists = "!EXISTS" + CmdType = "TYPE" + CmdLT = "LT" + CmdGT = "GT" + CmdSchema = "SCHEMA" + + Magic = "schema" +) + +func Check(schemaName string, schemas map[string][][]any, data any) error { + acts := schemas[schemaName] + if acts == nil { + return errors.New(schemaName + ": no schema") + } + var single bool // TAKEn, not EACH + var vs []any + for i, act := range acts { + switch cmd := act[0].(string); cmd { + case CmdExists: + if vs == nil { + return fmt.Errorf("%s: %d: %s", schemaName, i, cmd) + } + case CmdNotExists: + if vs != nil { + return fmt.Errorf("%s: %d: %s", schemaName, i, cmd) + } + case CmdTake: + single = true + switch k := act[1].(type) { + case string: + if k == "." { + vs = []any{data} + } else { + m := data.(map[string]any) + v, exists := m[k] + if !exists { + vs = nil + continue + } + vs = []any{v} + } + case uint64: + l := data.([]any) + vs = []any{l[k]} + default: + panic("bad take target") + } + case CmdEach: + single = false + if vs == nil { + continue + } + v := vs[0] + switch v := v.(type) { + case map[string]any: + vs = vs[:0] + for _, v := range v { + vs = append(vs, v) + } + case []any: + vs = v + default: + return fmt.Errorf("%s: %d: %s: non-iterable", schemaName, i, cmd) + } + case CmdType: + if vs == nil { + continue + } + expected := make([]types.Type, 0, len(act)-1) + for _, t := range act[1:] { + switch t := t.(string); t { + case "NIL": + expected = append(expected, types.NIL) + case "BOOL": + expected = append(expected, types.Bool) + case "HEXLET": + expected = append(expected, types.Hexlet) + case "INT": + expected = append(expected, types.UInt, types.Int) + case "LIST": + expected = append(expected, types.List) + case "MAP": + expected = append(expected, types.Map) + case "BLOB": + 
expected = append(expected, types.Blob) + case "TAI64": + expected = append(expected, types.TAI64, types.TAI64N, types.TAI64NA) + case "MAGIC": + expected = append(expected, types.Magic) + case "BIN": + expected = append(expected, types.Bin) + case "STR": + expected = append(expected, types.Str) + default: + panic("unknown type: " + t) + } + } + var typ types.Type + for n, v := range vs { + switch v.(type) { + case nil: + typ = types.NIL + case bool: + typ = types.Bool + case *keks.Hexlet: + typ = types.Hexlet + case uint64: + typ = types.UInt + case int64: + typ = types.Int + case []any: + typ = types.List + case map[string]any: + typ = types.Map + case *keks.BlobChunked: + typ = types.Blob + case *tai64n.TAI64: + typ = types.TAI64 + case *tai64n.TAI64N: + typ = types.TAI64N + case *tai64n.TAI64NA: + typ = types.TAI64NA + case keks.Magic: + typ = types.Magic + case []byte: + typ = types.Bin + case string: + typ = types.Str + case keks.Raw: + typ = types.Raw + default: + panic(fmt.Errorf("unsupported type: %+v", v)) + } + if !slices.Contains(expected, typ) { + return fmt.Errorf("%s: %d: %d: %s: %T", schemaName, i, n, cmd, v) + } + } + case CmdGT, CmdLT: + if vs == nil { + continue + } + var expect int64 + switch v := act[1].(type) { + case uint64: + expect = int64(v) + case int64: + expect = v + default: + panic(fmt.Errorf("unsupported type: %+v", v)) + } + for _, v := range vs { + var got int64 + if single { + switch v := v.(type) { + case string: + got = int64(len(v)) + case []byte: + got = int64(len(v)) + case []any: + got = int64(len(v)) + case map[string]any: + got = int64(len(v)) + case uint64: + got = int64(v) + case int64: + got = v + default: + panic("non len-able") + } + } + switch cmd { + case CmdGT: + if got <= expect { + return fmt.Errorf("%s: %d: %d <= %d", schemaName, i, got, expect) + } + case CmdLT: + if got >= expect { + return fmt.Errorf("%s: %d: %d >= %d", schemaName, i, got, expect) + } + } + } + case CmdSchema: + if vs == nil { + continue + } 
+ for n, v := range vs { + if err := Check(act[1].(string), schemas, v); err != nil { + return fmt.Errorf("%s: %d: %d: %s: %w", schemaName, i, n, cmd, err) + } + } + default: + panic("unknown command: " + cmd) + } + } + return nil +} diff --git a/go/utils/mk-bin b/go/utils/mk-bin index 133b75f..dafadc0 100755 --- a/go/utils/mk-bin +++ b/go/utils/mk-bin @@ -6,3 +6,5 @@ mkdir -p bin bin=$(realpath bin) cd cmd/pp go build -o $bin/kekspp -ldflags "$GO_LDFLAGS" +cd ../schema-validate +go build -o $bin/keks-schema-validate -ldflags "$GO_LDFLAGS" diff --git a/spec/index.texi b/spec/index.texi index 4f86294..6441412 100644 --- a/spec/index.texi +++ b/spec/index.texi @@ -131,6 +131,7 @@ and won't be able to interpret/validate them. @include design.texi @include install.texi @include encoding/index.texi +@include schema/index.texi @include cm/index.texi @node Concepts Index diff --git a/spec/schema/cmds.texi b/spec/schema/cmds.texi new file mode 100644 index 0000000..4487ced --- /dev/null +++ b/spec/schema/cmds.texi @@ -0,0 +1,144 @@ +@node SchemaCmds +@cindex schema commands +@nodedescription Schema commands +@section Schema commands + +Data structure validation commands are grouped in so-called map of +schemas. Map's key is schema name. Schema's value is a list of commands. +Each command is a list of string-encoded words (with several exceptions). +First element of the command's list is a command name. Possible +following elements are command-specific. + +Here is full list of structure validation commands, that should be +generated from higher level schema descriptions. + +@table @code + +@item TAKE k +Choose the value of the "k" key in the map, if "k" is a string. +If "k" is integer, then choose the k-th value in a list. +If "k" equals to ".", then choose the element you are currently in +(current map or list). Command never fails, but key can be non-existent. + +@item EXISTS +Check that TAKEn element exists. + +@item !EXISTS +Check that TAKEn element does not exist. 
+ +@item EACH +Execute the next command against every element of the TAKEn list, or +every value of the map. Do nothing if taken list/map is empty. + +@item TYPE T0 [T1 ...] +Check that TAKEn element's type is in (T0[, T1 ...]) set. +Do nothing if no element was taken. + +Possible types: BIN, BLOB, BOOL, HEXLET, INT, LIST, MAGIC, MAP, NIL, +STR, TAI64. + +@item GT n +Check that TAKEn integer value is greater than "n". If taken value is +either list or map, then check its length. If the value is a string, +then check its length. Do nothing if no element was taken. + +@item LT n +Same as @code{GT}, but check that value is less than "n". + +@item SCHEMA s +Check TAKEn element against schema with the name equal to "s". +Do nothing if no element was taken. + +@item TIMEMAXPREC p +Check maximal allowable time precision. "p" is an integer with the following +possible values: 0 -- only full seconds allowed, no parts; +3 -- only up to milliseconds; +6 -- only up to microseconds; +9 -- only up to nanoseconds; +12 -- only up to picoseconds; +15 -- only up to femtoseconds; +18 -- up to attoseconds. + +@end table + +For example let's check "our" structure, described in CDDL as: + +@verbatim +ai = text .gt 0 +fpr = bytes .size 32 +our = {a: ai, v: bytes/text, fpr: fpr, ?comment: text} +@end verbatim + +"a", "v", "fpr" fields are required ones. "v" has two allowable types. +"comment" is optional, but typed. And "fpr" has fixed length. 
+Corresponding schema can be: + +@verbatim +{"our": [ + ["TAKE", "a"], + ["EXISTS"], + ["TAKE", "a"], + ["TYPE", "STR"], + ["TAKE", "a"], + ["GT", 0], + + ["TAKE", "v"], + ["EXISTS"], + ["TAKE", "v"], + ["TYPE", "BIN", "STR"], + + ["TAKE", "fpr"], + ["EXISTS"], + ["TAKE", "fpr"], + ["TYPE", "BIN"], + ["TAKE", "fpr"], + ["GT", 31], + ["TAKE", "fpr"], + ["LT", 33], + + ["TAKE", "comment"], + ["TYPE", "STR"], +]} +@end verbatim + +Here is an example with multiple schemas: + +@verbatim +latitude = -90..90 +longitude = -180..180 +where = [latitude, longitude] +wheres = [+ where] +@end verbatim + +@verbatim +{ + "where": [ + ["TAKE", "."], + ["TYPE", "LIST"], + ["TAKE", "."], + ["GT", 1], + ["TAKE", "."], + ["LT", 3], + ["TAKE", "."], + ["EACH"], + ["TYPE", "INT"], + ["TAKE", 0], + ["GT", -91], + ["TAKE", 0], + ["LT", 91], + ["TAKE", 1], + ["GT", -181], + ["TAKE", 1], + ["LT", 181], + ], + "wheres": [ + ["TAKE", "."], + ["TYPE", "LIST"], + ["TAKE", "."], + ["GT", 0], + ["TAKE", "."], + ["EACH"], + ["SCHEMA", "where"], + ], +} +@end verbatim diff --git a/spec/schema/index.texi b/spec/schema/index.texi new file mode 100644 index 0000000..227a434 --- /dev/null +++ b/spec/schema/index.texi @@ -0,0 +1,34 @@ +@node Schemas +@cindex Schemas +@cindex structure validation +@cindex data schemas +@nodedescription Structure validation against schemas +@unnumbered Data schemas + +Although KEKS can be decoded without any schema definition/specification, +data structures are likely to be checked against some kind of schema. +Here is a suggestion (not a requirement!) to use relatively simple data +structure validation specifications/schemas. + +How are data structures checked? You check if they have required fields, +have necessary types of fields, satisfying lengths of the lists/maps or +strings, and so on. In most cases those checks cover nearly everything +when you sanitise the structures. 
+ +So the suggestion is to specify those steps for some kind of very simple +minimalistic validation machine, that interprets them, executing +validation commands against the provided data structures. That "machine" +should be simple enough to be able to implement it quickly and with a sane +amount of code. Validation steps should be easily decodable and +conveniently parsed even in C-language. + +Let's use KEKS format itself for the serialised validation steps! And +generate them from higher level language/code, convenient for humans. + +@verbatim +Tcl-schema -> keks-encode(validation-commands) + validate(keks-decode(validation-commands), keks-decode(data)) +@end verbatim + +@include schema/cmds.texi +@include schema/tcl.texi diff --git a/spec/schema/tcl.texi b/spec/schema/tcl.texi new file mode 100644 index 0000000..d42f227 --- /dev/null +++ b/spec/schema/tcl.texi @@ -0,0 +1,62 @@ +@node TclSchemas +@cindex Tcl schemas +@nodedescription Tcl-written schemas +@section Tcl schemas + +Validation commands are pretty low-level and are inconvenient to write +by hand, at least because of the huge quantity of TAKEs. +@command{tcl/schema2bin} utility gives the ability to convert much +nicer schemas written in the Tcl language to the KEKS-encoded commands. + +Example with "our" structure can be written as: + +@verbatim +SCHEMAS { +our { + {HAS a} + {TYPE= a {STR}} + {TAKE a} + {GT 0} + + {HAS v} + {TYPE= v {BIN STR}} + + {HAS fpr} + {TYPE= fpr {BIN}} + {LEN= fpr 32} + + {TYPE= comment {STR}} +} +} +@end verbatim + +and @ref{cm-pub, cm/pub} as: + +@verbatiminclude ../tcl/schemas/pub.tcl + +@command{schema2bin} provides additional shorter aliased commands: + +@table @code + +@item HAS k +Check existence of "k" element. + +@item !HAS k +Opposite to HAS. + +@item LEN= k l +Check that "k" has length (or integer value) equal to "l". + +@item TYPE= k Ts +Check that "k" has type in "Ts" set. + +@item TYPE* k Ts +Check that each element of "k" has type in "Ts" set. 
+ +@item SCHEMA= k s +Check "k" against "s" schema. + +@item SCHEMA* k s +Check each element of "k" against "s" schema. + +@end table diff --git a/tcl/keks.tcl b/tcl/keks.tcl index 68cc9cf..a1cae56 100644 --- a/tcl/keks.tcl +++ b/tcl/keks.tcl @@ -22,30 +22,34 @@ proc add {v} { set buf [string cat $buf $v] } -proc char {v} { add [binary format c $v] } +proc char {v} {add [binary format c $v]} -proc EOC {} { char [expr 0x00] } -proc NIL {} { char [expr 0x01] } -proc FALSE {} { char [expr 0x02] } -proc TRUE {} { char [expr 0x03] } +proc EOC {} {char [expr 0x00]} +proc NIL {} {char [expr 0x01]} +proc FALSE {} {char [expr 0x02]} +proc TRUE {} {char [expr 0x03]} proc HEXLET {v} { set v [binary decode hex [string map {- ""} $v]] - if {[string length $v] != 16} { error "bad len" } + if {[string length $v] != 16} { + error "bad len" + } char [expr 0x04] add $v } proc MAGIC {v} { set l [string length $v] - if {$l > 12} { error "too long" } + if {$l > 12} { + error "too long" + } add "KEKS" add $v add [string repeat [binary format c 0] [expr {12 - $l}]] } proc toBEbin {l v} { - set a [list] + set a {} for {set i 0} {$i < $l} {incr i} { set b [expr {($l - $i - 1) * 8}] lappend a [binary format c [expr {($v & (0xFF << $b)) >> $b}]] @@ -53,7 +57,7 @@ proc toBEbin {l v} { return [join $a ""] } -proc toBE {l v} { add [toBEbin $l $v] } +proc toBE {l v} {add [toBEbin $l $v]} proc INT {v} { if {$v >= 0} { @@ -68,7 +72,9 @@ proc INT {v} { } set l 0 while {1} { - if {$v < [expr {1 << (($l+1)*8)}]} { break } + if {$v < [expr {1 << (($l+1)*8)}]} { + break + } incr l } BIN [toBEbin [expr {$l + 1}] $v] @@ -92,16 +98,20 @@ proc _str {atom v} { set vl [expr {$vl - 61}] } char [expr {$atom | $lv}] - if {$ll > 0} { toBE $ll $vl } + if {$ll > 0} { + toBE $ll $vl + } add $v } -proc BIN {v} { _str [expr 0x80] $v} -proc STR {v} { _str [expr {0x80 | 0x40}] [encoding convertto utf-8 $v]} +proc BIN {v} {_str [expr 0x80] $v} +proc STR {v} {_str [expr {0x80 | 0x40}] [encoding convertto utf-8 $v]} proc 
LIST {v} { char [expr 0x08] - foreach i $v { eval $i } + foreach i $v { + eval $i + } EOC } @@ -110,23 +120,29 @@ proc LenFirstSort {a b} { set b [encoding convertto utf-8 $b] set al [string length $a] set bl [string length $b] - if {$al < $bl} { return -1 } - if {$al > $bl} { return 1 } + if {$al < $bl} { + return -1 + } + if {$al > $bl} { + return 1 + } for {set i 0} {$i < [string length $a]} {incr i} { set av [lindex $a $i] set bv [lindex $b $i] - if {$av < $bv} { return -1 } - if {$av > $bv} { return 1 } + if {$av < $bv} { + return -1 + } + if {$av > $bv} { + return 1 + } } error "non-unique keys" } proc MAP {pairs} { set d [dict create] - set keys [list] - for {set i 0} {$i < [llength $pairs]} {incr i 2} { - set k [lindex $pairs $i] - set v [lindex $pairs [expr {$i + 1}]] + set keys {} + foreach {k v} $pairs { lappend keys $k dict set d $k $v } @@ -140,8 +156,10 @@ proc MAP {pairs} { } proc SET {v} { - set args [list] - foreach k $v { lappend args $k NIL } + set args {} + foreach k $v { + lappend args $k NIL + } MAP $args } @@ -207,10 +225,14 @@ proc toTAI64 {v} { variable Leapsecs set i 0 for {} {$i < [llength $Leapsecs]} {incr i} { - if {$v < [lindex $Leapsecs $i]} { break } + if {$v < [lindex $Leapsecs $i]} { + break + } } set v [expr {$v + 10 + $i}] - if {$v == [lindex $Leapsecs $i]} { incr v } + if {$v == [lindex $Leapsecs $i]} { + incr v + } set v [expr {$v + 0x4000000000000000}] toBE 8 $v } diff --git a/tcl/schema2bin b/tcl/schema2bin new file mode 100755 index 0000000..9da26c7 --- /dev/null +++ b/tcl/schema2bin @@ -0,0 +1,114 @@ +#!/usr/bin/env tclsh +# schema2bin -- Convert Tcl schemas to KEKS representation +# Copyright (C) 2024-2025 Sergey Matveev +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation, version 3 of the License. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this program. If not, see . + +source [file join [file dirname $::argv0] keks.tcl] +namespace import KEKS::* + +proc TAKE {v} { + if {[string is digit $v]} { + set v [list INT $v] + } { + set v [list STR $v] + } + subst {{LIST {{STR TAKE} {$v}}}} +} +proc EXISTS {} {subst {{LIST {{STR EXISTS}}}}} +proc !EXISTS {} {subst {{LIST {{STR !EXISTS}}}}} +proc EACH {} {subst {{LIST {{STR EACH}}}}} +proc TYPE {vs} { + set l {{STR TYPE}} + foreach v $vs { + lappend l "STR $v" + } + subst {{LIST {$l}}} +} +proc GT {v} {subst {{LIST {{STR GT} {INT $v}}}}} +proc LT {v} {subst {{LIST {{STR LT} {INT $v}}}}} +proc SCHEMA {v} {subst {{LIST {{STR SCHEMA} {STR $v}}}}} +proc TIMEMAXPREC {v} {subst {{LIST {{STR TIMEMAXPREC} {INT $v}}}}} + +proc evals {cmds} { + set rv {} + foreach cmd $cmds { + set rv [concat $rv [eval $cmd]] + } + return $rv +} + +proc SCHEMAS {v} { + set pairs {} + foreach {name cmds} $v { + lappend pairs $name [list LIST [evals $cmds]] + } + MAP $pairs +} + +proc HAS {k} { + evals [subst { + {TAKE $k} + {EXISTS} + }] +} + +proc !HAS {k} { + evals [subst { + {TAKE $k} + {!EXISTS} + }] +} + +proc LEN= {k l} { + evals [subst { + {TAKE $k} + {GT [expr {$l - 1}]} + {TAKE $k} + {LT [expr {$l + 1}]} + }] +} + +proc TYPE= {k types} { + evals [subst { + {TAKE $k} + {TYPE {$types} + }}] +} + +proc TYPE* {k types} { + evals [subst { + {TAKE $k} + {EACH} + {TYPE {$types}} + }] +} + +proc SCHEMA= {k schema} { + evals [subst { + {TAKE $k} + {SCHEMA $schema} + }] +} + +proc SCHEMA* {k schema} { + evals [subst { + {TAKE $k} + {EACH} + {SCHEMA $schema} + }] +} + +MAGIC schema +source [lindex $::argv 0] +puts [binary encode hex $::KEKS::buf] 
diff --git a/tcl/schemas/pub.tcl b/tcl/schemas/pub.tcl new file mode 100644 index 0000000..097ff63 --- /dev/null +++ b/tcl/schemas/pub.tcl @@ -0,0 +1,86 @@ +SCHEMAS { + +av { + {HAS a} + {TYPE= a {STR}} + {TAKE a} + {GT 0} + + {HAS v} + {TYPE= v {BIN}} +} +pub { + {HAS load} + {SCHEMA= load load} + {TYPE= sigs {LIST}} + {SCHEMA* sigs sig} + + {TYPE= pubs {LIST}} + {TAKE pubs} + {GT 0} + {SCHEMA* pubs pub} +} +load { + {HAS t} + {TYPE= t {STR}} + {TAKE t} + {GT 0} + + {HAS v} + {SCHEMA= v pub-load} +} +sig { + {HAS tbs} + {HAS sign} + {SCHEMA= sign av} + {SCHEMA= tbs tbs} +} +tbs { + {HAS sid} + {TYPE= sid {BIN}} + {LEN= sid 32} + + {HAS cid} + {TYPE= cid {HEXLET}} + + {HAS exp} + {TYPE= exp {LIST}} + {LEN= exp 2} + {TYPE* exp {TAI64}} + {TAKE exp} + {EACH} + {TIMEMAXPREC 0} + + {TYPE= when {TAI64}} + + {TYPE= nonce {BIN}} + {TAKE nonce} + {GT 0} +} +pub-load { + {HAS id} + {TYPE= id {BIN}} + {LEN= id 32} + + {!HAS crit} + + {TYPE= ku {MAP}} + {TAKE ku} + {GT 0} + {TYPE* ku {NIL}} + + {HAS pub} + {TYPE= pub {LIST}} + {TAKE pub} + {GT 0} + {SCHEMA* pub av} + + {HAS sub} + {TAKE sub} + {TYPE {MAP}} + {TAKE sub} + {GT 0} + {TYPE* sub {STR}} +} + +} -- 2.48.1