Data validation against schemas

author Sergey Matveev <stargrave@stargrave.org>

Thu, 27 Mar 2025 08:54:40 +0000 (11:54 +0300)

committer Sergey Matveev <stargrave@stargrave.org>

Thu, 3 Apr 2025 11:33:40 +0000 (14:33 +0300)
author Sergey Matveev <stargrave@stargrave.org>
Thu, 27 Mar 2025 08:54:40 +0000 (11:54 +0300)
committer Sergey Matveev <stargrave@stargrave.org>
Thu, 3 Apr 2025 11:33:40 +0000 (14:33 +0300)
diff --git a/c/cmd/clean b/c/cmd/clean

index ca5d9043788589ce1c42ef86cde23a5494f10554e626ebd8b101187a881b84f4..7e3f4e07feb73beca3d679938f6403207891b62d384aa6c5184808f7640dcac6 100755 (executable)
--- a/c/cmd/clean
+++ b/c/cmd/clean
@@ -6,4 +6,5 @@ deatomiser/clean
  for-fuzz/clean
  lib/clean
  pp/clean
+schema-validate/clean
  test-vector/clean
diff --git a/c/cmd/pp/pp.c b/c/cmd/pp/pp.c

index e2769880e013808a7a2ebc2f46910c9a11116c8b7563077ba5667904cf4f661c..5cba8459dea3a84ae3d60d627ce168f4095906d6bdd1ed30e13c2b0fc1625ad7 100644 (file)
--- a/c/cmd/pp/pp.c
+++ b/c/cmd/pp/pp.c
@@ -109,8 +109,8 @@ printer( // NOLINT(misc-no-recursion)
      case KEKSItemTrue:
          fputs("TRUE\n", stdout);
          break;
-    case KEKSItemUUID:
-        UUIDPrint(item->atom.v.uuid);
+    case KEKSItemHexlet:
+        UUIDPrint(item->atom.v.hexlet);
          fputs("\n", stdout);
          break;
      case KEKSItemMagic:
@@ -369,8 +369,7 @@ main(int argc, char **argv)
              return EXIT_FAILURE;
          }
          if (NoOffsets) {
-            free(items.offsets);
-            items.offsets = NULL;
+            KEKSItemsNoOffsets(&items);
          }
          errno = 0;
          if (clock_gettime(CLOCK_MONOTONIC_PRECISE, &started) != 0) {
diff --git a/c/cmd/schema-validate/.gitignore b/c/cmd/schema-validate/.gitignore

new file mode 100644 (file)

index 0000000..6e88972
--- /dev/null
+++ b/c/cmd/schema-validate/.gitignore
@@ -0,0 +1 @@
+/schema-validate
diff --git a/c/cmd/schema-validate/all.do b/c/cmd/schema-validate/all.do

new file mode 100644 (file)

index 0000000..c2f6764
--- /dev/null
+++ b/c/cmd/schema-validate/all.do
@@ -0,0 +1 @@
+redo-ifchange schema-validate
diff --git a/c/cmd/schema-validate/clean b/c/cmd/schema-validate/clean

new file mode 100755 (executable)

index 0000000..a6cca59
--- /dev/null
+++ b/c/cmd/schema-validate/clean
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+
+cd "$(dirname "$(realpath -- "$0")")"
+exec rm -f schema-validate
diff --git a/c/cmd/schema-validate/schema-validate.c b/c/cmd/schema-validate/schema-validate.c

new file mode 100644 (file)

index 0000000..0a0b463
--- /dev/null
+++ b/c/cmd/schema-validate/schema-validate.c
@@ -0,0 +1,100 @@
+// schema-validate -- KEKS data structures validator
+// Copyright (C) 2024-2025 Sergey Matveev <stargrave@stargrave.org>
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation, version 3 of the License.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <keks/atom.h>
+#include <keks/err.h>
+#include <keks/items.h>
+#include <keks/schema.h>
+
+#include "../lib/mmap.h"
+
+// Map the file fn into memory and decode it into items.
+// If the first decoded item is a magic, then the decoded items are
+// dropped and decoding is retried from the offset the previous attempt
+// stopped at (KEKSItemsParse is handed the same offset variable).
+// Decoding errors are printed to stderr. Returns true on success.
+// The mapping is not released here.
+// NOTE(review): presumably decoded items reference the mapped buffer -- confirm.
+static bool
+parse(struct KEKSItems *items, const char *fn)
+{
+    const ptrdiff_t initialCap = 2048;
+    unsigned char *mapped = NULL;
+    size_t mappedLen = 0;
+    if (!Mmap(&mapped, &mappedLen, fn)) {
+        return false;
+    }
+    size_t pos = 0;
+    for (;;) {
+        enum KEKSErr rc = KEKSItemsInit(items, initialCap);
+        if (rc != KEKSErrNo) {
+            fprintf(stderr, "err: %s\n", KEKSErr2Str(rc));
+            return false;
+        }
+        rc = KEKSItemsParse(items, &pos, mapped, mappedLen);
+        if (rc != KEKSErrNo) {
+            fprintf(stderr, "err: %s\n", KEKSErr2Str(rc));
+            KEKSItemsFree(items);
+            return false;
+        }
+        if (items->list[0].atom.typ != KEKSItemMagic) {
+            return true;
+        }
+        // Leading magic: throw it away and decode what follows it.
+        KEKSItemsFree(items);
+    }
+}
+
+// Entry point: schema-validate SCHEMA.keks SCHEMA-NAME DATA.keks
+// Decodes both files, looks up SCHEMA-NAME in the root item of the
+// schema file and validates the data's root item against it.
+// Exits with EXIT_SUCCESS only if validation passed; otherwise prints
+// the reason to stderr and exits with EXIT_FAILURE.
+int
+main(int argc, char **argv)
+{
+    if (argc < 4) {
+        fputs("Usage: schema-validate SCHEMA.keks SCHEMA-NAME DATA.keks\n", stderr);
+        return EXIT_FAILURE;
+    }
+    struct KEKSItems schema;
+    struct KEKSItems data;
+    // Schema is decoded first; data is not touched if that fails.
+    if (!parse(&schema, argv[1]) || !parse(&data, argv[3])) {
+        return EXIT_FAILURE;
+    }
+    const size_t schemaIdx = KEKSItemsGetByKey(&schema, 0, argv[2]);
+    if (schemaIdx == 0) {
+        fputs("can not find specified schema name\n", stderr);
+        return EXIT_FAILURE;
+    }
+    const struct KEKSSchemaErr res = KEKSSchemaValidate(&schema, &data, schemaIdx, 0);
+    if (res.code == KEKSSchemaErrNo) {
+        return EXIT_SUCCESS;
+    }
+    // Human readable error class, if there is one for that code.
+    const char *prefix = NULL;
+    switch (res.code) {
+    case KEKSSchemaErrInvalidSchema:
+        prefix = "invalid schema: ";
+        break;
+    case KEKSSchemaErrUnexpectedState:
+        prefix = "unexpected state: ";
+        break;
+    case KEKSSchemaErrInvalidData:
+        prefix = "invalid data: ";
+        break;
+    case KEKSSchemaErrInvalid:
+    case KEKSSchemaErrNo:
+    default:
+        break;
+    }
+    if (prefix != NULL) {
+        fputs(prefix, stderr);
+    }
+    fprintf(stderr, "schema:%zu data:%zu: %s\n", res.offSchema, res.offData, res.msg);
+    return EXIT_FAILURE;
+}
diff --git a/c/cmd/schema-validate/schema-validate.do b/c/cmd/schema-validate/schema-validate.do

new file mode 100644 (file)

index 0000000..659fe5d
--- /dev/null
+++ b/c/cmd/schema-validate/schema-validate.do
@@ -0,0 +1,8 @@
+deps="../lib/mmap.o"
+redo-ifchange $1.c $deps \
+    ../../conf/cc ../../conf/cflags ../../conf/ldflags ../../conf/prefix
+read CC <../../conf/cc
+CFLAGS=$(cat ../../conf/cflags)
+LDFLAGS=$(cat ../../conf/ldflags)
+read PREFIX <../../conf/prefix
+$CC $CFLAGS -I$PREFIX/include -o $3 $2.c $deps $LDFLAGS -L$PREFIX/lib -lkeks -lm -static
diff --git a/c/lib/items.c b/c/lib/items.c

index d19501dd65ca3fe8b18036c3f9353c8a631a784099918c086534bc7b4f1c7443..e742ab43ca9c901f58dfd9ce8e16cbecdb496224ed48302134e6ec2230838179 100644 (file)
--- a/c/lib/items.c
+++ b/c/lib/items.c
@@ -43,6 +43,28 @@ KEKSItemsInit(struct KEKSItems *items, const ptrdiff_t initialLen)
      return KEKSErrNo;
  }
  
+// Release the offsets storage of items and mark it as absent (NULL).
+// Safe to call when offsets are already absent.
+void
+KEKSItemsNoOffsets(struct KEKSItems *items)
+{
+    // free(NULL) is a no-op, so no explicit check is necessary.
+    free(items->offsets);
+    items->offsets = NULL;
+}
+
+// Release both the list and the offsets storage of items, leaving the
+// structure empty: NULL pointers, zero length and capacity.
+void
+KEKSItemsFree(struct KEKSItems *items)
+{
+    // free(NULL) is a no-op, so no explicit check is necessary.
+    free(items->list);
+    items->list = NULL;
+    KEKSItemsNoOffsets(items);
+    items->cap = 0;
+    items->len = 0;
+}
+
  enum KEKSErr
  KEKSItemsGrow(struct KEKSItems *items)
  {
@@ -458,56 +480,9 @@ KEKSItemsGetByKey(const struct KEKSItems *items, const size_t itemIdx, const cha
      return KEKSItemsGetByKeyLen(items, itemIdx, key, strlen(key));
  }
  
-size_t
-KEKSItemsGetByKeyAndType(
-    const struct KEKSItems *items,
-    const size_t itemIdx,
-    const char *key,
-    const enum KEKSItemType typ)
-{
-    const size_t idx = KEKSItemsGetByKey(items, itemIdx, key);
-    if ((idx == 0) || (items->list[idx].atom.typ != typ)) {
-        return 0;
-    }
-    return idx;
-}
-
  bool
  KEKSStrEqual(const struct KEKSAtom *atom, const char *s)
  {
      return (atom->v.str.len == strlen(s)) &&
             (memcmp(atom->v.str.ptr, s, atom->v.str.len) == 0);
  }
-
-bool
-KEKSListHasOnlyType(
-    const struct KEKSItems *items,
-    size_t idx,
-    const enum KEKSItemType typ)
-{
-    idx = items->list[idx].atom.v.list.head;
-    while (idx != 0) {
-        if (items->list[idx].atom.typ != typ) {
-            return false;
-        }
-        idx = items->list[idx].next;
-    }
-    return true;
-}
-
-bool
-KEKSMapHasOnlyType(
-    const struct KEKSItems *items,
-    size_t idx,
-    const enum KEKSItemType typ)
-{
-    idx = items->list[idx].atom.v.list.head;
-    while (idx != 0) {
-        idx = items->list[idx].next;
-        if (items->list[idx].atom.typ != typ) {
-            return false;
-        }
-        idx = items->list[idx].next;
-    }
-    return true;
-}
diff --git a/c/lib/items.h b/c/lib/items.h

index 6f9b1bb66d6002079efc3965e346b298b10e41969b8c35a106d289ff3bd3d91c..7828c388983b5edf6b6fac63e34fd212afea36c79774cde6c0c19645532e3fcb 100644 (file)
--- a/c/lib/items.h
+++ b/c/lib/items.h
@@ -72,6 +72,20 @@ struct KEKSItems {
  enum KEKSErr
  KEKSItemsInit(struct KEKSItems *, const ptrdiff_t initialLen);
  
+// TEXINFO: KEKSItemsFree
+// @deftypefun void KEKSItemsFree (struct KEKSItems *items)
+// Free the memory occupied by the @ref{KEKSItems} structure.
+// @end deftypefun
+void
+KEKSItemsFree(struct KEKSItems *);
+
+// TEXINFO: KEKSItemsNoOffsets
+// @deftypefun void KEKSItemsNoOffsets (struct KEKSItems *items)
+// Disable offsets storage during decoding.
+// @end deftypefun
+void
+KEKSItemsNoOffsets(struct KEKSItems *);
+
  // TEXINFO: KEKSItemsGrow
  // @deftypefun {enum KEKSErr} KEKSItemsGrow (struct KEKSItems *items)
  // Enlarge underlying storage of items, increasing its capacity. If
@@ -150,21 +164,6 @@ KEKSItemsGetByKeyLen(
  size_t
  KEKSItemsGetByKey(const struct KEKSItems *, const size_t itemIdx, const char *key);
  
-// TEXINFO: KEKSItemsGetByKeyAndType
-// @deftypefun size_t KEKSItemsGetByKeyAndType ( @
-//     const struct KEKSItems *items, @
-//     const size_t itemIdx, @
-//     const char *key, @
-//     const enum KEKSItemType typ)
-// Same as @ref{KEKSItemsGetByKey}, but also check that value's type is @var{typ}.
-// @end deftypefun
-size_t
-KEKSItemsGetByKeyAndType(
-    const struct KEKSItems *,
-    const size_t itemIdx,
-    const char *key,
-    const enum KEKSItemType typ);
-
  // TEXINFO: KEKSStrEqual
  // @deftypefun bool KEKSStrEqual (const struct KEKSAtom *atom, const char *s)
  // Returns true if string atom's value equal to null-terminated @var{s}.
@@ -172,26 +171,4 @@ KEKSItemsGetByKeyAndType(
  bool
  KEKSStrEqual(const struct KEKSAtom *, const char *s);
  
-// TEXINFO: KEKSListHasOnlyType
-// @deftypefun bool KEKSListHasOnlyType ( @
-//     const struct KEKSItems *items, @
-//     const size_t idx, @
-//     const enum KEKSItemType typ)
-// Returns true if @var{idx} list in @var{items} contains only values
-// with the @var{typ} type.
-// @end deftypefun
-bool
-KEKSListHasOnlyType(const struct KEKSItems *, size_t idx, const enum KEKSItemType typ);
-
-// TEXINFO: KEKSMapHasOnlyType
-// @deftypefun bool KEKSMapHasOnlyType ( @
-//     const struct KEKSItems *items, @
-//     const size_t idx, @
-//     const enum KEKSItemType typ)
-// Returns true if @var{idx} map in @var{items} contains only values
-// with the @var{typ} type.
-// @end deftypefun
-bool
-KEKSMapHasOnlyType(const struct KEKSItems *, size_t idx, const enum KEKSItemType typ);
-
  #endif // KEKS_POOL_H
diff --git a/c/lib/o.list b/c/lib/o.list

index 52a1ee006e15039370f14e46e1783cbe831d7d30e14fac50f301e44751a83557..2d87f0c547f69213877929c486523f9a20735bac6ad91bca686811e945585e8b 100644 (file)
--- a/c/lib/o.list
+++ b/c/lib/o.list
@@ -6,5 +6,6 @@ err.o
  frombe.o
  items.o
  leapsecs.o
+schema.o
  tobe.o
  utf8.o
diff --git a/c/lib/schema.c b/c/lib/schema.c

new file mode 100644 (file)

index 0000000..b9045d2
--- /dev/null
+++ b/c/lib/schema.c
@@ -0,0 +1,572 @@
+// ckeks -- C KEKS encoder implementation
+// Copyright (C) 2024-2025 Sergey Matveev <stargrave@stargrave.org>
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation, version 3 of the License.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "atom.h"
+#include "frombe.h"
+#include "items.h"
+#include "schema.h"
+
+static const char CmdEach[] = "EACH";
+static const char CmdExists[] = "EXISTS";
+static const char CmdGT[] = "GT";
+static const char CmdLT[] = "LT";
+static const char CmdNotExists[] = "!EXISTS";
+static const char CmdSchema[] = "SCHEMA";
+static const char CmdTake[] = "TAKE";
+static const char CmdTimeMaxPrec[] = "TIMEMAXPREC";
+static const char CmdType[] = "TYPE";
+
+static const char TypeBin[] = "BIN";
+static const char TypeBlob[] = "BLOB";
+static const char TypeBool[] = "BOOL";
+static const char TypeHexlet[] = "HEXLET";
+static const char TypeInt[] = "INT";
+static const char TypeList[] = "LIST";
+static const char TypeMagic[] = "MAGIC";
+static const char TypeMap[] = "MAP";
+static const char TypeNIL[] = "NIL";
+static const char TypeStr[] = "STR";
+static const char TypeTAI64[] = "TAI64";
+
+static struct KEKSSchemaErr
+keksSchemaCmd(
+    size_t *taken,
+    bool *eachInList,
+    bool *eachInMap,
+    const struct KEKSItems *schema,
+    struct KEKSItems *data,
+    size_t idxSchema,
+    size_t idxData);
+
+// Store in len the value GT/LT commands compare by: the integer itself
+// for integers, number of elements for lists and maps, length for
+// strings and binaries. Returns false, leaving len untouched, for any
+// other atom type.
+static bool
+keksSchemaLenOf(int64_t *len, const struct KEKSAtom *atom)
+{
+    switch (atom->typ) {
+    case KEKSItemPint:
+        (*len) = (int64_t)(atom->v.pint);
+        return true;
+    case KEKSItemNint:
+        (*len) = atom->v.nint;
+        return true;
+    case KEKSItemList:
+    case KEKSItemMap:
+        (*len) = (int64_t)(atom->v.list.len);
+        return true;
+    case KEKSItemStr:
+    case KEKSItemBin:
+        (*len) = (int64_t)(atom->v.str.len);
+        return true;
+    case KEKSItemInvalid:
+    case KEKSItemEOC:
+    case KEKSItemNIL:
+    case KEKSItemFalse:
+    case KEKSItemTrue:
+    case KEKSItemHexlet:
+    case KEKSItemBlob:
+    case KEKSItemFloat:
+    case KEKSItemTAI64:
+    case KEKSItemMagic:
+    case KEKSItemRaw:
+    default:
+        return false;
+    }
+}
+
+// Fetch comparable values of the schema's item (our) and of the data's
+// item (their). The schema side is processed first; if it has got
+// unsupported type, then their is left untouched.
+// Returns KEKSSchemaErrNo code on success, KEKSSchemaErrUnexpectedState
+// with both items offsets if either item's type has no length/value.
+static struct KEKSSchemaErr
+keksSchemaLens(
+    int64_t *our,
+    int64_t *their,
+    const struct KEKSItems *schema,
+    struct KEKSItems *data,
+    size_t idxSchema,
+    size_t idxData)
+{
+    if (keksSchemaLenOf(our, &(schema->list[idxSchema].atom)) &&
+        keksSchemaLenOf(their, &(data->list[idxData].atom))) {
+        return (struct KEKSSchemaErr){.code = KEKSSchemaErrNo};
+    }
+    return (struct KEKSSchemaErr){
+        .offSchema = schema->offsets[idxSchema],
+        .offData = data->offsets[idxData],
+        .code = KEKSSchemaErrUnexpectedState,
+        .msg = "unsupported len type",
+    };
+}
+
+// Execute single schema command against the data.
+//
+// taken -- index (in data) of the currently selected item, SIZE_MAX
+//     if nothing is selected. TAKE and EACH commands change it.
+// eachInList, eachInMap -- set by the EACH command when the selected
+//     item is a list/map. The command executed while one of them is
+//     set is repeated for every element (every value, for maps) and
+//     resets both flags afterwards.
+// schema, idxSchema -- decoded schema and index of the command (a list
+//     whose first element is the command's name) in it.
+// data, idxData -- decoded data and index of the item being validated.
+//
+// Returns an error with the KEKSSchemaErrNo code if command succeeded.
+// Otherwise .code tells the kind of failure, .msg shortly describes
+// it and .offSchema/.offData hold related offsets.
+static struct KEKSSchemaErr
+keksSchemaCmd( // NOLINT(misc-no-recursion)
+    size_t *taken,
+    bool *eachInList,
+    bool *eachInMap,
+    const struct KEKSItems *schema,
+    struct KEKSItems *data,
+    size_t idxSchema,
+    size_t idxData)
+{
+    size_t origIdxSchema = idxSchema;
+    struct KEKSSchemaErr err;
+Eached:
+    // Every EACH iteration restarts the command from its beginning.
+    idxSchema = origIdxSchema;
+    err.offSchema = schema->offsets[idxSchema];
+    err.offData = data->offsets[idxData];
+    err.code = KEKSSchemaErrInvalid;
+    if (schema->list[idxSchema].atom.v.list.len <= 0) {
+        err.code = KEKSSchemaErrInvalidSchema;
+        err.msg = "empty cmd list";
+        return err;
+    }
+    idxSchema = schema->list[idxSchema].atom.v.list.head;
+    if (schema->list[idxSchema].atom.typ != KEKSItemStr) {
+        err.code = KEKSSchemaErrInvalidSchema;
+        err.msg = "non-str cmd";
+        return err;
+    }
+    if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdExists)) {
+        err.msg = "EXISTS";
+        if ((*taken) == SIZE_MAX) {
+            err.code = KEKSSchemaErrInvalidData;
+            return err;
+        }
+        err.code = KEKSSchemaErrNo;
+    } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdNotExists)) {
+        err.msg = "!EXISTS";
+        if ((*taken) != SIZE_MAX) {
+            err.code = KEKSSchemaErrInvalidData;
+            return err;
+        }
+        err.code = KEKSSchemaErrNo;
+    } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdTake)) {
+        idxSchema = schema->list[idxSchema].next;
+        err.offSchema = schema->offsets[idxSchema];
+        switch (schema->list[idxSchema].atom.typ) {
+        case KEKSItemStr:
+            // "." selects the validated item itself, any other string
+            // is a key of the map.
+            if ((schema->list[idxSchema].atom.v.str.len == 1) &&
+                (schema->list[idxSchema].atom.v.str.ptr[0] == '.')) {
+                (*taken) = idxData;
+            } else {
+                if (data->list[idxData].atom.typ != KEKSItemMap) {
+                    err.code = KEKSSchemaErrUnexpectedState;
+                    err.msg = "non-map TAKE target";
+                    return err;
+                }
+                (*taken) = KEKSItemsGetByKeyLen(
+                    data,
+                    idxData,
+                    (const char *)schema->list[idxSchema].atom.v.str.ptr,
+                    schema->list[idxSchema].atom.v.str.len);
+                if ((*taken) == 0) {
+                    (*taken) = SIZE_MAX;
+                }
+            }
+            break;
+        case KEKSItemPint:
+            // Integer is a position in the list.
+            if (data->list[idxData].atom.typ != KEKSItemList) {
+                err.code = KEKSSchemaErrUnexpectedState;
+                err.msg = "non-list TAKE target";
+                return err;
+            }
+            (*taken) = data->list[idxData].atom.v.list.head;
+            for (uint64_t i = 0; i < schema->list[idxSchema].atom.v.pint; i++) {
+                if ((*taken) == 0) {
+                    break;
+                }
+                (*taken) = data->list[*taken].next;
+            }
+            if ((*taken) == 0) {
+                (*taken) = SIZE_MAX;
+            }
+            break;
+        case KEKSItemInvalid:
+        case KEKSItemEOC:
+        case KEKSItemNIL:
+        case KEKSItemFalse:
+        case KEKSItemTrue:
+        case KEKSItemHexlet:
+        case KEKSItemNint:
+        case KEKSItemList:
+        case KEKSItemMap:
+        case KEKSItemBlob:
+        case KEKSItemFloat:
+        case KEKSItemTAI64:
+        case KEKSItemMagic:
+        case KEKSItemBin:
+        case KEKSItemRaw:
+        default:
+            err.code = KEKSSchemaErrInvalidSchema;
+            err.msg = "bad TAKE target";
+            return err;
+        }
+        err.msg = "TAKE";
+        err.code = KEKSSchemaErrNo;
+    } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdEach)) {
+        err.msg = "EACH";
+        (*eachInList) = false;
+        (*eachInMap) = false;
+        if ((*taken) == SIZE_MAX) {
+            err.code = KEKSSchemaErrNo;
+            return err;
+        }
+        switch (data->list[*taken].atom.typ) {
+        case KEKSItemList:
+            (*eachInList) = true;
+            break;
+        case KEKSItemMap:
+            (*eachInMap) = true;
+            break;
+        case KEKSItemInvalid:
+        case KEKSItemEOC:
+        case KEKSItemNIL:
+        case KEKSItemFalse:
+        case KEKSItemTrue:
+        case KEKSItemHexlet:
+        case KEKSItemPint:
+        case KEKSItemNint:
+        case KEKSItemBlob:
+        case KEKSItemFloat:
+        case KEKSItemTAI64:
+        case KEKSItemMagic:
+        case KEKSItemBin:
+        case KEKSItemStr:
+        case KEKSItemRaw:
+        default:
+            err.code = KEKSSchemaErrUnexpectedState;
+            err.msg = "non-iterable EACH";
+            return err;
+        }
+        if (data->list[*taken].atom.v.list.len == 0) {
+            (*taken) = SIZE_MAX;
+        } else {
+            (*taken) = data->list[*taken].atom.v.list.head;
+            if (*eachInMap) {
+                // Head of the map is its first key, but the advancing
+                // code at the end of the function expects the value to
+                // be selected: step over the key.
+                (*taken) = data->list[*taken].next;
+                if ((*taken) == 0) {
+                    (*taken) = SIZE_MAX;
+                }
+            }
+        }
+        err.code = KEKSSchemaErrNo;
+        // Flags must survive till the next command, so skip the
+        // iteration code below.
+        return err;
+    } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdType)) {
+        const size_t expectedLen = 16;
+        enum KEKSItemType expected[16] = {KEKSItemInvalid};
+        size_t idxExpected = 0;
+        idxSchema = schema->list[idxSchema].next;
+        while (idxSchema != 0) {
+            err.offSchema = schema->offsets[idxSchema];
+            if (schema->list[idxSchema].atom.typ != KEKSItemStr) {
+                err.code = KEKSSchemaErrInvalidSchema;
+                err.msg = "non-str TYPE";
+                return err;
+            }
+            // Single name adds up to two entries (BOOL, INT). Refuse
+            // the schema instead of writing past the end of expected,
+            // which is possible if type names are repeated.
+            if ((idxExpected + 2) > expectedLen) {
+                err.code = KEKSSchemaErrInvalidSchema;
+                err.msg = "too many TYPEs";
+                return err;
+            }
+            if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeNIL)) {
+                expected[idxExpected] = KEKSItemNIL;
+                idxExpected++;
+            } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeBool)) {
+                expected[idxExpected] = KEKSItemFalse;
+                idxExpected++;
+                expected[idxExpected] = KEKSItemTrue;
+                idxExpected++;
+            } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeHexlet)) {
+                expected[idxExpected] = KEKSItemHexlet;
+                idxExpected++;
+            } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeInt)) {
+                expected[idxExpected] = KEKSItemPint;
+                idxExpected++;
+                expected[idxExpected] = KEKSItemNint;
+                idxExpected++;
+            } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeList)) {
+                expected[idxExpected] = KEKSItemList;
+                idxExpected++;
+            } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeMap)) {
+                expected[idxExpected] = KEKSItemMap;
+                idxExpected++;
+            } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeBlob)) {
+                expected[idxExpected] = KEKSItemBlob;
+                idxExpected++;
+            } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeTAI64)) {
+                expected[idxExpected] = KEKSItemTAI64;
+                idxExpected++;
+            } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeMagic)) {
+                expected[idxExpected] = KEKSItemMagic;
+                idxExpected++;
+            } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeBin)) {
+                expected[idxExpected] = KEKSItemBin;
+                idxExpected++;
+            } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), TypeStr)) {
+                expected[idxExpected] = KEKSItemStr;
+                idxExpected++;
+            } else {
+                err.code = KEKSSchemaErrInvalidSchema;
+                err.msg = "unknown TYPE";
+                return err;
+            }
+            idxSchema = schema->list[idxSchema].next;
+        }
+        err.msg = "TYPE";
+        if ((*taken) == SIZE_MAX) {
+            err.code = KEKSSchemaErrNo;
+        } else {
+            bool found = false;
+            // Only the filled part of expected is meaningful.
+            for (size_t i = 0; i < idxExpected; i++) {
+                if (expected[i] == data->list[*taken].atom.typ) {
+                    found = true;
+                    break;
+                }
+            }
+            err.code = found ? KEKSSchemaErrNo : KEKSSchemaErrInvalidData;
+        }
+    } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdGT)) {
+        err.msg = "GT";
+        if ((*taken) == SIZE_MAX) {
+            err.code = KEKSSchemaErrNo;
+        } else {
+            idxSchema = schema->list[idxSchema].next;
+            err.offSchema = schema->offsets[idxSchema];
+            err.offData = data->offsets[*taken];
+            int64_t our = 0;
+            int64_t their = 0;
+            struct KEKSSchemaErr errLens =
+                keksSchemaLens(&our, &their, schema, data, idxSchema, *taken);
+            if (errLens.code != KEKSSchemaErrNo) {
+                return errLens;
+            }
+            err.code = (their <= our) ? KEKSSchemaErrInvalidData : KEKSSchemaErrNo;
+        }
+    } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdLT)) {
+        err.msg = "LT";
+        if ((*taken) == SIZE_MAX) {
+            err.code = KEKSSchemaErrNo;
+        } else {
+            idxSchema = schema->list[idxSchema].next;
+            err.offSchema = schema->offsets[idxSchema];
+            err.offData = data->offsets[*taken];
+            int64_t our = 0;
+            int64_t their = 0;
+            struct KEKSSchemaErr errLens =
+                keksSchemaLens(&our, &their, schema, data, idxSchema, *taken);
+            if (errLens.code != KEKSSchemaErrNo) {
+                return errLens;
+            }
+            err.code = (their >= our) ? KEKSSchemaErrInvalidData : KEKSSchemaErrNo;
+        }
+    } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdSchema)) {
+        idxSchema = schema->list[idxSchema].next;
+        err.offSchema = schema->offsets[idxSchema];
+        if (schema->list[idxSchema].atom.typ != KEKSItemStr) {
+            err.code = KEKSSchemaErrInvalidSchema;
+            err.msg = "non-str SCHEMA";
+            return err;
+        }
+        // Referenced schema is looked up by its name in the root item.
+        idxSchema = KEKSItemsGetByKeyLen(
+            schema,
+            0,
+            (const char *)schema->list[idxSchema].atom.v.str.ptr,
+            schema->list[idxSchema].atom.v.str.len);
+        if (idxSchema == 0) {
+            err.code = KEKSSchemaErrUnexpectedState;
+            err.msg = "unknown SCHEMA";
+            return err;
+        }
+        err.offSchema = schema->offsets[idxSchema];
+        err.msg = "SCHEMA";
+        if ((*taken) == SIZE_MAX) {
+            err.code = KEKSSchemaErrNo;
+        } else {
+            struct KEKSSchemaErr errSchema =
+                KEKSSchemaValidate(schema, data, idxSchema, *taken);
+            if (errSchema.code != KEKSSchemaErrNo) {
+                return errSchema;
+            }
+            err.code = KEKSSchemaErrNo;
+        }
+    } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdTimeMaxPrec)) {
+        idxSchema = schema->list[idxSchema].next;
+        err.offSchema = schema->offsets[idxSchema];
+        if (schema->list[idxSchema].atom.typ != KEKSItemPint) {
+            err.code = KEKSSchemaErrInvalidSchema;
+            err.msg = "non-int TIMEMAXPREC";
+            return err;
+        }
+        err.msg = "TIMEMAXPREC";
+        if ((*taken) == SIZE_MAX) {
+            err.code = KEKSSchemaErrNo;
+        } else {
+            if (data->list[*taken].atom.typ != KEKSItemTAI64) {
+                err.code = KEKSSchemaErrUnexpectedState;
+                err.msg = "non-TAI64 taken";
+                return err;
+            }
+            uint32_t v = 0;
+            // Success must be assumed before the checks: being set
+            // after them, it would hide every violation they found.
+            err.code = KEKSSchemaErrNo;
+            switch (schema->list[idxSchema].atom.v.pint) {
+            case 0: // s
+                if (data->list[*taken].atom.v.str.len > 8) {
+                    err.code = KEKSSchemaErrInvalidData;
+                    err.msg = ">TAI64";
+                }
+                break;
+            case 3: // ms
+                if (data->list[*taken].atom.v.str.len > 12) {
+                    err.code = KEKSSchemaErrInvalidData;
+                    err.msg = ">TAI64N";
+                }
+                if (data->list[*taken].atom.v.str.len > 8) {
+                    v = (uint32_t)keksFromBE(data->list[*taken].atom.v.str.ptr + 8, 4);
+                    if ((v % 1000000) != 0) {
+                        err.code = KEKSSchemaErrInvalidData;
+                        err.msg = ">ms";
+                    }
+                }
+                break;
+            case 6: // µs
+                if (data->list[*taken].atom.v.str.len > 12) {
+                    err.code = KEKSSchemaErrInvalidData;
+                    err.msg = ">TAI64N";
+                }
+                if (data->list[*taken].atom.v.str.len > 8) {
+                    v = (uint32_t)keksFromBE(data->list[*taken].atom.v.str.ptr + 8, 4);
+                    if ((v % 1000) != 0) {
+                        err.code = KEKSSchemaErrInvalidData;
+                        err.msg = ">µs";
+                    }
+                }
+                break;
+            case 9: // ns
+                if (data->list[*taken].atom.v.str.len > 12) {
+                    err.code = KEKSSchemaErrInvalidData;
+                    err.msg = ">TAI64N";
+                }
+                break;
+            case 12: // ps
+                if (data->list[*taken].atom.v.str.len > 12) {
+                    v = (uint32_t)keksFromBE(data->list[*taken].atom.v.str.ptr + 12, 4);
+                    if ((v % 1000000) != 0) {
+                        err.code = KEKSSchemaErrInvalidData;
+                        err.msg = ">ps";
+                    }
+                }
+                break;
+            case 15: // fs
+                if (data->list[*taken].atom.v.str.len > 12) {
+                    v = (uint32_t)keksFromBE(data->list[*taken].atom.v.str.ptr + 12, 4);
+                    if ((v % 1000) != 0) {
+                        err.code = KEKSSchemaErrInvalidData;
+                        err.msg = ">fs";
+                    }
+                }
+                break;
+            case 18: // as
+                break;
+            default:
+                err.code = KEKSSchemaErrInvalidSchema;
+                err.msg = "unknown TIMEMAXPREC value";
+                return err;
+            }
+        }
+    } else {
+        err.msg = "unknown cmd";
+        err.code = KEKSSchemaErrInvalidSchema;
+    }
+    if (err.code != KEKSSchemaErrNo) {
+        return err;
+    }
+    // Advance to the next element of EACH. Nothing to advance from if
+    // nothing is selected (EACH over an empty list/map): indexing with
+    // SIZE_MAX would read out of bounds.
+    if ((*taken) != SIZE_MAX) {
+        if (*eachInList) {
+            (*taken) = data->list[*taken].next;
+            if ((*taken) == 0) {
+                (*taken) = SIZE_MAX;
+            } else {
+                goto Eached;
+            }
+        } else if (*eachInMap) {
+            (*taken) = data->list[*taken].next; // key
+            if ((*taken) == 0) {
+                (*taken) = SIZE_MAX;
+            } else {
+                (*taken) = data->list[*taken].next; // value
+                if ((*taken) == 0) {
+                    (*taken) = SIZE_MAX;
+                } else {
+                    goto Eached;
+                }
+            }
+        }
+    }
+    (*eachInList) = false;
+    (*eachInMap) = false;
+    return err;
+}
+
+// Validate the data's item at idxData against the schema's list of
+// commands at idxSchema. Commands are executed one after another,
+// sharing the "taken" selection and EACH state; the first failed one
+// stops the validation and its error is returned as is.
+// Returns an error with the KEKSSchemaErrNo code on success.
+struct KEKSSchemaErr
+KEKSSchemaValidate( // NOLINT(misc-no-recursion)
+    const struct KEKSItems *schema,
+    struct KEKSItems *data,
+    size_t idxSchema,
+    size_t idxData)
+{
+    struct KEKSSchemaErr res = {
+        .offSchema = schema->offsets[idxSchema],
+        .offData = data->offsets[idxData],
+        .code = KEKSSchemaErrInvalid,
+    };
+    if (schema->list[idxSchema].atom.typ != KEKSItemList) {
+        res.code = KEKSSchemaErrInvalidSchema;
+        res.msg = "non-list schema";
+        return res;
+    }
+    // State shared by all commands of that schema.
+    size_t taken = SIZE_MAX;
+    bool inList = false;
+    bool inMap = false;
+    for (size_t cmd = schema->list[idxSchema].atom.v.list.head;;
+         cmd = schema->list[cmd].next) {
+        res.offSchema = schema->offsets[cmd];
+        if (cmd == 0) {
+            break;
+        }
+        if (schema->list[cmd].atom.typ != KEKSItemList) {
+            res.code = KEKSSchemaErrInvalidSchema;
+            res.msg = "non-list cmds";
+            return res;
+        }
+        const struct KEKSSchemaErr cmdRes =
+            keksSchemaCmd(&taken, &inList, &inMap, schema, data, cmd, idxData);
+        if (cmdRes.code != KEKSSchemaErrNo) {
+            return cmdRes;
+        }
+    }
+    res.code = KEKSSchemaErrNo;
+    return res;
+}
diff --git a/c/lib/schema.h b/c/lib/schema.h

new file mode 100644 (file)

index 0000000..1a280a5
--- /dev/null
+++ b/c/lib/schema.h
@@ -0,0 +1,67 @@
+#ifndef KEKS_SCHEMA_H
+#define KEKS_SCHEMA_H
+
+#include <stddef.h>
+
+#include "items.h"
+
+// TEXINFO: KEKSSchemaErrType
+// @deftp {Data type} {enum KEKSSchemaErrType}
+// Data validation against schema error type.
+// @itemize
+// @item KEKSSchemaErrInvalid -- invalid item value, uninitialised
+// @item KEKSSchemaErrNo -- no error
+// @item KEKSSchemaErrInvalidSchema -- bad schema format itself
+// @item KEKSSchemaErrUnexpectedState -- schema led to an unexpected state
+// @item KEKSSchemaErrInvalidData -- data structure is invalid
+// @end itemize
+// @end deftp
+enum KEKSSchemaErrType {
+    KEKSSchemaErrInvalid = 0, // zero value: a zeroed error is never "no error"
+    KEKSSchemaErrNo = 1,
+    KEKSSchemaErrInvalidSchema,
+    KEKSSchemaErrUnexpectedState,
+    KEKSSchemaErrInvalidData,
+};
+
+// TEXINFO: KEKSSchemaErr
+// @deftp {Data type} {struct KEKSSchemaErr}
+// Data validation against schema error details.
+// @table @code
+// @item .code
+//     Error code.
+// @item .msg
+//     Optional human readable message.
+// @item .offSchema
+//     Offset of the failed command in the schema.
+// @item .offData
+//     Offset of the failed element in the data.
+// @end table
+// @end deftp
+struct KEKSSchemaErr {
+    size_t offSchema;
+    size_t offData;
+    const char *msg;
+    enum KEKSSchemaErrType code;
+    char _pad[4]; // explicit tail padding; presumably silences padding warnings -- TODO confirm
+};
+
+// TEXINFO: KEKSSchemaValidate
+// @deftypefun {struct KEKSSchemaErr} KEKSSchemaValidate @
+//     (const struct KEKSItems *schema, @
+//      struct KEKSItems *data, @
+//      size_t idxSchema, @
+//      size_t idxData)
+// Validate decoded @var{data} against the decoded @var{schema}.
+// @var{idxSchema} points to the LIST-item holding the commands of the
+// schema you wish to check against; any other item type is rejected
+// with @code{KEKSSchemaErrInvalidSchema}. @var{idxData} points to the
+// item of the data you are going to sanitise (0 in most cases).
+// On success the returned @code{.code} is @code{KEKSSchemaErrNo}.
+// @end deftypefun
+struct KEKSSchemaErr
+KEKSSchemaValidate(
+    const struct KEKSItems *schema,
+    struct KEKSItems *data,
+    size_t idxSchema,
+    size_t idxData);
+
+#endif // KEKS_SCHEMA_H
diff --git a/go/cmd/schema-validate/main.go b/go/cmd/schema-validate/main.go

new file mode 100644 (file)

index 0000000..c201e87
--- /dev/null
+++ b/go/cmd/schema-validate/main.go
@@ -0,0 +1,67 @@
+// schema-validate KEKS data structures validator
+// Copyright (C) 2024-2025 Sergey Matveev <stargrave@stargrave.org>
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation, version 3 of the License.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package main
+
+import (
+       "flag"
+       "fmt"
+       "log"
+       "os"
+
+       "go.cypherpunks.su/keks"
+       "go.cypherpunks.su/keks/schema"
+)
+
+// main loads a KEKS-encoded map of schemas and a KEKS-encoded data file,
+// then checks the data against the schema named on the command line.
+// Any failure is reported through log.Fatal.
+func main() {
+       flag.Parse()
+       log.SetFlags(log.Lshortfile)
+       if flag.NArg() != 3 {
+               fmt.Fprint(os.Stderr, "Usage: schema-validate SCHEMA.keks SCHEMA-NAME DATA.keks\n")
+               os.Exit(1)
+       }
+       schemaPath, schemaName, dataPath := flag.Arg(0), flag.Arg(1), flag.Arg(2)
+
+       // Both inputs are read before either of them is interpreted.
+       schemasBuf, err := os.ReadFile(schemaPath)
+       if err != nil {
+               log.Fatal(err)
+       }
+       dataBuf, err := os.ReadFile(dataPath)
+       if err != nil {
+               log.Fatal(err)
+       }
+
+       // The schema file must carry the schema magic; the data's magic,
+       // if any, is dropped without being inspected.
+       magic, schemasBuf := keks.StripMagic(schemasBuf)
+       if magic != schema.Magic {
+               log.Fatal("bad schema magic")
+       }
+       _, dataBuf = keks.StripMagic(dataBuf)
+
+       var schemas map[string][][]any
+       schemasDec := keks.NewDecoderFromBytes(schemasBuf, nil)
+       if err = schemasDec.DecodeStruct(&schemas); err != nil {
+               log.Fatal(err)
+       }
+
+       dataDec := keks.NewDecoderFromBytes(dataBuf, &keks.DecodeOpts{LeaveTAI64: true})
+       data, err := dataDec.Decode()
+       if err != nil {
+               log.Fatal(err)
+       }
+
+       if err = schema.Check(schemaName, schemas, data); err != nil {
+               log.Fatal(err)
+       }
+}
diff --git a/go/schema/check.go b/go/schema/check.go

new file mode 100644 (file)

index 0000000..9143eab
--- /dev/null
+++ b/go/schema/check.go
@@ -0,0 +1,228 @@
+// GoKEKS -- Go KEKS codec implementation
+// Copyright (C) 2024-2025 Sergey Matveev <stargrave@stargrave.org>
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation, version 3 of the License.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package schema
+
+import (
+       "errors"
+       "fmt"
+       "slices"
+
+       "go.cypherpunks.su/tai64n/v4"
+
+       "go.cypherpunks.su/keks"
+       "go.cypherpunks.su/keks/types"
+)
+
+// Names of the validation commands understood by Check, and the magic
+// expected at the start of a KEKS-encoded schemas file.
+const (
+       CmdTake      = "TAKE"
+       CmdEach      = "EACH"
+       CmdExists    = "EXISTS"
+       CmdNotExists = "!EXISTS"
+       CmdType      = "TYPE"
+       CmdLT        = "LT"
+       CmdGT        = "GT"
+       CmdSchema    = "SCHEMA"
+
+       Magic = "schema"
+)
+
+// Check validates data against the schema named schemaName from schemas.
+//
+// A schema is a list of commands; a command is a list whose first
+// element is the command name (one of the Cmd* constants) followed by
+// command-specific arguments. Commands run in order against the values
+// selected by the latest TAKE/EACH. The first failed check is returned
+// as an error; nil means every command passed.
+//
+// A malformed schema (unknown command or type name, bad TAKE target,
+// non-integer GT/LT argument) causes a panic.
+func Check(schemaName string, schemas map[string][][]any, data any) error {
+       acts := schemas[schemaName]
+       if acts == nil {
+               return errors.New(schemaName + ": no schema")
+       }
+       var single bool // TAKEn, not EACH
+       var vs []any    // currently selected values; nil means nothing was taken
+       for i, act := range acts {
+               switch cmd := act[0].(string); cmd {
+               case CmdExists:
+                       if vs == nil {
+                               return fmt.Errorf("%s: %d: %s", schemaName, i, cmd)
+                       }
+               case CmdNotExists:
+                       if vs != nil {
+                               return fmt.Errorf("%s: %d: %s", schemaName, i, cmd)
+                       }
+               case CmdTake:
+                       single = true
+                       switch k := act[1].(type) {
+                       case string:
+                               if k == "." {
+                                       vs = []any{data}
+                               } else {
+                                       // NOTE(review): panics when data is not a map -- confirm
+                                       // whether that should be reported as an error instead.
+                                       m := data.(map[string]any)
+                                       v, exists := m[k]
+                                       if !exists {
+                                               vs = nil
+                                               continue
+                                       }
+                                       vs = []any{v}
+                               }
+                       case uint64:
+                               l := data.([]any)
+                               // TAKE never fails: an index past the end of the list
+                               // means "no element", exactly like a missing map key.
+                               if k >= uint64(len(l)) {
+                                       vs = nil
+                                       continue
+                               }
+                               vs = []any{l[k]}
+                       default:
+                               panic("bad take target")
+                       }
+               case CmdEach:
+                       single = false
+                       // len() covers both "nothing taken" (nil) and an empty
+                       // selection left by a previous EACH, where vs[0] would panic.
+                       if len(vs) == 0 {
+                               continue
+                       }
+                       v := vs[0]
+                       switch v := v.(type) {
+                       case map[string]any:
+                               // Fresh slice: after a previous EACH vs aliases a list
+                               // inside data, which must not be overwritten. It stays
+                               // non-nil even for an empty map, as before.
+                               vs = make([]any, 0, len(v))
+                               for _, v := range v {
+                                       vs = append(vs, v)
+                               }
+                       case []any:
+                               vs = v
+                       default:
+                               return fmt.Errorf("%s: %d: %s: non-iterable", schemaName, i, cmd)
+                       }
+               case CmdType:
+                       if vs == nil {
+                               continue
+                       }
+                       // Translate the schema's type names; INT and TAI64 each
+                       // stand for several concrete types.
+                       expected := make([]types.Type, 0, len(act)-1)
+                       for _, t := range act[1:] {
+                               switch t := t.(string); t {
+                               case "NIL":
+                                       expected = append(expected, types.NIL)
+                               case "BOOL":
+                                       expected = append(expected, types.Bool)
+                               case "HEXLET":
+                                       expected = append(expected, types.Hexlet)
+                               case "INT":
+                                       expected = append(expected, types.UInt, types.Int)
+                               case "LIST":
+                                       expected = append(expected, types.List)
+                               case "MAP":
+                                       expected = append(expected, types.Map)
+                               case "BLOB":
+                                       expected = append(expected, types.Blob)
+                               case "TAI64":
+                                       expected = append(expected, types.TAI64, types.TAI64N, types.TAI64NA)
+                               case "MAGIC":
+                                       expected = append(expected, types.Magic)
+                               case "BIN":
+                                       expected = append(expected, types.Bin)
+                               case "STR":
+                                       expected = append(expected, types.Str)
+                               default:
+                                       panic("unknown type: " + t)
+                               }
+                       }
+                       var typ types.Type
+                       for n, v := range vs {
+                               switch v.(type) {
+                               case nil:
+                                       typ = types.NIL
+                               case bool:
+                                       typ = types.Bool
+                               case *keks.Hexlet:
+                                       typ = types.Hexlet
+                               case uint64:
+                                       typ = types.UInt
+                               case int64:
+                                       typ = types.Int
+                               case []any:
+                                       typ = types.List
+                               case map[string]any:
+                                       typ = types.Map
+                               case *keks.BlobChunked:
+                                       typ = types.Blob
+                               case *tai64n.TAI64:
+                                       typ = types.TAI64
+                               case *tai64n.TAI64N:
+                                       typ = types.TAI64N
+                               case *tai64n.TAI64NA:
+                                       typ = types.TAI64NA
+                               case keks.Magic:
+                                       typ = types.Magic
+                               case []byte:
+                                       typ = types.Bin
+                               case string:
+                                       typ = types.Str
+                               case keks.Raw:
+                                       typ = types.Raw
+                               default:
+                                       panic(fmt.Errorf("unsupported type: %+v", v))
+                               }
+                               if !slices.Contains(expected, typ) {
+                                       return fmt.Errorf("%s: %d: %d: %s: %T", schemaName, i, n, cmd, v)
+                               }
+                       }
+               case CmdGT, CmdLT:
+                       if vs == nil {
+                               continue
+                       }
+                       var expect int64
+                       switch v := act[1].(type) {
+                       case uint64:
+                               expect = int64(v)
+                       case int64:
+                               expect = v
+                       default:
+                               panic(fmt.Errorf("unsupported type: %+v", v))
+                       }
+                       for _, v := range vs {
+                               // NOTE(review): got stays 0 for values selected by EACH
+                               // (single == false) -- confirm this is intended.
+                               var got int64
+                               if single {
+                                       // Containers and strings are compared by length,
+                                       // integers by value.
+                                       switch v := v.(type) {
+                                       case string:
+                                               got = int64(len(v))
+                                       case []byte:
+                                               got = int64(len(v))
+                                       case []any:
+                                               got = int64(len(v))
+                                       case map[string]any:
+                                               got = int64(len(v))
+                                       case uint64:
+                                               got = int64(v)
+                                       case int64:
+                                               got = v
+                                       default:
+                                               panic("non len-able")
+                                       }
+                               }
+                               switch cmd {
+                               case CmdGT:
+                                       if got <= expect {
+                                               return fmt.Errorf("%s: %d: %d <= %d", schemaName, i, got, expect)
+                                       }
+                               case CmdLT:
+                                       if got >= expect {
+                                               return fmt.Errorf("%s: %d: %d >= %d", schemaName, i, got, expect)
+                                       }
+                               }
+                       }
+               case CmdSchema:
+                       if vs == nil {
+                               continue
+                       }
+                       // Every selected value is checked recursively against the
+                       // named schema.
+                       for n, v := range vs {
+                               if err := Check(act[1].(string), schemas, v); err != nil {
+                                       return fmt.Errorf("%s: %d: %d: %s: %w", schemaName, i, n, cmd, err)
+                               }
+                       }
+               default:
+                       panic("unknown command: " + cmd)
+               }
+       }
+       return nil
+}
diff --git a/go/utils/mk-bin b/go/utils/mk-bin

index 133b75f151e163e2b3b79c4a214d8ebb3945caa5e82688f8c111ad814c0d45dd..dafadc036d7d87e8870a64c972cd91b28bbd8afb325052837bd76ab3e0dd7cb2 100755 (executable)
--- a/go/utils/mk-bin
+++ b/go/utils/mk-bin
@@ -6,3 +6,5 @@ mkdir -p bin
  bin=$(realpath bin)
  cd cmd/pp
  go build -o $bin/kekspp -ldflags "$GO_LDFLAGS"
+cd ../schema-validate
+go build -o $bin/keks-schema-validate -ldflags "$GO_LDFLAGS"
diff --git a/spec/index.texi b/spec/index.texi

index 4f86294bea2670d808a60aa8c1a47e78e1e430bdc50f3c219eea4301776adb0b..6441412e97605561cab40aa2dc2df3c7a615fd2b138d24849b5567bad06d227d 100644 (file)
--- a/spec/index.texi
+++ b/spec/index.texi
@@ -131,6 +131,7 @@ and won't be able to interpret/validate them.
  @include design.texi
  @include install.texi
  @include encoding/index.texi
+@include schema/index.texi
  @include cm/index.texi
  
  @node Concepts Index
diff --git a/spec/schema/cmds.texi b/spec/schema/cmds.texi

new file mode 100644 (file)

index 0000000..4487ced
--- /dev/null
+++ b/spec/schema/cmds.texi
@@ -0,0 +1,144 @@
+@node SchemaCmds
+@cindex schema commands
+@nodedescription Schema commands
+@section Schema commands
+
+Data structure validation commands are grouped in so-called map of
+schemas. Map's key is schema name. Schema's value is a list of commands.
+Each command is a list of string-encoded words (with several exceptions).
+First element of the command's list is a command name. Possible
+following elements are command-specific.
+
+Here is the full list of structure validation commands, that should be
+generated from higher level schema descriptions.
+
+@table @code
+
+@item TAKE k
+Choose the value of the "k" key in the map, if "k" is a string.
+If "k" is integer, then choose the k-th value in a list.
+If "k" equals to ".", then choose the element you are currently in
+(current map or list). Command never fails, but key can be non-existent.
+
+@item EXISTS
+Check that TAKEn element exists.
+
+@item !EXISTS
+Check that TAKEn element does not exist.
+
+@item EACH
+Execute the next command against every element of the TAKEn list, or
+every value of the TAKEn map. Do nothing if the taken list/map is empty.
+
+@item TYPE T0 [T1 ...]
+Check that TAKEn element's type is in (T0[, T1 ...]) set.
+Do nothing if no element was taken.
+
+Possible types: BIN, BLOB, BOOL, HEXLET, INT, LIST, MAGIC, MAP, NIL,
+STR, TAI64.
+
+@item GT n
+Check that TAKEn integer value is greater than "n". If taken value is
+either list or map, then check their length. If the value is a string,
+then check its length. Do nothing if no element was taken.
+
+@item LT n
+Same as @code{GT}, but check that value is less than "n".
+
+@item SCHEMA s
+Check TAKEn element against schema with the name equal to "s".
+Do nothing if no element was taken.
+
+@item TIMEMAXPREC p
+Check maximal allowable time precision. "p" is integer with following
+possible values: 0 -- only full seconds allowed, no parts;
+3 -- only up to milliseconds;
+6 -- only up to microseconds;
+9 -- only up to nanoseconds;
+12 -- only up to picoseconds;
+15 -- only up to femtoseconds;
+18 -- up to attoseconds;
+
+@end table
+
+For example let's check "our" structure, described in CDDL as:
+
+@verbatim
+ai = text .gt 0
+fpr = bytes .size 32
+our = {a: ai, v: bytes/text, fpr: fpr, ?comment: text}
+@end verbatim
+
+"a", "v", "fpr" fields are required ones. "v" has two allowable types.
+"comment" is optional, but typed. And "fpr" has fixed length.
+Corresponding schema can be:
+
+@verbatim
+{"our": [
+    ["TAKE", "a"],
+    ["EXISTS"],
+    ["TAKE", "a"],
+    ["TYPE", "STR"],
+    ["TAKE", "a"],
+    ["GT", 0],
+
+    ["TAKE", "v"],
+    ["EXISTS"],
+    ["TAKE", "v"],
+    ["TYPE", "BIN", "STR"],
+
+    ["TAKE", "fpr"],
+    ["EXISTS"],
+    ["TAKE", "fpr"],
+    ["TYPE", "BIN"],
+    ["TAKE", "fpr"],
+    ["GT", 31],
+    ["TAKE", "fpr"],
+    ["LT", 33],
+
+    ["TAKE", "comment"],
+    ["TYPE", "STR"],
+]}
+@end verbatim
+
+Here is example with multiple schemas:
+
+@verbatim
+latitude = -90..90
+longitude = -180..180
+where = [latitude, longitude]
+wheres = [+ where]
+@end verbatim
+
+@verbatim
+{
+    "where": [
+        ["TAKE", "."],
+        ["TYPE", "LIST"],
+        ["TAKE", "."],
+        ["GT", 1],
+        ["TAKE", "."],
+        ["LT", 3],
+        ["TAKE", "."],
+        ["EACH"],
+        ["TYPE", "INT"],
+        ["TAKE", 0],
+        ["GT", -91],
+        ["TAKE", 0],
+        ["LT", 91],
+        ["TAKE", 1],
+        ["GT", -181],
+        ["TAKE", 1],
+        ["LT", 181],
+    ],
+    "wheres": [
+        ["TAKE", "."],
+        ["TYPE", "LIST"],
+        ["TAKE", "."],
+        ["GT", 0],
+        ["TAKE", "."],
+        ["EACH"],
+        ["SCHEMA", "where"],
+    ],
+}
+@end verbatim
diff --git a/spec/schema/index.texi b/spec/schema/index.texi

new file mode 100644 (file)

index 0000000..227a434
--- /dev/null
+++ b/spec/schema/index.texi
@@ -0,0 +1,34 @@
+@node Schemas
+@cindex Schemas
+@cindex structure validation
+@cindex data schemas
+@nodedescription Structure validation against schemas
+@unnumbered Data schemas
+
+Although KEKS can be decoded without any schema definition/specification,
+data structures are likely to be checked against some kind of the schema.
+Here is a suggestion (not a requirement!) to use relatively simple data
+structure validation specifications/schemas.
+
+How are data structures checked? You check if they have required fields,
+have necessary types of fields, satisfying lengths of the lists/maps or
+strings, and so on. In most cases those checks cover nearly everything
+when you sanitise the structures.
+
+So suggestion is to specify those steps for some kind of very simple
+minimalistic validation machine, that interprets them, executing
+validation commands against the provided data structures. That "machine"
+should be simple enough to be able to implement it quickly and with sane
+amount of code. Validation steps should be easily decodable and
+conveniently parsed even in C-language.
+
+Let's use KEKS format itself for the serialised validation steps! And
+generate them from higher level language/code, convenient for humans.
+
+@verbatim
+Tcl-schema -> keks-encode(validation-commands)
+     validate(keks-decode(validation-commands), keks-decode(data))
+@end verbatim
+
+@include schema/cmds.texi
+@include schema/tcl.texi
diff --git a/spec/schema/tcl.texi b/spec/schema/tcl.texi

new file mode 100644 (file)

index 0000000..d42f227
--- /dev/null
+++ b/spec/schema/tcl.texi
@@ -0,0 +1,62 @@
+@node TclSchemas
+@cindex Tcl schemas
+@nodedescription Tcl-written schemas
+@section Tcl schemas
+
+Validation commands are pretty low-level and are inconvenient to write
+by hand, at least because of huge quantity of TAKEs.
+@command{tcl/schema2bin} utility makes it possible to convert much
+nicer schemas, written in the Tcl language, to the KEKS-encoded commands.
+
+Example with "our" structure can be written as:
+
+@verbatim
+SCHEMAS {
+our {
+    {HAS a}
+    {TYPE= a {STR}}
+    {TAKE a}
+    {GT 0}
+
+    {HAS v}
+    {TYPE= v {BIN STR}}
+
+    {HAS fpr}
+    {TYPE= fpr {BIN}}
+    {LEN= fpr 32}
+
+    {TYPE= comment {STR}}
+}
+}
+@end verbatim
+
+and @ref{cm-pub, cm/pub} as:
+
+@verbatiminclude ../tcl/schemas/pub.tcl
+
+@command{schema2bin} provides additional shorter aliased commands:
+
+@table @code
+
+@item HAS k
+Check existence of "k" element.
+
+@item !HAS k
+Opposite to HAS.
+
+@item LEN= k l
+Check that the length of "k" (or its value, if "k" is an integer) is equal to "l".
+
+@item TYPE= k Ts
+Check that "k" has type in "Ts" set.
+
+@item TYPE* k Ts
+Check that each element of "k" has type in "Ts" set.
+
+@item SCHEMA= k s
+Check "k" against "s" schema.
+
+@item SCHEMA* k s
+Check each element of "k" against "s" schema.
+
+@end table
diff --git a/tcl/keks.tcl b/tcl/keks.tcl

index 68cc9cf5ef20093cf9ed8edf1109fa9c07549b0a567a8940ee720461216e4681..a1cae56bf5a7e7fcfbe049727f808a103653e3b090b081d55d6a3812af5018fb 100644 (file)
--- a/tcl/keks.tcl
+++ b/tcl/keks.tcl
@@ -22,30 +22,34 @@ proc add {v} {
      set buf [string cat $buf $v]
  }
  
-proc char {v} { add [binary format c $v] }
+proc char {v} {add [binary format c $v]}
  
-proc EOC {} { char [expr 0x00] }
-proc NIL {} { char [expr 0x01] }
-proc FALSE {} { char [expr 0x02] }
-proc TRUE {} { char [expr 0x03] }
+proc EOC {} {char [expr 0x00]}
+proc NIL {} {char [expr 0x01]}
+proc FALSE {} {char [expr 0x02]}
+proc TRUE {} {char [expr 0x03]}
  
  proc HEXLET {v} {
      set v [binary decode hex [string map {- ""} $v]]
-    if {[string length $v] != 16} { error "bad len" }
+    if {[string length $v] != 16} {
+        error "bad len"
+    }
      char [expr 0x04]
      add $v
  }
  
  proc MAGIC {v} {
      set l [string length $v]
-    if {$l > 12} { error "too long" }
+    if {$l > 12} {
+        error "too long"
+    }
      add "KEKS"
      add $v
      add [string repeat [binary format c 0] [expr {12 - $l}]]
  }
  
  proc toBEbin {l v} {
-    set a [list]
+    set a {}
      for {set i 0} {$i < $l} {incr i} {
          set b [expr {($l - $i - 1) * 8}]
          lappend a [binary format c [expr {($v & (0xFF << $b)) >> $b}]]
@@ -53,7 +57,7 @@ proc toBEbin {l v} {
      return [join $a ""]
  }
  
-proc toBE {l v} { add [toBEbin $l $v] }
+proc toBE {l v} {add [toBEbin $l $v]}
  
  proc INT {v} {
      if {$v >= 0} {
@@ -68,7 +72,9 @@ proc INT {v} {
      }
      set l 0
      while {1} {
-        if {$v < [expr {1 << (($l+1)*8)}]} { break }
+        if {$v < [expr {1 << (($l+1)*8)}]} {
+            break
+        }
          incr l
      }
      BIN [toBEbin [expr {$l + 1}] $v]
@@ -92,16 +98,20 @@ proc _str {atom v} {
          set vl [expr {$vl - 61}]
      }
      char [expr {$atom | $lv}]
-    if {$ll > 0} { toBE $ll $vl }
+    if {$ll > 0} {
+        toBE $ll $vl
+    }
      add $v
  }
  
-proc BIN {v} { _str [expr 0x80] $v}
-proc STR {v} { _str [expr {0x80 | 0x40}] [encoding convertto utf-8 $v]}
+proc BIN {v} {_str [expr 0x80] $v}
+proc STR {v} {_str [expr {0x80 | 0x40}] [encoding convertto utf-8 $v]}
  
  proc LIST {v} {
      char [expr 0x08]
-    foreach i $v { eval $i }
+    foreach i $v {
+        eval $i
+    }
      EOC
  }
  
@@ -110,23 +120,29 @@ proc LenFirstSort {a b} {
      set b [encoding convertto utf-8 $b]
      set al [string length $a]
      set bl [string length $b]
-    if {$al < $bl} { return -1 }
-    if {$al > $bl} { return 1 }
+    if {$al < $bl} {
+        return -1
+    }
+    if {$al > $bl} {
+        return 1
+    }
      for {set i 0} {$i < [string length $a]} {incr i} {
          set av [lindex $a $i]
          set bv [lindex $b $i]
-        if {$av < $bv} { return -1 }
-        if {$av > $bv} { return 1 }
+        if {$av < $bv} {
+            return -1
+        }
+        if {$av > $bv} {
+            return 1
+        }
      }
      error "non-unique keys"
  }
  
  proc MAP {pairs} {
      set d [dict create]
-    set keys [list]
-    for {set i 0} {$i < [llength $pairs]} {incr i 2} {
-        set k [lindex $pairs $i]
-        set v [lindex $pairs [expr {$i + 1}]]
+    set keys {}
+    foreach {k v} $pairs {
          lappend keys $k
          dict set d $k $v
      }
@@ -140,8 +156,10 @@ proc MAP {pairs} {
  }
  
  proc SET {v} {
-    set args [list]
-    foreach k $v { lappend args $k NIL }
+    set args {}
+    foreach k $v {
+        lappend args $k NIL
+    }
      MAP $args
  }
  
@@ -207,10 +225,14 @@ proc toTAI64 {v} {
      variable Leapsecs
      set i 0
      for {} {$i < [llength $Leapsecs]} {incr i} {
-        if {$v < [lindex $Leapsecs $i]} { break }
+        if {$v < [lindex $Leapsecs $i]} {
+            break
+        }
      }
      set v [expr {$v + 10 + $i}]
-    if {$v == [lindex $Leapsecs $i]} { incr v }
+    if {$v == [lindex $Leapsecs $i]} {
+        incr v
+    }
      set v [expr {$v + 0x4000000000000000}]
      toBE 8 $v
  }
diff --git a/tcl/schema2bin b/tcl/schema2bin

new file mode 100755 (executable)

index 0000000..9da26c7
--- /dev/null
+++ b/tcl/schema2bin
@@ -0,0 +1,114 @@
+#!/usr/bin/env tclsh
+# schema2bin -- Convert Tcl schemas to KEKS representation
+# Copyright (C) 2024-2025 Sergey Matveev <stargrave@stargrave.org>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as
+# published by the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+source [file join [file dirname $::argv0] keks.tcl]
+namespace import KEKS::*
+
+# TAKE v -- emit the ["TAKE", v] command.
+# A non-empty, all-digit v is encoded as an integer (list index);
+# anything else, including "." and the empty string, as a string (map key).
+proc TAKE {v} {
+    # -strict: without it [string is digit ""] is true and an empty key
+    # would be turned into the malformed "INT " instruction.
+    if {[string is digit -strict $v]} {
+        set v [list INT $v]
+    } {
+        set v [list STR $v]
+    }
+    subst {{LIST {{STR TAKE} {$v}}}}
+}
+# Argument-less commands: each returns a one-element list holding the
+# KEKS-encoding instruction for the single-word command ["NAME"].
+proc EXISTS {} {subst {{LIST {{STR EXISTS}}}}}
+proc !EXISTS {} {subst {{LIST {{STR !EXISTS}}}}}
+proc EACH {} {subst {{LIST {{STR EACH}}}}}
+# TYPE vs -- emit the ["TYPE", T0, T1, ...] command, one string per
+# type name listed in vs.
+proc TYPE {vs} {
+    set l {{STR TYPE}}
+    foreach v $vs {
+        lappend l "STR $v"
+    }
+    subst {{LIST {$l}}}
+}
+# Single-argument commands: ["NAME", v], with v encoded as an integer
+# for GT/LT/TIMEMAXPREC and as a string for SCHEMA.
+proc GT {v} {subst {{LIST {{STR GT} {INT $v}}}}}
+proc LT {v} {subst {{LIST {{STR LT} {INT $v}}}}}
+proc SCHEMA {v} {subst {{LIST {{STR SCHEMA} {STR $v}}}}}
+proc TIMEMAXPREC {v} {subst {{LIST {{STR TIMEMAXPREC} {INT $v}}}}}
+
+# evals cmds -- evaluate every script in cmds and concatenate the lists
+# they return into a single flat list.
+proc evals {cmds} {
+    set rv {}
+    foreach cmd $cmds {
+        set rv [concat $rv [eval $cmd]]
+    }
+    return $rv
+}
+
+# SCHEMAS v -- v is a flat list of name/commands pairs. Every schema's
+# commands are expanded with evals, wrapped into a LIST instruction and
+# the resulting name/LIST pairs are handed to MAP.
+proc SCHEMAS {v} {
+    set pairs {}
+    foreach {name cmds} $v {
+        lappend pairs $name [list LIST [evals $cmds]]
+    }
+    MAP $pairs
+}
+
+# HAS k -- shorthand for TAKE k followed by EXISTS.
+proc HAS {k} {
+    evals [subst {
+        {TAKE $k}
+        {EXISTS}
+    }]
+}
+
+# !HAS k -- shorthand for TAKE k followed by !EXISTS.
+proc !HAS {k} {
+    evals [subst {
+        {TAKE $k}
+        {!EXISTS}
+    }]
+}
+
+# LEN= k l -- shorthand for an equality check built from the two strict
+# comparisons: TAKE k, GT l-1, TAKE k, LT l+1.
+proc LEN= {k l} {
+    evals [subst {
+        {TAKE $k}
+        {GT [expr {$l - 1}]}
+        {TAKE $k}
+        {LT [expr {$l + 1}]}
+    }]
+}
+
+# TYPE= k types -- shorthand for TAKE k followed by TYPE types.
+proc TYPE= {k types} {
+    evals [subst {
+        {TAKE $k}
+        {TYPE {$types}}
+    }]
+}
+
+# TYPE* k types -- shorthand for TAKE k, EACH, TYPE types.
+proc TYPE* {k types} {
+    evals [subst {
+        {TAKE $k}
+        {EACH}
+        {TYPE {$types}}
+    }]
+}
+
+# SCHEMA= k schema -- shorthand for TAKE k followed by SCHEMA schema.
+proc SCHEMA= {k schema} {
+    evals [subst {
+        {TAKE $k}
+        {SCHEMA $schema}
+    }]
+}
+
+# SCHEMA* k schema -- shorthand for TAKE k, EACH, SCHEMA schema.
+proc SCHEMA* {k schema} {
+    evals [subst {
+        {TAKE $k}
+        {EACH}
+        {SCHEMA $schema}
+    }]
+}
+
+MAGIC schema
+source [lindex $::argv 0]
+puts [binary encode hex $::KEKS::buf]
diff --git a/tcl/schemas/pub.tcl b/tcl/schemas/pub.tcl

new file mode 100644 (file)

index 0000000..097ff63
--- /dev/null
+++ b/tcl/schemas/pub.tcl
@@ -0,0 +1,86 @@
+# Schema definitions av, pub, load, sig, tbs and pub-load, passed to
+# SCHEMAS as alternating name / command-list pairs.
+# NOTE(review): presumably meant to be run through tcl/schema2bin, which
+# defines SCHEMAS and the HAS/TYPE=/LEN=/... helpers -- confirm.
+# The braced argument is list data, so "#" comments cannot go inside it.
+SCHEMAS {
+
+av {
+    {HAS a}
+    {TYPE= a {STR}}
+    {TAKE a}
+    {GT 0}
+
+    {HAS v}
+    {TYPE= v {BIN}}
+}
+pub {
+    {HAS load}
+    {SCHEMA= load load}
+    {TYPE= sigs {LIST}}
+    {SCHEMA* sigs sig}
+
+    {TYPE= pubs {LIST}}
+    {TAKE pubs}
+    {GT 0}
+    {SCHEMA* pubs pub}
+}
+load {
+    {HAS t}
+    {TYPE= t {STR}}
+    {TAKE t}
+    {GT 0}
+
+    {HAS v}
+    {SCHEMA= v pub-load}
+}
+sig {
+    {HAS tbs}
+    {HAS sign}
+    {SCHEMA= sign av}
+    {SCHEMA= tbs tbs}
+}
+tbs {
+    {HAS sid}
+    {TYPE= sid {BIN}}
+    {LEN= sid 32}
+
+    {HAS cid}
+    {TYPE= cid {HEXLET}}
+
+    {HAS exp}
+    {TYPE= exp {LIST}}
+    {LEN= exp 2}
+    {TYPE* exp {TAI64}}
+    {TAKE exp}
+    {EACH}
+    {TIMEMAXPREC 0}
+
+    {TYPE= when {TAI64}}
+
+    {TYPE= nonce {BIN}}
+    {TAKE nonce}
+    {GT 0}
+}
+pub-load {
+    {HAS id}
+    {TYPE= id {BIN}}
+    {LEN= id 32}
+
+    {!HAS crit}
+
+    {TYPE= ku {MAP}}
+    {TAKE ku}
+    {GT 0}
+    {TYPE* ku {NIL}}
+
+    {HAS pub}
+    {TYPE= pub {LIST}}
+    {TAKE pub}
+    {GT 0}
+    {SCHEMA* pub av}
+
+    {HAS sub}
+    {TAKE sub}
+    {TYPE {MAP}}
+    {TAKE sub}
+    {GT 0}
+    {TYPE* sub {STR}}
+}
+
+}
author	Sergey Matveev <stargrave@stargrave.org>
	Thu, 27 Mar 2025 08:54:40 +0000 (11:54 +0300)
committer	Sergey Matveev <stargrave@stargrave.org>
	Thu, 3 Apr 2025 11:33:40 +0000 (14:33 +0300)
c/cmd/clean		patch \| blob \| history
c/cmd/pp/pp.c		patch \| blob \| history
c/cmd/schema-validate/.gitignore	[new file with mode: 0644]	patch \| blob
c/cmd/schema-validate/all.do	[new file with mode: 0644]	patch \| blob
c/cmd/schema-validate/clean	[new file with mode: 0755]	patch \| blob
c/cmd/schema-validate/schema-validate.c	[new file with mode: 0644]	patch \| blob
c/cmd/schema-validate/schema-validate.do	[new file with mode: 0644]	patch \| blob
c/lib/items.c		patch \| blob \| history
c/lib/items.h		patch \| blob \| history
c/lib/o.list		patch \| blob \| history
c/lib/schema.c	[new file with mode: 0644]	patch \| blob
c/lib/schema.h	[new file with mode: 0644]	patch \| blob
go/cmd/schema-validate/main.go	[new file with mode: 0644]	patch \| blob
go/schema/check.go	[new file with mode: 0644]	patch \| blob
go/utils/mk-bin		patch \| blob \| history
spec/index.texi		patch \| blob \| history
spec/schema/cmds.texi	[new file with mode: 0644]	patch \| blob
spec/schema/index.texi	[new file with mode: 0644]	patch \| blob
spec/schema/tcl.texi	[new file with mode: 0644]	patch \| blob
tcl/keks.tcl		patch \| blob \| history
tcl/schema2bin	[new file with mode: 0755]	patch \| blob
tcl/schemas/pub.tcl	[new file with mode: 0644]	patch \| blob