From 96ae592c5cea1b684250bdc07ee40247e873378bac8cdc34dc13075c3e2a7113 Mon Sep 17 00:00:00 2001
From: Sergey Matveev <stargrave@stargrave.org>
Date: Mon, 16 Jun 2025 14:29:31 +0300
Subject: [PATCH] Less TAKEs in schemas

---
 c/lib/schema.c     | 106 +++++++++++++++++++++------------------------
 go/schema/check.go |  52 ++++++++++------------
 spec/schema/cmds   |  17 ++------
 spec/schema/index  |  14 +++---
 spec/schema/tcl    |  30 +++++++------
 tcl/schema.tcl     |  16 +------
 6 files changed, 104 insertions(+), 131 deletions(-)

diff --git a/c/lib/schema.c b/c/lib/schema.c
index 2597882..72c0e30 100644
--- a/c/lib/schema.c
+++ b/c/lib/schema.c
@@ -152,6 +152,7 @@ keksSchemaCmd( // NOLINT(misc-no-recursion)
 {
     size_t origIdxSchema = idxSchema;
     struct KEKSSchemaErr err;
+    size_t v = *taken;
 Eached:
     idxSchema = origIdxSchema;
     err.offSchema = schema->offsets[idxSchema];
@@ -170,14 +171,14 @@ Eached:
     }
     if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdExists)) {
         err.msg = "EXISTS";
-        if ((*taken) == SIZE_MAX) {
+        if (v == SIZE_MAX) {
             err.code = KEKSSchemaErrInvalidData;
             return err;
         }
         err.code = KEKSSchemaErrNo;
     } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdNotExists)) {
         err.msg = "!EXISTS";
-        if ((*taken) != SIZE_MAX) {
+        if (v != SIZE_MAX) {
             err.code = KEKSSchemaErrInvalidData;
             return err;
         }
@@ -248,11 +249,12 @@ Eached:
             err.msg = "bad TAKE target";
             return err;
         }
+        v = *taken;
         err.msg = "TAKE";
         err.code = KEKSSchemaErrNo;
     } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdEq)) {
         err.msg = "EQ";
-        if ((*taken) == SIZE_MAX) {
+        if (v == SIZE_MAX) {
             err.code = KEKSSchemaErrNo;
         } else {
             idxSchema = schema->list[idxSchema].next;
@@ -268,10 +270,10 @@ Eached:
                 return err;
             }
             err.code = ((schema->list[idxSchema].atom.v.str.len ==
-                         data->list[*taken].atom.v.str.len) &&
+                         data->list[v].atom.v.str.len) &&
                         (memcmp(
                              schema->list[idxSchema].atom.v.str.ptr,
-                             data->list[*taken].atom.v.str.ptr,
+                             data->list[v].atom.v.str.ptr,
                              schema->list[idxSchema].atom.v.str.len) == 0)) ?
                            KEKSSchemaErrNo :
                            KEKSSchemaErrInvalidData;
@@ -280,11 +282,11 @@ Eached:
         err.msg = "EACH";
         (*eachInList) = false;
         (*eachInMap) = false;
-        if ((*taken) == SIZE_MAX) {
+        if (v == SIZE_MAX) {
             err.code = KEKSSchemaErrNo;
             return err;
         }
-        switch (data->list[*taken].atom.typ) {
+        switch (data->list[v].atom.typ) {
         case KEKSItemList:
             (*eachInList) = true;
             break;
@@ -311,12 +313,12 @@ Eached:
             err.msg = "non-iterable EACH";
             return err;
         }
-        if (data->list[*taken].atom.v.list.len == 0) {
+        if (data->list[v].atom.v.list.len == 0) {
             (*taken) = SIZE_MAX;
         } else {
-            (*taken) = data->list[*taken].atom.v.list.head;
+            (*taken) = data->list[v].atom.v.list.head;
             if (*eachInMap) {
-                (*taken) = data->list[*taken].next;
+                (*taken) = data->list[(*taken)].next;
             }
         }
         err.code = KEKSSchemaErrNo;
@@ -383,12 +385,12 @@ Eached:
             idxSchema = schema->list[idxSchema].next;
         }
         err.msg = "TYPE";
-        if ((*taken) == SIZE_MAX) {
+        if (v == SIZE_MAX) {
             err.code = KEKSSchemaErrNo;
         } else {
             bool found = false;
             for (size_t i = 0; i < expectedLen; i++) {
-                if (expected[i] == data->list[*taken].atom.typ) {
+                if (expected[i] == data->list[v].atom.typ) {
                     found = true;
                     break;
                 }
@@ -397,7 +399,7 @@ Eached:
         }
     } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdGT)) {
         err.msg = "GT";
-        if ((*taken) == SIZE_MAX) {
+        if (v == SIZE_MAX) {
             err.code = KEKSSchemaErrNo;
         } else {
             idxSchema = schema->list[idxSchema].next;
@@ -407,11 +409,11 @@ Eached:
                 return err;
             }
             err.offSchema = schema->offsets[idxSchema];
-            err.offData = data->offsets[*taken];
+            err.offData = data->offsets[v];
             int64_t our = 0;
             int64_t their = 0;
             struct KEKSSchemaErr errLens =
-                keksSchemaLens(&our, &their, schema, data, idxSchema, *taken);
+                keksSchemaLens(&our, &their, schema, data, idxSchema, v);
             if (errLens.code != KEKSSchemaErrNo) {
                 return errLens;
             }
@@ -419,7 +421,7 @@ Eached:
         }
     } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdLT)) {
         err.msg = "LT";
-        if ((*taken) == SIZE_MAX) {
+        if (v == SIZE_MAX) {
             err.code = KEKSSchemaErrNo;
         } else {
             idxSchema = schema->list[idxSchema].next;
@@ -429,11 +431,11 @@ Eached:
                 return err;
             }
             err.offSchema = schema->offsets[idxSchema];
-            err.offData = data->offsets[*taken];
+            err.offData = data->offsets[v];
             int64_t our = 0;
             int64_t their = 0;
             struct KEKSSchemaErr errLens =
-                keksSchemaLens(&our, &their, schema, data, idxSchema, *taken);
+                keksSchemaLens(&our, &their, schema, data, idxSchema, v);
             if (errLens.code != KEKSSchemaErrNo) {
                 return errLens;
             }
@@ -464,11 +466,11 @@ Eached:
         }
         err.offSchema = schema->offsets[idxSchema];
         err.msg = "SCHEMA";
-        if ((*taken) == SIZE_MAX) {
+        if (v == SIZE_MAX) {
             err.code = KEKSSchemaErrNo;
         } else {
             struct KEKSSchemaErr errSchema =
-                KEKSSchemaValidate(schema, data, idxSchema, *taken);
+                KEKSSchemaValidate(schema, data, idxSchema, v);
             if (errSchema.code != KEKSSchemaErrNo) {
                 return errSchema;
             }
@@ -488,67 +490,67 @@ Eached:
             return err;
         }
         err.msg = "TIMEPREC";
-        if ((*taken) == SIZE_MAX) {
+        if (v == SIZE_MAX) {
             err.code = KEKSSchemaErrNo;
         } else {
-            if (data->list[*taken].atom.typ != KEKSItemTAI64) {
+            if (data->list[v].atom.typ != KEKSItemTAI64) {
                 err.code = KEKSSchemaErrUnexpectedState;
                 err.msg = "non-TAI64 taken";
                 return err;
             }
-            uint32_t v = 0;
+            uint32_t val = 0;
             switch (schema->list[idxSchema].atom.v.pint) {
             case 0: // s
-                if (data->list[*taken].atom.v.str.len > 8) {
+                if (data->list[v].atom.v.str.len > 8) {
                     err.code = KEKSSchemaErrInvalidData;
                     err.msg = ">TAI64";
                 }
                 break;
             case 3: // ms
-                if (data->list[*taken].atom.v.str.len > 12) {
+                if (data->list[v].atom.v.str.len > 12) {
                     err.code = KEKSSchemaErrInvalidData;
                     err.msg = ">TAI64N";
                 }
-                if (data->list[*taken].atom.v.str.len > 8) {
-                    v = (uint32_t)keksFromBE(data->list[*taken].atom.v.str.ptr + 8, 4);
-                    if ((v % 1000000) != 0) {
+                if (data->list[v].atom.v.str.len > 8) {
+                    val = (uint32_t)keksFromBE(data->list[v].atom.v.str.ptr + 8, 4);
+                    if ((val % 1000000) != 0) {
                         err.code = KEKSSchemaErrInvalidData;
                         err.msg = ">ms";
                     }
                 }
                 break;
             case 6: // µs
-                if (data->list[*taken].atom.v.str.len > 12) {
+                if (data->list[v].atom.v.str.len > 12) {
                     err.code = KEKSSchemaErrInvalidData;
                     err.msg = ">TAI64N";
                 }
-                if (data->list[*taken].atom.v.str.len > 8) {
-                    v = (uint32_t)keksFromBE(data->list[*taken].atom.v.str.ptr + 8, 4);
-                    if ((v % 1000) != 0) {
+                if (data->list[v].atom.v.str.len > 8) {
+                    val = (uint32_t)keksFromBE(data->list[v].atom.v.str.ptr + 8, 4);
+                    if ((val % 1000) != 0) {
                         err.code = KEKSSchemaErrInvalidData;
                         err.msg = ">Âµs";
                     }
                 }
                 break;
             case 9: // ns
-                if (data->list[*taken].atom.v.str.len > 12) {
+                if (data->list[v].atom.v.str.len > 12) {
                     err.code = KEKSSchemaErrInvalidData;
                     err.msg = ">TAI64N";
                 }
                 break;
             case 12: // ps
-                if (data->list[*taken].atom.v.str.len > 12) {
-                    v = (uint32_t)keksFromBE(data->list[*taken].atom.v.str.ptr + 12, 4);
-                    if ((v % 1000000) != 0) {
+                if (data->list[v].atom.v.str.len > 12) {
+                    val = (uint32_t)keksFromBE(data->list[v].atom.v.str.ptr + 12, 4);
+                    if ((val % 1000000) != 0) {
                         err.code = KEKSSchemaErrInvalidData;
                         err.msg = ">ps";
                     }
                 }
                 break;
             case 15: // fs
-                if (data->list[*taken].atom.v.str.len > 12) {
-                    v = (uint32_t)keksFromBE(data->list[*taken].atom.v.str.ptr + 12, 4);
-                    if ((v % 1000) != 0) {
+                if (data->list[v].atom.v.str.len > 12) {
+                    val = (uint32_t)keksFromBE(data->list[v].atom.v.str.ptr + 12, 4);
+                    if ((val % 1000) != 0) {
                         err.code = KEKSSchemaErrInvalidData;
                         err.msg = ">fs";
                     }
@@ -565,19 +567,17 @@ Eached:
         }
     } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdUTC)) {
         err.msg = "UTC";
-        if ((*taken) == SIZE_MAX) {
+        if (v == SIZE_MAX) {
             err.code = KEKSSchemaErrNo;
         } else {
-            if (data->list[*taken].atom.typ != KEKSItemTAI64) {
+            if (data->list[v].atom.typ != KEKSItemTAI64) {
                 err.code = KEKSSchemaErrUnexpectedState;
                 err.msg = "non-TAI64 taken";
                 return err;
             }
             struct timespec tv;
             enum KEKSErr errConvert = KEKSTAI64ToTimespec(
-                &tv,
-                data->list[*taken].atom.v.str.ptr,
-                data->list[*taken].atom.v.str.len);
+                &tv, data->list[v].atom.v.str.ptr, data->list[v].atom.v.str.len);
             if (errConvert != KEKSErrNo) {
                 err.code = KEKSSchemaErrInvalidData;
                 err.msg = "can not convert to timespec";
@@ -599,22 +599,16 @@ Eached:
         return err;
     }
     if (*eachInList) {
-        (*taken) = data->list[*taken].next;
-        if ((*taken) == 0) {
-            (*taken) = SIZE_MAX;
-        } else {
+        v = data->list[v].next;
+        if (v != 0) {
             goto Eached;
         }
     }
     if (*eachInMap) {
-        (*taken) = data->list[*taken].next; // key
-        if ((*taken) == 0) {
-            (*taken) = SIZE_MAX;
-        } else {
-            (*taken) = data->list[*taken].next; // value
-            if ((*taken) == 0) {
-                (*taken) = SIZE_MAX;
-            } else {
+        v = data->list[v].next; // key
+        if (v != 0) {
+            v = data->list[v].next; // value
+            if (v != 0) {
                 goto Eached;
             }
         }
diff --git a/go/schema/check.go b/go/schema/check.go
index 83b79ef..42d8d97 100644
--- a/go/schema/check.go
+++ b/go/schema/check.go
@@ -86,7 +86,6 @@ func Check(schemaName string, schemas map[string][][]any, data any) error {
 		}}
 	}
 	var taken string
-	var single bool // TAKEn, not EACH
 	var vs []any
 	for cmdIdx, cmd := range cmds {
 		if len(cmd) == 0 {
@@ -125,7 +124,7 @@ func Check(schemaName string, schemas map[string][][]any, data any) error {
 			}
 		case CmdTake:
 			taken = ""
-			single = true
+			vs = nil
 			if len(cmd) != 2 {
 				return &SchemaErr{BaseErr: BaseErr{
 					SchemaName: schemaName,
@@ -153,7 +152,6 @@ func Check(schemaName string, schemas map[string][][]any, data any) error {
 					}
 					v, exists := m[k]
 					if !exists {
-						vs = nil
 						continue
 					}
 					vs = []any{v}
@@ -183,7 +181,6 @@ func Check(schemaName string, schemas map[string][][]any, data any) error {
 				}}
 			}
 		case CmdEach:
-			single = false
 			if vs == nil {
 				continue
 			}
@@ -205,6 +202,7 @@ func Check(schemaName string, schemas map[string][][]any, data any) error {
 					Msg:        "non iterable",
 				}}
 			}
+			taken += "*"
 		case CmdEq:
 			if vs == nil {
 				continue
@@ -411,30 +409,28 @@ func Check(schemaName string, schemas map[string][][]any, data any) error {
 			}
 			for _, v := range vs {
 				var got int64
-				if single {
-					switch v := v.(type) {
-					case string:
-						got = int64(len(v))
-					case []byte:
-						got = int64(len(v))
-					case []any:
-						got = int64(len(v))
-					case map[string]any:
-						got = int64(len(v))
-					case uint64:
-						got = int64(v)
-					case int64:
-						got = v
-					default:
-						return &SchemaErr{BaseErr: BaseErr{
-							SchemaName: schemaName,
-							CmdIdx:     cmdIdx,
-							CmdName:    cmdName,
-							Taken:      taken,
-							Msg:        "non len-able",
-							Data:       v,
-						}}
-					}
+				switch v := v.(type) {
+				case string:
+					got = int64(len(v))
+				case []byte:
+					got = int64(len(v))
+				case []any:
+					got = int64(len(v))
+				case map[string]any:
+					got = int64(len(v))
+				case uint64:
+					got = int64(v)
+				case int64:
+					got = v
+				default:
+					return &SchemaErr{BaseErr: BaseErr{
+						SchemaName: schemaName,
+						CmdIdx:     cmdIdx,
+						CmdName:    cmdName,
+						Taken:      taken,
+						Msg:        "non len-able",
+						Data:       v,
+					}}
 				}
 				ok = false
 				switch cmdName {
diff --git a/spec/schema/cmds b/spec/schema/cmds
index 47a936d..9bf4ea5 100644
--- a/spec/schema/cmds
+++ b/spec/schema/cmds
@@ -12,6 +12,7 @@ TAKE | [".", k]
     If "k" is integer, then choose the k-th value in a list.
     If "k" equals to ".", then choose the element you are currently in.
     Command never fails, but key can be non-existent.
+    All following commands will be applied to the taken value.
 
 EXISTS | ["E"]
     Assure that chosen element exists.
@@ -59,6 +60,7 @@ EQ | ["=", v]
     Check that chosen (if it exists) element's value equals to binary
     string "v".
 
+=> https://datatracker.ietf.org/doc/html/rfc8610 CDDL
 For example let's check "our" structure, described in CDDL as:
 
     ai = text .gt 0
@@ -72,23 +74,17 @@ Corresponding schema can be:
     {"our": [
         [".", "a"],
         ["E"],
-        [".", "a"],
         ["T", "STR"],
-        [".", "a"],
         [">", 0],
 
         [".", "v"],
         ["E"],
-        [".", "v"],
         ["T", "BIN", "STR"],
 
         [".", "fpr"],
         ["E"],
-        [".", "fpr"],
         ["T", "BIN"],
-        [".", "fpr"],
         [">", 31],
-        [".", "fpr"],
         ["<", 33],
 
         [".", "comment"],
@@ -106,28 +102,23 @@ Here is an example with multiple schemas:
         "where": [
             [".", "."],
             ["T", "LIST"],
-            [".", "."],
             [">", 1],
-            [".", "."],
             ["<", 3],
-            [".", "."],
             ["*"],
             ["T", "INT"],
+
             [".", 0],
             [">", -91],
-            [".", 0],
             ["<", 91],
+
             [".", 1],
             [">", -181],
-            [".", 1],
             ["<", 181],
         ],
         "wheres": [
             [".", "."],
             ["T", "LIST"],
-            [".", "."],
             [">", 0],
-            [".", "."],
             ["*"],
             ["S", "where"],
         ],
diff --git a/spec/schema/index b/spec/schema/index
index 610c99b..237b18b 100644
--- a/spec/schema/index
+++ b/spec/schema/index
@@ -8,16 +8,16 @@ have necessary types of fields, satisfying lengths of the lists/maps or
 strings, and so on. In most cases those checks covers nearly everything
 when you sanitise the structures.
 
-So suggestion is to specify those steps for some kind of very simple
-minimalistic validation machine, that interprets them, executing
-validation commands ([schema/cmds]) against the provided data
-structures. That "machine" should be simple enough to be able to
-implement it quickly and with sane amount of code. Validation steps
-should be easily decodable and conveniently parsed even in C-language.
+So the suggestion is to specify those steps for some kind of
+interpreter that executes validation commands ([schema/cmds]) against
+the provided data structures. That interpreter should be simple enough
+to be implemented quickly and with a sane amount of code. Validation
+steps should be easily decodable and conveniently parsed even in the
+C language.
 
 Let's use KEKS format itself for the serialised validation steps! And
 generate them from higher level language/code ([schema/tcl]), convenient
 for humans.
 
     [schema/tcl] -> [schema/cmds] -> keks-encode() => schema
-    validate(schema, keks-decode(data))
+    validate(schema, data)
diff --git a/spec/schema/tcl b/spec/schema/tcl
index 73b8555..857c0db 100644
--- a/spec/schema/tcl
+++ b/spec/schema/tcl
@@ -1,10 +1,9 @@
-Validation commands are pretty low-level and are inconvenient to write
-by hand, at least because of huge quantity of TAKEs.
-tcl/schema.tcl utility gives ability to convert much more nicer schemas
-written on Tcl language to the KEKS-encoded commands. We call those
-Tcl-written schemas "KEKS/Schema".
+Validation commands ([schema/cmds]) are pretty low-level and are
+inconvenient to write by hand. The tcl/schema.tcl utility converts
+much nicer schemas, written in the Tcl language, to the KEKS-encoded
+commands. We call those Tcl-written schemas "KEKS/Schema".
 
-Example with "our" structure ([schema/cmds]) can be written as:
+Example with "our" structure (from [schema/cmds]) can be written as:
 
     ai {{field . {str} >0}}
     fpr {{field . {bin} len=32}}
@@ -22,7 +21,7 @@ and [cm/pub/] as:
 <<    [schemas/pub-load.tcl]
 <<    [schemas/pub-sig-tbs.tcl]
 
-schema.tcl calls "schemas {s0 cmds0 s1 cmds1 ...}" commands to produce
+schema.tcl calls "schemas {s0 cmds0 s1 cmds1 ...}" command to produce
 an encoded map with "cmds*" commands for "s*" schemas.
 
 "field" command helps creation of commands related to the field.
@@ -30,10 +29,10 @@ an encoded map with "cmds*" commands for "s*" schemas.
 Its first argument is either field's name in the map, or list's index or
 dot, meaning the self-structure itself.
 
-Second argument is a list of allowable types, written in lowercase. If
-that list consists of "with S", then SCHEMA command will be called
-instead of TYPE checking. If list consists of "set", then it is
-checked to be a MAP with EACH value of NIL.
+The second argument is a list of allowable types, written in lowercase.
+If that list equals {with s}, then the {SCHEMA s} command will be issued
+instead of TYPE checking. If the list equals {set}, then the value is
+checked to be a MAP with EACH value being NIL.
 
 All other arguments are optional.
 
@@ -44,11 +43,16 @@ you can specify empty list of types in second argument.
 
 ">n" and "<n" arguments allow checking of the integer value or
 the lengths. ">0" assures that either list/map or strings are not
-empty. "len=n" checks the exact length. "=v" checks that given
-element has specified string/binary value (use "len=" for integers).
+empty.
+
+"len=n" checks the exact length.
+
+"=v" checks that the given element has the specified string/binary
+value (use "len=" for integers).
 
 "prec=p" issues TIMEPREC command, but instead of specifying the raw
 integer values, you choose one of: s, ms, us, ns, ps, fs, as.
+
 "utc" issues UTC command.
 
 {of s} argument issues checking of EACH element of the list or map
diff --git a/tcl/schema.tcl b/tcl/schema.tcl
index 35f7671..60c528e 100755
--- a/tcl/schema.tcl
+++ b/tcl/schema.tcl
@@ -50,47 +50,39 @@ set knownTypes {bin blob bool hexlet int list magic map nil set str tai}
 
 proc field {k types args} {
     upvar _cmds _cmds buf buf
+    lappend _cmds [TAKE $k]
     if {[lindex $types 0] == "with"} {
-        lappend _cmds [TAKE $k]
         lappend _cmds [SCHEMA [lindex $types 1]]
     } elseif {[lsearch -exact $types set] == -1} {
         if {[llength $types] != 0} {
-            lappend _cmds [TAKE $k]
             lappend _cmds [TYPE $types]
         }
     } else {
         if {[llength $types] != 1} {
             error "set can be the only one among types"
         }
-        lappend _cmds [TAKE $k]
         lappend _cmds [TYPE {MAP}]
-        lappend _cmds [TAKE $k]
         lappend _cmds [EACH]
         lappend _cmds [TYPE {NIL}]
+        lappend _cmds [TAKE $k]
     }
     if {[lsearch -exact $args !exists] != -1} {
-        lappend _cmds [TAKE $k]
         lappend _cmds [!EXISTS]
     } elseif {[lsearch -exact $args optional] == -1} {
-        lappend _cmds [TAKE $k]
         lappend _cmds [EXISTS]
     }
     set i [lsearch -glob $args "len=*"]
     if {$i != -1} {
         set n [string range [lindex $args $i] 4 end]
-        lappend _cmds [TAKE $k]
         lappend _cmds [GT [expr {$n - 1}]]
-        lappend _cmds [TAKE $k]
         lappend _cmds [LT [expr {$n + 1}]]
     }
     set i [lsearch -glob $args ">*"]
     if {$i != -1} {
-        lappend _cmds [TAKE $k]
         lappend _cmds [GT [string range [lindex $args $i] 1 end]]
     }
     set i [lsearch -glob $args "<*"]
     if {$i != -1} {
-        lappend _cmds [TAKE $k]
         lappend _cmds [LT [string range [lindex $args $i] 1 end]]
     }
     set i [lsearch -glob $args "prec=*"]
@@ -98,22 +90,18 @@ proc field {k types args} {
         set p [string range [lindex $args $i] 5 end]
         variable timeprecArgs
         set p [dict get $timeprecArgs $p]
-        lappend _cmds [TAKE $k]
         lappend _cmds [TIMEPREC $p]
     }
     if {[lsearch -exact $args utc] != -1} {
-        lappend _cmds [TAKE $k]
         lappend _cmds [UTC]
     }
     set i [lsearch -glob $args "=*"]
     if {$i != -1} {
-        lappend _cmds [TAKE $k]
         lappend _cmds [EQ [string range [lindex $args $i] 1 end]]
     }
     set i [lsearch -glob $args "of *"]
     if {$i != -1} {
         set s [lindex [lindex $args $i] 1]
-        lappend _cmds [TAKE $k]
         lappend _cmds [EACH]
         variable knownTypes
         if {[lsearch -exact $knownTypes $s] == -1} {
-- 
2.50.0