From 96ae592c5cea1b684250bdc07ee40247e873378bac8cdc34dc13075c3e2a7113 Mon Sep 17 00:00:00 2001 From: Sergey Matveev Date: Mon, 16 Jun 2025 14:29:31 +0300 Subject: [PATCH] Less TAKEs in schemas --- c/lib/schema.c | 106 +++++++++++++++++++++------------------------ go/schema/check.go | 52 ++++++++++------------ spec/schema/cmds | 17 ++------ spec/schema/index | 14 +++--- spec/schema/tcl | 30 +++++++------ tcl/schema.tcl | 16 +------ 6 files changed, 104 insertions(+), 131 deletions(-) diff --git a/c/lib/schema.c b/c/lib/schema.c index 2597882..72c0e30 100644 --- a/c/lib/schema.c +++ b/c/lib/schema.c @@ -152,6 +152,7 @@ keksSchemaCmd( // NOLINT(misc-no-recursion) { size_t origIdxSchema = idxSchema; struct KEKSSchemaErr err; + size_t v = *taken; Eached: idxSchema = origIdxSchema; err.offSchema = schema->offsets[idxSchema]; @@ -170,14 +171,14 @@ Eached: } if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdExists)) { err.msg = "EXISTS"; - if ((*taken) == SIZE_MAX) { + if (v == SIZE_MAX) { err.code = KEKSSchemaErrInvalidData; return err; } err.code = KEKSSchemaErrNo; } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdNotExists)) { err.msg = "!EXISTS"; - if ((*taken) != SIZE_MAX) { + if (v != SIZE_MAX) { err.code = KEKSSchemaErrInvalidData; return err; } @@ -248,11 +249,12 @@ Eached: err.msg = "bad TAKE target"; return err; } + v = *taken; err.msg = "TAKE"; err.code = KEKSSchemaErrNo; } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdEq)) { err.msg = "EQ"; - if ((*taken) == SIZE_MAX) { + if (v == SIZE_MAX) { err.code = KEKSSchemaErrNo; } else { idxSchema = schema->list[idxSchema].next; @@ -268,10 +270,10 @@ Eached: return err; } err.code = ((schema->list[idxSchema].atom.v.str.len == - data->list[*taken].atom.v.str.len) && + data->list[v].atom.v.str.len) && (memcmp( schema->list[idxSchema].atom.v.str.ptr, - data->list[*taken].atom.v.str.ptr, + data->list[v].atom.v.str.ptr, schema->list[idxSchema].atom.v.str.len) == 0)) ? 
KEKSSchemaErrNo : KEKSSchemaErrInvalidData; @@ -280,11 +282,11 @@ Eached: err.msg = "EACH"; (*eachInList) = false; (*eachInMap) = false; - if ((*taken) == SIZE_MAX) { + if (v == SIZE_MAX) { err.code = KEKSSchemaErrNo; return err; } - switch (data->list[*taken].atom.typ) { + switch (data->list[v].atom.typ) { case KEKSItemList: (*eachInList) = true; break; @@ -311,12 +313,12 @@ Eached: err.msg = "non-iterable EACH"; return err; } - if (data->list[*taken].atom.v.list.len == 0) { + if (data->list[v].atom.v.list.len == 0) { (*taken) = SIZE_MAX; } else { - (*taken) = data->list[*taken].atom.v.list.head; + (*taken) = data->list[v].atom.v.list.head; if (*eachInMap) { - (*taken) = data->list[*taken].next; + (*taken) = data->list[(*taken)].next; } } err.code = KEKSSchemaErrNo; @@ -383,12 +385,12 @@ Eached: idxSchema = schema->list[idxSchema].next; } err.msg = "TYPE"; - if ((*taken) == SIZE_MAX) { + if (v == SIZE_MAX) { err.code = KEKSSchemaErrNo; } else { bool found = false; for (size_t i = 0; i < expectedLen; i++) { - if (expected[i] == data->list[*taken].atom.typ) { + if (expected[i] == data->list[v].atom.typ) { found = true; break; } @@ -397,7 +399,7 @@ Eached: } } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdGT)) { err.msg = "GT"; - if ((*taken) == SIZE_MAX) { + if (v == SIZE_MAX) { err.code = KEKSSchemaErrNo; } else { idxSchema = schema->list[idxSchema].next; @@ -407,11 +409,11 @@ Eached: return err; } err.offSchema = schema->offsets[idxSchema]; - err.offData = data->offsets[*taken]; + err.offData = data->offsets[v]; int64_t our = 0; int64_t their = 0; struct KEKSSchemaErr errLens = - keksSchemaLens(&our, &their, schema, data, idxSchema, *taken); + keksSchemaLens(&our, &their, schema, data, idxSchema, v); if (errLens.code != KEKSSchemaErrNo) { return errLens; } @@ -419,7 +421,7 @@ Eached: } } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdLT)) { err.msg = "LT"; - if ((*taken) == SIZE_MAX) { + if (v == SIZE_MAX) { err.code = KEKSSchemaErrNo; } 
else { idxSchema = schema->list[idxSchema].next; @@ -429,11 +431,11 @@ Eached: return err; } err.offSchema = schema->offsets[idxSchema]; - err.offData = data->offsets[*taken]; + err.offData = data->offsets[v]; int64_t our = 0; int64_t their = 0; struct KEKSSchemaErr errLens = - keksSchemaLens(&our, &their, schema, data, idxSchema, *taken); + keksSchemaLens(&our, &their, schema, data, idxSchema, v); if (errLens.code != KEKSSchemaErrNo) { return errLens; } @@ -464,11 +466,11 @@ Eached: } err.offSchema = schema->offsets[idxSchema]; err.msg = "SCHEMA"; - if ((*taken) == SIZE_MAX) { + if (v == SIZE_MAX) { err.code = KEKSSchemaErrNo; } else { struct KEKSSchemaErr errSchema = - KEKSSchemaValidate(schema, data, idxSchema, *taken); + KEKSSchemaValidate(schema, data, idxSchema, v); if (errSchema.code != KEKSSchemaErrNo) { return errSchema; } @@ -488,67 +490,67 @@ Eached: return err; } err.msg = "TIMEPREC"; - if ((*taken) == SIZE_MAX) { + if (v == SIZE_MAX) { err.code = KEKSSchemaErrNo; } else { - if (data->list[*taken].atom.typ != KEKSItemTAI64) { + if (data->list[v].atom.typ != KEKSItemTAI64) { err.code = KEKSSchemaErrUnexpectedState; err.msg = "non-TAI64 taken"; return err; } - uint32_t v = 0; + uint32_t val = 0; switch (schema->list[idxSchema].atom.v.pint) { case 0: // s - if (data->list[*taken].atom.v.str.len > 8) { + if (data->list[v].atom.v.str.len > 8) { err.code = KEKSSchemaErrInvalidData; err.msg = ">TAI64"; } break; case 3: // ms - if (data->list[*taken].atom.v.str.len > 12) { + if (data->list[v].atom.v.str.len > 12) { err.code = KEKSSchemaErrInvalidData; err.msg = ">TAI64N"; } - if (data->list[*taken].atom.v.str.len > 8) { - v = (uint32_t)keksFromBE(data->list[*taken].atom.v.str.ptr + 8, 4); - if ((v % 1000000) != 0) { + if (data->list[v].atom.v.str.len > 8) { + val = (uint32_t)keksFromBE(data->list[v].atom.v.str.ptr + 8, 4); + if ((val % 1000000) != 0) { err.code = KEKSSchemaErrInvalidData; err.msg = ">ms"; } } break; case 6: // µs - if 
(data->list[*taken].atom.v.str.len > 12) { + if (data->list[v].atom.v.str.len > 12) { err.code = KEKSSchemaErrInvalidData; err.msg = ">TAI64N"; } - if (data->list[*taken].atom.v.str.len > 8) { - v = (uint32_t)keksFromBE(data->list[*taken].atom.v.str.ptr + 8, 4); - if ((v % 1000) != 0) { + if (data->list[v].atom.v.str.len > 8) { + val = (uint32_t)keksFromBE(data->list[v].atom.v.str.ptr + 8, 4); + if ((val % 1000) != 0) { err.code = KEKSSchemaErrInvalidData; err.msg = ">µs"; } } break; case 9: // ns - if (data->list[*taken].atom.v.str.len > 12) { + if (data->list[v].atom.v.str.len > 12) { err.code = KEKSSchemaErrInvalidData; err.msg = ">TAI64N"; } break; case 12: // ps - if (data->list[*taken].atom.v.str.len > 12) { - v = (uint32_t)keksFromBE(data->list[*taken].atom.v.str.ptr + 12, 4); - if ((v % 1000000) != 0) { + if (data->list[v].atom.v.str.len > 12) { + val = (uint32_t)keksFromBE(data->list[v].atom.v.str.ptr + 12, 4); + if ((val % 1000000) != 0) { err.code = KEKSSchemaErrInvalidData; err.msg = ">ps"; } } break; case 15: // fs - if (data->list[*taken].atom.v.str.len > 12) { - v = (uint32_t)keksFromBE(data->list[*taken].atom.v.str.ptr + 12, 4); - if ((v % 1000) != 0) { + if (data->list[v].atom.v.str.len > 12) { + val = (uint32_t)keksFromBE(data->list[v].atom.v.str.ptr + 12, 4); + if ((val % 1000) != 0) { err.code = KEKSSchemaErrInvalidData; err.msg = ">fs"; } @@ -565,19 +567,17 @@ Eached: } } else if (KEKSStrEqual(&(schema->list[idxSchema].atom), CmdUTC)) { err.msg = "UTC"; - if ((*taken) == SIZE_MAX) { + if (v == SIZE_MAX) { err.code = KEKSSchemaErrNo; } else { - if (data->list[*taken].atom.typ != KEKSItemTAI64) { + if (data->list[v].atom.typ != KEKSItemTAI64) { err.code = KEKSSchemaErrUnexpectedState; err.msg = "non-TAI64 taken"; return err; } struct timespec tv; enum KEKSErr errConvert = KEKSTAI64ToTimespec( - &tv, - data->list[*taken].atom.v.str.ptr, - data->list[*taken].atom.v.str.len); + &tv, data->list[v].atom.v.str.ptr, data->list[v].atom.v.str.len); if 
(errConvert != KEKSErrNo) { err.code = KEKSSchemaErrInvalidData; err.msg = "can not convert to timespec"; @@ -599,22 +599,16 @@ Eached: return err; } if (*eachInList) { - (*taken) = data->list[*taken].next; - if ((*taken) == 0) { - (*taken) = SIZE_MAX; - } else { + v = data->list[v].next; + if (v != 0) { goto Eached; } } if (*eachInMap) { - (*taken) = data->list[*taken].next; // key - if ((*taken) == 0) { - (*taken) = SIZE_MAX; - } else { - (*taken) = data->list[*taken].next; // value - if ((*taken) == 0) { - (*taken) = SIZE_MAX; - } else { + v = data->list[v].next; // key + if (v != 0) { + v = data->list[v].next; // value + if (v != 0) { goto Eached; } } diff --git a/go/schema/check.go b/go/schema/check.go index 83b79ef..42d8d97 100644 --- a/go/schema/check.go +++ b/go/schema/check.go @@ -86,7 +86,6 @@ func Check(schemaName string, schemas map[string][][]any, data any) error { }} } var taken string - var single bool // TAKEn, not EACH var vs []any for cmdIdx, cmd := range cmds { if len(cmd) == 0 { @@ -125,7 +124,7 @@ func Check(schemaName string, schemas map[string][][]any, data any) error { } case CmdTake: taken = "" - single = true + vs = nil if len(cmd) != 2 { return &SchemaErr{BaseErr: BaseErr{ SchemaName: schemaName, @@ -153,7 +152,6 @@ func Check(schemaName string, schemas map[string][][]any, data any) error { } v, exists := m[k] if !exists { - vs = nil continue } vs = []any{v} @@ -183,7 +181,6 @@ func Check(schemaName string, schemas map[string][][]any, data any) error { }} } case CmdEach: - single = false if vs == nil { continue } @@ -205,6 +202,7 @@ func Check(schemaName string, schemas map[string][][]any, data any) error { Msg: "non iterable", }} } + taken += "*" case CmdEq: if vs == nil { continue @@ -411,30 +409,28 @@ func Check(schemaName string, schemas map[string][][]any, data any) error { } for _, v := range vs { var got int64 - if single { - switch v := v.(type) { - case string: - got = int64(len(v)) - case []byte: - got = int64(len(v)) - case 
[]any: - got = int64(len(v)) - case map[string]any: - got = int64(len(v)) - case uint64: - got = int64(v) - case int64: - got = v - default: - return &SchemaErr{BaseErr: BaseErr{ - SchemaName: schemaName, - CmdIdx: cmdIdx, - CmdName: cmdName, - Taken: taken, - Msg: "non len-able", - Data: v, - }} - } + switch v := v.(type) { + case string: + got = int64(len(v)) + case []byte: + got = int64(len(v)) + case []any: + got = int64(len(v)) + case map[string]any: + got = int64(len(v)) + case uint64: + got = int64(v) + case int64: + got = v + default: + return &SchemaErr{BaseErr: BaseErr{ + SchemaName: schemaName, + CmdIdx: cmdIdx, + CmdName: cmdName, + Taken: taken, + Msg: "non len-able", + Data: v, + }} } ok = false switch cmdName { diff --git a/spec/schema/cmds b/spec/schema/cmds index 47a936d..9bf4ea5 100644 --- a/spec/schema/cmds +++ b/spec/schema/cmds @@ -12,6 +12,7 @@ TAKE | [".", k] If "k" is integer, then choose the k-th value in a list. If "k" equals to ".", then choose the element you are currently in. Command never fails, but key can be non-existent. + All following commands will be applied to the taken value. EXISTS | ["E"] Assure that chosen element exists. @@ -59,6 +60,7 @@ EQ | ["=", v] Check that chosen (if it exists) element's value equals to binary string "v". 
+=> https://datatracker.ietf.org/doc/html/rfc8610 CDDL For example let's check "our" structure, described in CDDL as: ai = text .gt 0 @@ -72,23 +74,17 @@ Corresponding schema can be: {"our": [ [".", "a"], ["E"], - [".", "a"], ["T", "STR"], - [".", "a"], [">", 0], [".", "v"], ["E"], - [".", "v"], ["T", "BIN", "STR"], [".", "fpr"], ["E"], - [".", "fpr"], ["T", "BIN"], - [".", "fpr"], [">", 31], - [".", "fpr"], ["<", 33], [".", "comment"], @@ -106,28 +102,23 @@ Here is an example with multiple schemas: "where": [ [".", "."], ["T", "LIST"], - [".", "."], [">", 1], - [".", "."], ["<", 3], - [".", "."], ["*"], ["T", "INT"], + [".", 0], [">", -91], - [".", 0], ["<", 91], + [".", 1], [">", -181], - [".", 1], ["<", 181], ], "wheres": [ [".", "."], ["T", "LIST"], - [".", "."], [">", 0], - [".", "."], ["*"], ["S", "where"], ], diff --git a/spec/schema/index b/spec/schema/index index 610c99b..237b18b 100644 --- a/spec/schema/index +++ b/spec/schema/index @@ -8,16 +8,16 @@ have necessary types of fields, satisfying lengths of the lists/maps or strings, and so on. In most cases those checks covers nearly everything when you sanitise the structures. -So suggestion is to specify those steps for some kind of very simple -minimalistic validation machine, that interprets them, executing -validation commands ([schema/cmds]) against the provided data -structures. That "machine" should be simple enough to be able to -implement it quickly and with sane amount of code. Validation steps -should be easily decodable and conveniently parsed even in C-language. +So the suggestion is to specify those steps for some kind of +interpreter that executes validation commands ([schema/cmds]) against +the provided data structures. That interpreter should be simple enough +to be implemented quickly and with a sane amount of code. +Validation steps should be easily decodable and conveniently parsed even +in the C language. Let's use KEKS format itself for the serialised validation steps!
And generate them from higher level language/code ([schema/tcl]), convenient for humans. [schema/tcl] -> [schema/cmds] -> keks-encode() => schema - validate(schema, keks-decode(data)) + validate(schema, data) diff --git a/spec/schema/tcl b/spec/schema/tcl index 73b8555..857c0db 100644 --- a/spec/schema/tcl +++ b/spec/schema/tcl @@ -1,10 +1,9 @@ -Validation commands are pretty low-level and are inconvenient to write -by hand, at least because of huge quantity of TAKEs. -tcl/schema.tcl utility gives ability to convert much more nicer schemas -written on Tcl language to the KEKS-encoded commands. We call those -Tcl-written schemas "KEKS/Schema". +Validation commands ([schema/cmds]) are pretty low-level and are +inconvenient to write by hand. The tcl/schema.tcl utility makes it +possible to convert much nicer schemas written in the Tcl language to +KEKS-encoded commands. We call those Tcl-written schemas "KEKS/Schema". -Example with "our" structure ([schema/cmds]) can be written as: +Example with "our" structure (from [schema/cmds]) can be written as: ai {{field . {str} >0}} fpr {{field . {bin} len=32}} @@ -22,7 +21,7 @@ and [cm/pub/] as: << [schemas/pub-load.tcl] << [schemas/pub-sig-tbs.tcl] -schema.tcl calls "schemas {s0 cmds0 s1 cmds1 ...}" commands to produce +schema.tcl calls the "schemas {s0 cmds0 s1 cmds1 ...}" command to produce an encoded map with "cmds*" commands for "s*" schemas. "field" command helps creation of commands related to the field. @@ -30,10 +29,10 @@ an encoded map with "cmds*" commands for "s*" schemas. Its first argument is either field's name in the map, or list's index or dot, meaning the self-structure itself. -Second argument is a list of allowable types, written in lowercase. If -that list consists of "with S", then SCHEMA command will be called -instead of TYPE checking. If list consists of "set", then it is -checked to be a MAP with EACH value of NIL. +Second argument is a list of allowable types, written in lowercase.
+If that list equals to {with S}, then {SCHEMA s} command will be called +instead of TYPE checking. If list equals to {set}, then it is checked +to be a MAP with EACH value of NIL. All other arguments are optional. @@ -44,11 +43,16 @@ you can specify empty list of types in second argument. ">n" and "0" assures that either list/map or strings are not -empty. "len=n" checks the exact length. "=v" checks that given -element has specified string/binary value (use "len=" for integers). +empty. + +"len=n" checks the exact length. + +"=v" checks that given element has specified string/binary value +(use "len=" for integers). "prec=p" issues TIMEPREC command, but instead of specifying the raw integer values, you choose one of: s, ms, us, ns, ps, fs, as. + "utc" issues UTC command. {of s} argument issues checking of EACH element of the list or map diff --git a/tcl/schema.tcl b/tcl/schema.tcl index 35f7671..60c528e 100755 --- a/tcl/schema.tcl +++ b/tcl/schema.tcl @@ -50,47 +50,39 @@ set knownTypes {bin blob bool hexlet int list magic map nil set str tai} proc field {k types args} { upvar _cmds _cmds buf buf + lappend _cmds [TAKE $k] if {[lindex $types 0] == "with"} { - lappend _cmds [TAKE $k] lappend _cmds [SCHEMA [lindex $types 1]] } elseif {[lsearch -exact $types set] == -1} { if {[llength $types] != 0} { - lappend _cmds [TAKE $k] lappend _cmds [TYPE $types] } } else { if {[llength $types] != 1} { error "set can be the only one among types" } - lappend _cmds [TAKE $k] lappend _cmds [TYPE {MAP}] - lappend _cmds [TAKE $k] lappend _cmds [EACH] lappend _cmds [TYPE {NIL}] + lappend _cmds [TAKE $k] } if {[lsearch -exact $args !exists] != -1} { - lappend _cmds [TAKE $k] lappend _cmds [!EXISTS] } elseif {[lsearch -exact $args optional] == -1} { - lappend _cmds [TAKE $k] lappend _cmds [EXISTS] } set i [lsearch -glob $args "len=*"] if {$i != -1} { set n [string range [lindex $args $i] 4 end] - lappend _cmds [TAKE $k] lappend _cmds [GT [expr {$n - 1}]] - lappend _cmds [TAKE $k] 
lappend _cmds [LT [expr {$n + 1}]] } set i [lsearch -glob $args ">*"] if {$i != -1} { - lappend _cmds [TAKE $k] lappend _cmds [GT [string range [lindex $args $i] 1 end]] } set i [lsearch -glob $args "<*"] if {$i != -1} { - lappend _cmds [TAKE $k] lappend _cmds [LT [string range [lindex $args $i] 1 end]] } set i [lsearch -glob $args "prec=*"] @@ -98,22 +90,18 @@ proc field {k types args} { set p [string range [lindex $args $i] 5 end] variable timeprecArgs set p [dict get $timeprecArgs $p] - lappend _cmds [TAKE $k] lappend _cmds [TIMEPREC $p] } if {[lsearch -exact $args utc] != -1} { - lappend _cmds [TAKE $k] lappend _cmds [UTC] } set i [lsearch -glob $args "=*"] if {$i != -1} { - lappend _cmds [TAKE $k] lappend _cmds [EQ [string range [lindex $args $i] 1 end]] } set i [lsearch -glob $args "of *"] if {$i != -1} { set s [lindex [lindex $args $i] 1] - lappend _cmds [TAKE $k] lappend _cmds [EACH] variable knownTypes if {[lsearch -exact $knownTypes $s] == -1} { -- 2.50.0