Various trivial additions

author Sergey Matveev <stargrave@stargrave.org>

Wed, 30 Oct 2024 08:47:33 +0000 (11:47 +0300)

committer Sergey Matveev <stargrave@stargrave.org>

Wed, 30 Oct 2024 09:54:14 +0000 (12:54 +0300)
author Sergey Matveev <stargrave@stargrave.org>
Wed, 30 Oct 2024 08:47:33 +0000 (11:47 +0300)
committer Sergey Matveev <stargrave@stargrave.org>
Wed, 30 Oct 2024 09:54:14 +0000 (12:54 +0300)
diff --git a/spec/encoding/blob.texi b/spec/encoding/blob.texi

index 9f3c5722335cdc224a6103a751ede3550e5328fefb3f4a3ecfc0846ba0be2147..c3ba278d230a64d5cdf4f7b1f97ab9716ab4d4fc430e76519e65d2f083cc3128 100644 (file)
--- a/spec/encoding/blob.texi
+++ b/spec/encoding/blob.texi
@@ -1,14 +1,15 @@
  @node Blobs
  @cindex BLOB
+@cindex chunk
  @section Blobs
  
  Blob (binary large object) allows you to transfer binary data in chunks,
-in a streaming way, when data may not fit in memory at once.
+in a streaming way, when data may not fit in memory.
  
  64-bit big-endian integer follows the BLOB tag, setting the following
-chunks payload size (+1). Then come zero or more NIL tags with
-fixed-length payload after each of them. Blob is terminated by
-@ref{Strings, BIN}, probably having zero length.
+chunks payload size (+1). Then come zero or more NIL tags, each followed
+by fixed-length payload. Blob is terminated by @ref{Strings, BIN},
+probably having zero length.
  
  Data format definition must specify exact chunk size expected to be
  used, if it needs deterministic encoding.
diff --git a/spec/encoding/cont.texi b/spec/encoding/cont.texi

index 6c1774c926e9926d5c9bc1aae1f384273c0a04b74e1aee67bc8e3df45bbaeb18..c915bb49379c5a01739abe687a6119a1d61f370f243620cb86ef9a230a048c07 100644 (file)
--- a/spec/encoding/cont.texi
+++ b/spec/encoding/cont.texi
@@ -1,15 +1,19 @@
  @node Containers
+@cindex containers
  @section Containers
  
  Containers do not have any explicit length, but are terminated by EOC
  (end of contents) tag.
  
+@cindex LIST
+@cindex EOC
  LIST contains a concatenation of items of arbitrary type.
  
  @verbatim
  LIST [ITEM0 || ITEM1 || ...] EOC
  @end verbatim
  
+@cindex MAP
  MAP contains concatenation of @ref{Strings, STR(key)}-value pairs. Keys
  @strong{must} be non-empty, unique and length-first bytewise ascending ordered.
  
@@ -20,6 +24,10 @@ MAP [STR(KEY0) || ITEM0 || STR(KEY1) || ITEM1 || ... ] EOC
  Hint: Encoding code for known format can be ordered itself to emit
  values in an already properly sorted way.
  
+@cindex SET
+SET is emulated by using MAPs with NIL values. That gives only 1-byte
+overhead for each element, but reuses already existing code.
+
  Example representations:
  
  @multitable @columnfractions .5 .5
@@ -27,5 +35,6 @@ Example representations:
  @item LIST[] @tab @code{08 00}
  @item LIST[INT(123) FALSE] @tab @code{08 207B 02 00}
  @item MAP[foo: LIST["bar"]] @tab @code{09 C3666F6F 08 C3626172 00 00}
+@item SET[sig, dh] @tab @code{09 C26468 01 C3736967 01 00}
  
  @end multitable
diff --git a/spec/encoding/index.texi b/spec/encoding/index.texi

index 9126cfcbfe78f82fe4d455827b1ea0fbfbce2b4522dacfcbe44c991bb7adc2cd..0dfd7f018bfa9a431b5a764547a7fa53e6b631d9736cbde49a7c92f914caa9e9 100644 (file)
--- a/spec/encoding/index.texi
+++ b/spec/encoding/index.texi
@@ -2,7 +2,7 @@
  @cindex encoding
  @unnumbered Encoding
  
-YAC can store various primitive scalar types (strings, integers, ...),
+YAC can store various primitive scalar types (strings, integers, ...)
  and container types (lists, maps, ...). Serialisation process is just
  emitting the TLV-like encoding for each item recursively.
  
diff --git a/spec/encoding/int.texi b/spec/encoding/int.texi

index 0d71a408dfb17a096c7b112fb3cef46572cbeccafaac1e972da2593068751d16..df312552b45430570c5537d550ae35f2c2b2d80e197d8f6753fd26e4b4085047 100644 (file)
--- a/spec/encoding/int.texi
+++ b/spec/encoding/int.texi
@@ -13,9 +13,8 @@ Long form is encoded as a big-endian number of varying length.
  @code{*INT(len=1)..*INT(len=16)} types are for 8-, 16-, 24-, ...,
  128-bit integer representations.
  
-Shortest possible form @strong{must} be used. Leading zero bytes are
-@strong{forbidden}. Short values (<32) @strong{must} be encoded in a
-short form.
+Shortest possible form @strong{must} be used, which means no leading zero bytes.
+Short values (<32) @strong{must} be encoded in a short form.
  
  Negative integers store their absolute value the same way as positive
  integers. After decoding, their value is subtracted from -1. Negative
@@ -23,9 +22,9 @@ value encoded as @code{0x02} means @code{-1 - 0x02 => -3}.
  
  Hint: both positive and negative long integer tag's value keeps the
  length in the last 4 bits. So there is no need in dealing with every
-reserved value. You can check first 4 bits of the header to determine is
-it positive or negative integer, and then treat remaining 4 bits as a
-length (+1).
+tag's reserved value. You can check the first 4 bits of the header to
+determine whether it is a positive or negative integer, and then treat the
+remaining 4 bits as a length (+1).
  
  Example representations:
  
diff --git a/spec/index.texi b/spec/index.texi

index 71fc1e210992bea74beb0222ee0c73aad2e6237eff0dd1914d5438c509e13c0e..cbb24e9cf0fbccbe58a32c0f85d9cea77873957ac9cd359dc544f2a6c1e59bde 100644 (file)
--- a/spec/index.texi
+++ b/spec/index.texi
@@ -23,7 +23,7 @@ of structured data. But why!?
  @item Its encoding must be deterministic -- there must be only a single
      representation of the structured data, allowing its usage in
      cryptography-related contexts.
-@item It should support enough data types to be able to replace JSON
+@item It should support enough data types for being able to replace JSON
      transparently.
  @end itemize
  
@@ -34,12 +34,7 @@ Are not there any satisfiable codecs?
  
  @multitable @columnfractions .30 .05 .05 .05 .05 .05
  
-@headitem name @tab
-    Schemaless @tab
-    Simple @tab
-    Deterministic @tab
-    Streamable @tab
-    Compact
+@headitem @tab Schemaless @tab Simple @tab Deterministic @tab Streamable @tab Compact
  
  @item ASN.1 @url{https://en.wikipedia.org/wiki/Distinguished_Encoding_Rules#DER_encoding, DER} @tab
      N @tab @strong{N} @tab Y @tab N @tab N
@@ -68,13 +63,7 @@ Are not there any satisfiable codecs?
  
  @multitable @columnfractions .30 .05 .05 .05 .05 .05 .05
  
-@headitem name @tab
-    Large strings @tab
-    Human strings @tab
-    Integers @tab
-    Lists @tab
-    Structures @tab
-    Datetime
+@headitem @tab Large strings @tab Human strings @tab Integers @tab Lists @tab Structures @tab Datetime
  
  @item ASN.1 DER @tab
      Y @tab Y @tab Y @tab Y @tab Y @tab Y
diff --git a/spec/install.texi b/spec/install.texi

index 09dbcb06583b368f89945062c4935fc7467e56906fe9ebdbef014f5d88ab12c7..2ede379bdd3c275b61911a046209bb595adcbe2e387ac3f643bf8ed240ac2d67 100644 (file)
--- a/spec/install.texi
+++ b/spec/install.texi
@@ -7,7 +7,8 @@ and Tcl. But all of them are currently badly covered with tests.
  @cindex git
  You can obtain development source code with
  @command{git clone git://git.cypherpunks.su/yac.git}
-(also you can use @url{https://git.cypherpunks.su/yac.git}).
+(also you can use @url{http://git.cypherpunks.su/yac.git},
+@url{https://git.cypherpunks.su/yac.git}).
  
  Also there is @url{https://yggdrasil-network.github.io/, Yggdrasil}
  accessible address: @url{http://y.www.yac.cypherpunks.su/}.
diff --git a/spec/rationale.texi b/spec/rationale.texi

index d34d4260f31563c22c10aa4ea215291707fc568c6e38eef62f09d5b6a66b0d09..1c091e05819eb0419befb2db12d64fedf1cc8d002726feba41a067756be79b35 100644 (file)
--- a/spec/rationale.texi
+++ b/spec/rationale.texi
@@ -4,49 +4,46 @@
  @itemize
  
  @item
-We do not want ASCII decimal parsing. This is not trivial and not very
-fast to load an integer. Although it is human readable and
-understandable. Also it is not compact.
+No ASCII decimal parsing. That is not trivial code, not fast, not
+compact. Although it is human readable and understandable.
  
  @item
-We do not want varints (where most significant bit means continuation)
-and zig-zag-like encoding. This is not trivial code, prohibiting fast
-integer load.
+No varints (where most significant bit means continuation) and
+zig-zag-like encoding. That is not trivial code.
  
  @item
-We do not want formats where maps and lists need to know their
-lengths/sizes in advance. That means no streaming possibility. That
-complicates encoder and requires more memory usage. Containers can be
-terminated with explicit signal tag.
+No formats where maps and lists need to know their lengths/sizes in
+advance. That means no streaming possibility. Complicates encoder and
+requires more memory usage.
  
  @item
-We want formats with ability to store maps/dictionaries/tables. Of
-course they can be emulated by reassembling lists, but that is manual
-action after the codec did his job.
+No formats without the ability to store maps/dictionaries/tables. Of course
+they can be emulated by reassembling lists, but that is a manual action
+after the codec has done its job.
  
  @item
  Differentiation of binary and human-readable strings (UTF-8 for example)
  is a must for a format that is intended to be looked at and analysed by a human.
  
  @item
-ISO-based (string) representation of data is a no: because it requires
-complex parsing and takes much space. Naive UNIX timestamp
-representation raises questions about its length and dealing with the
-dates before 1970. Moreover they are not suitable for tasks requiring
-monotonous clocks, because of UTC.
+No ISO-based (string) representation of datetime: it requires complex
+parsing and takes much space. Naive UNIX timestamp representation raises
+questions about its length and dealing with the dates before 1970.
+Moreover they are not suitable for tasks requiring monotonic clocks,
+because of UTC.
  
  @item
  No tagging ability, context specifying, marking, hinting, extension
-mechanism or anything like that. That brings huge complications to the
-state and questions when you do not know how to deal with unknown
-entities. Any unsupported data type must be a string, possibly enveloped
-in a map with additional data. @code{@{"cp": "koi8-r", "str": BIN(...)@}}.
+mechanism or anything like that. That brings complications to the state
+and questions with unknown entities. Any unsupported data type must be a
+string, possibly enveloped in a map with additional data.
+@code{@{"cp": "koi8-r", "str": BIN(...)@}}.
  
  @item
  Large (>2GiB) strings support is a must. Nowadays even a single
-multimedia file can easily exceed that size. General-purpose codec must
-be able to send it without complication of inventing your own chunked
-format.
+multimedia file can easily exceed that size. General-purpose codec
+should be able to send it without complication of inventing your own
+chunked format.
  
  @item
  Is not embedded strings length, like in YAC and CBOR, a more
@@ -54,14 +51,14 @@ complicated code? Definitely. But there are so many short strings in a
  schemaless format for specifying map/structure keys. So many algorithm
  identifiers, that are also relatively short human-readable strings. So
  that is a compromise between slightly larger code and much shorter
-resulting structures, that is worth of it.
+resulting structures.
  
  @item
-We want clear distinguishing of continuous strings and streamable ones
-(BLOBs). ASN.1 CER does not distinguish them, making representation of
-every string in memory far from being convenient and easy to work with.
-Different tasks have different constraints: many of them do not need
-streamable strings at all, some of them may use them solely. YAC gives
-flexibility in choosing necessary data type for your needs.
+There should be a clear distinction between continuous strings and
+streamable ones (BLOBs). ASN.1 CER does not do that, making
+representation of every string in memory far from being convenient and
+easy to work with. Different tasks have different constraints: many of
+them do not need streamable strings at all, some of them may use only
+those.
  
  @end itemize
diff --git a/spec/schema.texi b/spec/schema.texi

index 78415e53db2e3e4958206dc97a4d6bdcfc3b7940df876c1bbc88599c0bb7fd4a..afcb2ada006a59c6dc7cf5f1531c1798cb668fbefd4323e9c687658fb974f030 100644 (file)
--- a/spec/schema.texi
+++ b/spec/schema.texi
@@ -30,9 +30,6 @@ identifiers. OIDs database can be considered as an external schema.
  Lacking it, or lacking its actual state, you probably won't even be able
  to guess the context of the data inside.
  
-Sets can be emulated by using MAPs with NIL values. That gives only
-1-byte overhead for each element, but reuses already existing code.
-
  If you really desire more compact encoding, even agree to use schema
  definitions, then think about replacing MAPs with LISTs. Non-present
  values can be indicated by NIL tag.
diff --git a/tyac/tyac.tcl b/tyac/tyac.tcl

index f4232fdcb1664b1816586f7cde55fbe487c0015548962899b4137c87ecc645c4..ee0ae7a00fc1fca8c676d82b6799411809621e5e61450f6d90a90cf315ea7109 100644 (file)
--- a/tyac/tyac.tcl
+++ b/tyac/tyac.tcl
@@ -134,6 +134,12 @@ proc MAP {pairs} {
      EOC
  }
  
+proc SET {v} {
+    set args [list]
+    foreach k $v { lappend args $k NIL }
+    MAP $args
+}
+
  proc BLOB {chunkLen v} {
      char [expr 0x0B]
      toBE 8 [expr {$chunkLen - 1}]
@@ -223,6 +229,6 @@ proc RAW {t v} {
  
  namespace export EOC NIL FALSE TRUE UUID INT STR BIN RAW
  namespace export TAI64 UTCFromISO
-namespace export LIST MAP LenFirstSort BLOB
+namespace export LIST MAP SET LenFirstSort BLOB
  
  }
author	Sergey Matveev <stargrave@stargrave.org>
	Wed, 30 Oct 2024 08:47:33 +0000 (11:47 +0300)
committer	Sergey Matveev <stargrave@stargrave.org>
	Wed, 30 Oct 2024 09:54:14 +0000 (12:54 +0300)
spec/encoding/blob.texi		patch \| blob \| history
spec/encoding/cont.texi		patch \| blob \| history
spec/encoding/index.texi		patch \| blob \| history
spec/encoding/int.texi		patch \| blob \| history
spec/index.texi		patch \| blob \| history
spec/install.texi		patch \| blob \| history
spec/rationale.texi		patch \| blob \| history
spec/schema.texi		patch \| blob \| history
tyac/tyac.tcl		patch \| blob \| history