Deterministic decoding of string lengths

author Sergey Matveev <stargrave@stargrave.org>

Fri, 18 Oct 2024 09:17:55 +0000 (12:17 +0300)

committer Sergey Matveev <stargrave@stargrave.org>

Fri, 18 Oct 2024 09:17:55 +0000 (12:17 +0300)
author Sergey Matveev <stargrave@stargrave.org>
Fri, 18 Oct 2024 09:17:55 +0000 (12:17 +0300)
committer Sergey Matveev <stargrave@stargrave.org>
Fri, 18 Oct 2024 09:17:55 +0000 (12:17 +0300)
diff --git a/cyac/cmd/test-vector/test-vector.c b/cyac/cmd/test-vector/test-vector.c

index 3e5405477efcab4dc4abceb4df0bea4b1b4e424361a68ba7f79afc95654bab71..ddf9ea5d798eda9f396c5017cb60ea2b3aa2b20e05c0cced14874bde4cf009b2 100644 (file)
--- a/cyac/cmd/test-vector/test-vector.c
+++ b/cyac/cmd/test-vector/test-vector.c
@@ -25,7 +25,7 @@ main(int argc, char **argv)
      const size_t len = 68 * 1024;
      unsigned char *buf = malloc(len);
      assert(buf != NULL);
-    unsigned char *bin = malloc(1 << 16);
+    unsigned char *bin = malloc(1 << 17);
      assert(bin != NULL);
  
      adder(YACAtomMapEncode(&Got, buf + Off, len - Off)); // .
@@ -48,10 +48,14 @@ main(int argc, char **argv)
      adder(YACAtomBinEncode(&Got, buf + Off, len - Off, bin, 61));
      memset(bin, '2', 255);
      adder(YACAtomBinEncode(&Got, buf + Off, len - Off, bin, 255));
+    memset(bin, 'A', 61 + 255);
+    adder(YACAtomBinEncode(&Got, buf + Off, len - Off, bin, 61 + 255));
+    memset(bin, 'B', 62 + 255);
+    adder(YACAtomBinEncode(&Got, buf + Off, len - Off, bin, 62 + 255));
      memset(bin, '3', 1024);
      adder(YACAtomBinEncode(&Got, buf + Off, len - Off, bin, 1024));
-    memset(bin, '4', 1 << 16);
-    adder(YACAtomBinEncode(&Got, buf + Off, len - Off, bin, 1 << 16));
+    memset(bin, '4', 63 + 255 + 65535 + 1);
+    adder(YACAtomBinEncode(&Got, buf + Off, len - Off, bin, 63 + 255 + 65535 + 1));
      adder(YACAtomEOCEncode(&Got, buf + Off, len - Off)); // .str.bin
  
      adder(
diff --git a/cyac/lib/dec.c b/cyac/lib/dec.c

index 588744a0eefb1e04ef413f3bd36b0781e01de54f355653c11863f2b3e0ae30ec..b8cfd1de381de465fe087c039f22d1fb780bc07f6d2717ce18db223e24dbde3e 100644 (file)
--- a/cyac/lib/dec.c
+++ b/cyac/lib/dec.c
@@ -47,9 +47,11 @@ YACAtomDecode(
              break;
          case 62:
              ll = 2;
+            l += 255;
              break;
          case 63:
              ll = 8;
+            l += 255 + 65535;
              break;
          }
          if (ll != 0) {
@@ -57,14 +59,11 @@ YACAtomDecode(
              if (len < (*got)) {
                  return YACErrNotEnough;
              }
-            l = yacFromBE(buf + 1, ll);
-            if ((l < 61) || ((l < (1 << 8)) && (ll > 1)) ||
-                ((l < (1 << 16)) && (ll > 2))) {
-                return YACErrLenNonMinimal;
+            uint64_t ul = yacFromBE(buf + 1, ll);
+            if (ul > (SIZE_MAX - (63 + 255 + 65535))) {
+                return YACErrLenTooBig;
              }
-        }
-        if (l > SIZE_MAX) {
-            return YACErrLenTooBig;
+            l += ul;
          }
          const size_t ls = (size_t)l;
          (*got) += ls;
diff --git a/cyac/lib/enc.c b/cyac/lib/enc.c

index b737e25c1266a419720ecbd4daa929067f6a22e7324689fa8da14efa5f4aebb3..22509bfcb8c4727dd3cde2bb4ff89b60227042624f03dd5795959ea5bee43b38 100644 (file)
--- a/cyac/lib/enc.c
+++ b/cyac/lib/enc.c
@@ -175,18 +175,18 @@ yacAtomStrEncode(
      unsigned char lVal = 0;
      size_t lLen = 0;
      unsigned char lBuf[8] = {0};
-    if (srcLen >= ((uint64_t)1 << 16)) {
+    if (srcLen >= (63 + 255 + 65535)) {
          lVal = 63;
          lLen = 8;
-        yacToBE(lBuf, 8, (uint64_t)srcLen);
-    } else if (srcLen >= ((uint64_t)1 << 8)) {
+        yacToBE(lBuf, 8, (uint64_t)srcLen - (63 + 255 + 65535));
+    } else if (srcLen >= (62 + 255)) {
          lVal = 62;
          lLen = 2;
-        yacToBE(lBuf, 2, (uint64_t)srcLen);
-    } else if (srcLen > 60) {
+        yacToBE(lBuf, 2, (uint64_t)srcLen - (62 + 255));
+    } else if (srcLen >= 61) {
          lVal = 61;
          lLen = 1;
-        lBuf[0] = (unsigned char)(srcLen & 0xFF);
+        lBuf[0] = (unsigned char)((srcLen - 61) & 0xFF);
      } else {
          lVal = (unsigned char)srcLen;
      }
diff --git a/cyac/lib/err.c b/cyac/lib/err.c

index 204066525ae9eb6365e8267434f3213cd40d83857221db224faffbd8a9915d88..5908e3f47e85d25e58d604540769c0e8b6e9b66ac13bd512548461bf079df99b 100644 (file)
--- a/cyac/lib/err.c
+++ b/cyac/lib/err.c
@@ -12,8 +12,6 @@ YACErr2Str(const enum YACErr err)
          return "NotEnough";
      case YACErrUnknownType:
          return "UnknownType";
-    case YACErrLenNonMinimal:
-        return "LenNonMinimal";
      case YACErrLenTooBig:
          return "LenTooBig";
      case YACErrBadUTF8:
diff --git a/cyac/lib/err.h b/cyac/lib/err.h

index 1f35725f8ed63f6fa96ee803db6deebe1d2c5ee0285c95ce248db197984c9bee..711d81bb14921a77cd684007a5d6ea2c6eb907c88ebcd8b04d452334438d6e1d 100644 (file)
--- a/cyac/lib/err.h
+++ b/cyac/lib/err.h
@@ -11,8 +11,6 @@
  //     Not enough data. Atom's @code{.off} must contain how much.
  // @item YACErrUnknownType,    // unknown atom's type
  //     Unknown atom's type.
-// @item YACErrLenNonMinimal
-//     Non minimal string's length encoding.
  // @item YACErrLenTooBig,
  //     Too long string (>1<<60), can not be decoded.
  // @item YACErrBadUTF8
@@ -48,7 +46,6 @@ enum YACErr {
      YACErrNo = 1,
      YACErrNotEnough,
      YACErrUnknownType,
-    YACErrLenNonMinimal,
      YACErrLenTooBig,
      YACErrBadUTF8,
      YACErrIntNonMinimal,
diff --git a/cyac/lib/pki/cer.c b/cyac/lib/pki/cer.c

index 3448f1afb66a79fcdb27cf9d572e4b62774a651e37bbcda61d4a1b63c313926b..9929bf060310931a85ba0fe6d34509d68329239c95d283897a32406d3870e5ee 100644 (file)
--- a/cyac/lib/pki/cer.c
+++ b/cyac/lib/pki/cer.c
@@ -262,7 +262,7 @@ YACCerVerify(
          const size_t items = 5;
          struct YACItem tbsItems[5];
          memset(&tbsItems, 0, sizeof tbsItems);
-        struct YACItems tbs = {.list = tbsItems, items, items};
+        struct YACItems tbs = {.list = tbsItems, .offsets=NULL, .len=items, .cap=-1};
          tbsItems[0].atom.typ = YACItemMap;
          tbsItems[0].atom.v.list.head = 1;
  
diff --git a/gyac/cmd/test-vector-anys/main.go b/gyac/cmd/test-vector-anys/main.go

index 0f63d6bbe1fa39ca10e7bde32fd63d4f4416c6bbf513c2390f8d0679d902f949..0dadf2b3f0a28aa4432d646c2dc83653c3d0334f880d891fabcceee3da5488a1 100644 (file)
--- a/gyac/cmd/test-vector-anys/main.go
+++ b/gyac/cmd/test-vector-anys/main.go
@@ -42,8 +42,10 @@ func main() {
                                 bytes.Repeat([]byte{'0'}, 60),
                                 bytes.Repeat([]byte{'1'}, 61),
                                 bytes.Repeat([]byte{'2'}, 255),
+                               bytes.Repeat([]byte{'A'}, 61+255),
+                               bytes.Repeat([]byte{'B'}, 62+255),
                                 bytes.Repeat([]byte{'3'}, 1024),
-                               bytes.Repeat([]byte{'4'}, 1<<16),
+                               bytes.Repeat([]byte{'4'}, 63+255+65535+1),
                         },
                         "utf8": "привет мир",
                 },
diff --git a/gyac/cmd/test-vector-manual/main.go b/gyac/cmd/test-vector-manual/main.go

index aa0048b6a93cbd677937af81af986d27cf083a280e0b62665e29fb3257db4a43..5f7b0f4a833fe98dbde3549218a27158a2c105c629527b141012ebb3e3daedf2 100644 (file)
--- a/gyac/cmd/test-vector-manual/main.go
+++ b/gyac/cmd/test-vector-manual/main.go
@@ -30,8 +30,10 @@ func main() {
                                         buf = gyac.AtomBinEncode(buf, bytes.Repeat([]byte{'0'}, 60))
                                         buf = gyac.AtomBinEncode(buf, bytes.Repeat([]byte{'1'}, 61))
                                         buf = gyac.AtomBinEncode(buf, bytes.Repeat([]byte{'2'}, 255))
+                                       buf = gyac.AtomBinEncode(buf, bytes.Repeat([]byte{'A'}, 61+255))
+                                       buf = gyac.AtomBinEncode(buf, bytes.Repeat([]byte{'B'}, 62+255))
                                         buf = gyac.AtomBinEncode(buf, bytes.Repeat([]byte{'3'}, 1024))
-                                       buf = gyac.AtomBinEncode(buf, bytes.Repeat([]byte{'4'}, 1<<16))
+                                       buf = gyac.AtomBinEncode(buf, bytes.Repeat([]byte{'4'}, 63+255+65535+1))
                                 }
                                 buf = gyac.AtomEOCEncode(buf)
                                 {
diff --git a/gyac/dec.go b/gyac/dec.go

index 459b7c4026d6f4b39e4df967541345f94db8f49b942dadaee8908f8b1fd45767..3d6c7e279fc6d50f64861e6d750635d77fc7e8fc26ac27fac899a128a4ccb29b 100644 (file)
--- a/gyac/dec.go
+++ b/gyac/dec.go
@@ -66,7 +66,6 @@ func (raw *Raw) String() string {
  
  var (
         ErrNotEnough     = errors.New("not enough data")
-       ErrLenNonMinimal = errors.New("non-minimal len")
         ErrLenTooBig     = errors.New("string len >1<<60")
         ErrIntNonMinimal = errors.New("int non minimal")
         ErrUnknownType   = errors.New("unknown type")
@@ -99,8 +98,10 @@ func AtomDecode(buf []byte) (item *Item, off int, err error) {
                         ll = 1
                 case 62:
                         ll = 2
+                       l += ((1 << 8) - 1)
                 case 63:
                         ll = 8
+                       l += ((1 << 8) - 1) + ((1 << 16) - 1)
                 }
                 if ll != 0 {
                         off += ll
@@ -109,15 +110,11 @@ func AtomDecode(buf []byte) (item *Item, off int, err error) {
                                 return
                         }
                         ul := FromBE(buf[1 : 1+ll])
-                       if ul > (1 << 63) {
+                       if ul > (1<<63)-(63+((1<<8)-1)+((1<<16)-1)) {
                                 err = ErrLenTooBig
                                 return
                         }
-                       l = int(ul)
-                       if (l < 61) || ((l < (1 << 8)) && (ll > 1)) || ((l < (1 << 16)) && (ll > 2)) {
-                               err = ErrLenNonMinimal
-                               return
-                       }
+                       l += int(ul)
                 }
                 off += l
                 if off <= 0 {
diff --git a/gyac/enc.go b/gyac/enc.go

index b59d8deb18e3fc9abff9ff5536f186d67500ebf94f3b122dc54675d6c30358a6..419653e59a9ede2448c99edd122974a78ff81aebc0a14d399adb699258aa839c 100644 (file)
--- a/gyac/enc.go
+++ b/gyac/enc.go
@@ -220,19 +220,19 @@ func AtomBlobEncode(buf []byte, chunkLen int) []byte {
  }
  
  func atomStrEncode(buf, data []byte, utf8 bool) []byte {
-       lv := 0
+       var lv int
         var l []byte
-       if len(data) >= (1 << 16) {
+       if len(data) >= 63+((1<<8)-1)+((1<<16)-1) {
                 lv = 63
                 l = make([]byte, 8)
-               ToBE(l, uint64(len(data)))
-       } else if len(data) >= (1 << 8) {
+               ToBE(l, uint64(len(data)-(lv+((1<<8)-1)+((1<<16)-1))))
+       } else if len(data) >= 62+255 {
                 lv = 62
                 l = make([]byte, 2)
-               ToBE(l, uint64(len(data)))
-       } else if len(data) > 60 {
+               ToBE(l, uint64(len(data)-(lv+((1<<8)-1))))
+       } else if len(data) >= 61 {
                 lv = 61
-               l = []byte{byte(len(data))}
+               l = []byte{byte(len(data) - lv)}
         } else {
                 lv = len(data)
         }
diff --git a/pyac/pyac.py b/pyac/pyac.py

index c419ee549f1dabf39372e80550e7c9b93ab97d6a2a115540b9fab55a5fc2714d..0495bf7f9f78f05bd6c43090fcff929a1de6356ca6131cbaa69c8ea255d04536 100644 (file)
--- a/pyac/pyac.py
+++ b/pyac/pyac.py
@@ -19,7 +19,6 @@ class DecodeError(ValueError):
  
  
  WrongTag = DecodeError("wrong tag")
-NonMinimal = DecodeError("non-miminal encoding")
  
  
  class NotEnoughData(DecodeError):
@@ -183,7 +182,7 @@ class Int:
              raise NotEnoughData(l)
          v = int.from_bytes(data[:l], "big")
          if v < 32:
-            raise NonMinimal
+            raise DecodeError("non-miminal encoding")
          if neg:
              v = -1 - v
          return klass(v), data[l:]
@@ -515,14 +514,17 @@ class BaseString:
  
      def encode(self):
          l = len(self.v)
-        if l >= (1<<16):
+        if l >= (63 + ((1<<8)-1) + ((1<<16)-1)):
              lv = 63
+            l -= (lv + ((1<<8)-1) + ((1<<16)-1))
              lb = l.to_bytes(8, "big")
-        elif l >= (1<<8):
+        elif l >= (62 + ((1<<8)-1)):
              lv = 62
+            l -= (lv + ((1<<8)-1))
              lb = l.to_bytes(2, "big")
          elif l >= 61:
              lv = 61
+            l -= lv
              lb = l.to_bytes(1, "big")
          else:
              lv = l
@@ -545,18 +547,15 @@ class BaseString:
              llen = 1
          elif l == 62:
              llen = 2
+            l += ((1<<8)-1)
          elif l == 63:
              llen = 8
+            l += ((1<<8)-1) + ((1<<16)-1)
          data = data[1:]
          if llen > 0:
              if len(data) < llen:
                  raise NotEnoughData(llen)
-            l = int.from_bytes(data[:llen], "big")
-            if (
-                (l < 61) or ((l < (1<<8)) and (llen > 1)) or
-                ((l < (1<<16)) and (llen > 2))
-            ):
-                raise NonMinimal
+            l += int.from_bytes(data[:llen], "big")
              data = data[llen:]
          if len(data) < l:
              raise NotEnoughData(l)
diff --git a/pyac/test-vector.py b/pyac/test-vector.py

index 9a775d43897e1551ff0c0745081d0fa7524526cfd82c0486858718423e662f65..beb87103a0cd4af56f8f7d039469caefca0c35d1cd7c441128fc30db36d83ede 100644 (file)
--- a/pyac/test-vector.py
+++ b/pyac/test-vector.py
@@ -15,8 +15,10 @@ data = {
              60 * b"0",
              61 * b"1",
              255 * b"2",
+            (61+255) * b"A",
+            (62+255) * b"B",
              1024 * b"3",
-            (1<<16) * b"4",
+            (63+255+65535+1) * b"4",
          ],
          "utf8": "привет мир",
      },
diff --git a/spec/encoding/str.texi b/spec/encoding/str.texi

index be0e6e9d2e54d29f229f83f1613696fa2522559d35026d94a7e8bcae772c03fe..8f6607d1d2b20074beaab23720d28c417d43cce7908f96ecb9396f4894966dea 100644 (file)
--- a/spec/encoding/str.texi
+++ b/spec/encoding/str.texi
@@ -23,11 +23,11 @@ If length value equals to:
  @item 0-60
      Use as is.
  @item 61
-    Then next 8-bits are the actual length.
+    61 plus the value of the next 8 bits (lengths 61-316).
  @item 62
-    Then next 16-bits (big-endian) are the actual length.
+    62 plus 255 plus the value of the next 16 bits, big-endian (lengths 317-65852).
  @item 63
-    Then next 64-bits (big-endian) are the actual length.
+    63 plus 255 plus 65535 plus the value of the next 64 bits, big-endian (lengths 65853 and above).
  @end table
  
  String's length @strong{must} be encoded in shortest possible form.
@@ -41,7 +41,7 @@ Example representations:
  
  @item 0-byte binary string @tab @code{80}
  @item 4-byte binary string @code{0x01 0x02 0x03 0x04} @tab @code{84 01 02 03 04}
-@item 64-byte binary string with 0x41 @tab @code{BD 40 41 41 .. 41}
+@item 64-byte binary string with 0x41 @tab @code{BD 03 41 41 .. 41}
  @item UTF-8 string "привет мир" ("hello world" on russian) @tab
      @code{D3 D0 BF D1 80 D0 B8 D0 B2 D0 B5 D1 82 20 D0 BC D0 B8 D1 80}
  
diff --git a/tyac/test-vector.tcl b/tyac/test-vector.tcl

index bd508ddb71a4dd160f2a7a4a0b7ef9eed5388123cc2cb19b64141d4d678bb12e..abecf2c82e1465f4d7f4dcb692b6006d743ec4418d72923990415f3472cf8fb6 100644 (file)
--- a/tyac/test-vector.tcl
+++ b/tyac/test-vector.tcl
@@ -36,8 +36,10 @@ MAP {
              {BIN [string repeat "0" 60]}
              {BIN [string repeat "1" 61]}
              {BIN [string repeat "2" 255]}
+            {BIN [string repeat "A" [expr {61+255}]]}
+            {BIN [string repeat "B" [expr {62+255}]]}
              {BIN [string repeat "3" 1024]}
-            {BIN [string repeat "4" [expr {1 << 16}]]}
+            {BIN [string repeat "4" [expr {63+255+65535+1}]]}
          }}
      }}
      blob {LIST {
diff --git a/tyac/tyac.tcl b/tyac/tyac.tcl

index 1a1286715daf3bcc57213bda4b5c461c909119816559b736d14ed6f77ae0d572..837351ccba3eecda90cf40530dd58a1c4e8f20e0f612787ef245c42ebc13acc3 100644 (file)
--- a/tyac/tyac.tcl
+++ b/tyac/tyac.tcl
@@ -73,15 +73,18 @@ proc _str {atom v} {
      set ll 0
      set vl [string length $v]
      set lv $vl
-    if {$vl >= [expr {1 << 16}]} {
+    if {$vl >= [expr {63 + 255 + 65535}]} {
          set lv 63
          set ll 8
-    } elseif {$vl >= [expr {1 << 8}]} {
+        set vl [expr {$vl - 63 - 255 - 65535}]
+    } elseif {$vl >= [expr {62 + 255}]} {
          set lv 62
          set ll 2
+        set vl [expr {$vl - 62 - 255}]
      } elseif {$vl > 60} {
          set lv 61
          set ll 1
+        set vl [expr {$vl - 61}]
      }
      char [expr {$atom | $lv}]
      if {$ll > 0} { toBE $ll $vl }
author	Sergey Matveev <stargrave@stargrave.org>
	Fri, 18 Oct 2024 09:17:55 +0000 (12:17 +0300)
committer	Sergey Matveev <stargrave@stargrave.org>
	Fri, 18 Oct 2024 09:17:55 +0000 (12:17 +0300)
cyac/cmd/test-vector/test-vector.c		patch \| blob \| history
cyac/lib/dec.c		patch \| blob \| history
cyac/lib/enc.c		patch \| blob \| history
cyac/lib/err.c		patch \| blob \| history
cyac/lib/err.h		patch \| blob \| history
cyac/lib/pki/cer.c		patch \| blob \| history
gyac/cmd/test-vector-anys/main.go		patch \| blob \| history
gyac/cmd/test-vector-manual/main.go		patch \| blob \| history
gyac/dec.go		patch \| blob \| history
gyac/enc.go		patch \| blob \| history
pyac/pyac.py		patch \| blob \| history
pyac/test-vector.py		patch \| blob \| history
spec/encoding/str.texi		patch \| blob \| history
tyac/test-vector.tcl		patch \| blob \| history
tyac/tyac.tcl		patch \| blob \| history