Deterministic decoding of blob lengths

author Sergey Matveev <stargrave@stargrave.org>

Fri, 18 Oct 2024 10:16:20 +0000 (13:16 +0300)

committer Sergey Matveev <stargrave@stargrave.org>

Fri, 18 Oct 2024 10:16:20 +0000 (13:16 +0300)
author Sergey Matveev <stargrave@stargrave.org>
Fri, 18 Oct 2024 10:16:20 +0000 (13:16 +0300)
committer Sergey Matveev <stargrave@stargrave.org>
Fri, 18 Oct 2024 10:16:20 +0000 (13:16 +0300)
diff --git a/cyac/lib/dec.c b/cyac/lib/dec.c

index b8cfd1de381de465fe087c039f22d1fb780bc07f6d2717ce18db223e24dbde3e..2b3b19c19e94ae388eb98b52a42bf9f033783a535d5adc68b167444101ae0c6a 100644 (file)
--- a/cyac/lib/dec.c
+++ b/cyac/lib/dec.c
@@ -173,13 +173,10 @@ YACAtomDecode(
              return YACErrNotEnough;
          }
          const uint64_t chunkLen = yacFromBE(buf + 1, 8);
-        if (chunkLen > SIZE_MAX) {
+        if (chunkLen > (SIZE_MAX - 1)) {
              return YACErrLenTooBig;
          }
-        if (chunkLen == 0) {
-            return YACErrBlobBadLen;
-        }
-        atom->v.blob.chunkLen = (size_t)chunkLen;
+        atom->v.blob.chunkLen = (size_t)chunkLen + 1;
          break;
      }
  
diff --git a/cyac/lib/enc.c b/cyac/lib/enc.c

index 22509bfcb8c4727dd3cde2bb4ff89b60227042624f03dd5795959ea5bee43b38..63b350bc8f33fd60e8a48f0dcd92d91949ffaffbffb04fb22351ec6261abf613 100644 (file)
--- a/cyac/lib/enc.c
+++ b/cyac/lib/enc.c
@@ -159,7 +159,7 @@ YACAtomBlobEncode(
          return false;
      }
      buf[0] = YACAtomBlob;
-    yacToBE(buf + 1, 8, (uint64_t)chunkLen);
+    yacToBE(buf + 1, 8, (uint64_t)chunkLen - 1);
      return true;
  }
  
diff --git a/cyac/lib/err.c b/cyac/lib/err.c

index 5908e3f47e85d25e58d604540769c0e8b6e9b66ac13bd512548461bf079df99b..6f547482b4e9d25e7dba4ac43f1a9a2b541b745ffc42d0740b1eb5ee13797b1a 100644 (file)
--- a/cyac/lib/err.c
+++ b/cyac/lib/err.c
@@ -18,8 +18,6 @@ YACErr2Str(const enum YACErr err)
          return "BadUTF8";
      case YACErrIntNonMinimal:
          return "IntNonMinimal";
-    case YACErrBlobBadLen:
-        return "BlobBadLen";
      case YACErrBlobBadAtom:
          return "BlobBadAtom";
      case YACErrBlobBadTerm:
diff --git a/cyac/lib/err.h b/cyac/lib/err.h

index 711d81bb14921a77cd684007a5d6ea2c6eb907c88ebcd8b04d452334438d6e1d..64bf0aa1c76f89d6d572cc1b8eb5a9d32d02ea14ccc7b1954dac3a11ed18b562 100644 (file)
--- a/cyac/lib/err.h
+++ b/cyac/lib/err.h
@@ -17,8 +17,6 @@
  //     Invalid UTF-8 codepoint or zero byte met.
  // @item YACErrIntNonMinimal
  //     Non minimal integer encoding.
-// @item YACErrBlobBadLen
-//     Blob with invalid chunk length.
  // @item YACErrBlobBadAtom
  //     Blob contains unexpected atom.
  // @item YACErrBlobBadTerm
@@ -49,7 +47,6 @@ enum YACErr {
      YACErrLenTooBig,
      YACErrBadUTF8,
      YACErrIntNonMinimal,
-    YACErrBlobBadLen,
      YACErrBlobBadAtom,
      YACErrBlobBadTerm,
      YACErrTAI64TooBig,
diff --git a/gyac/dec.go b/gyac/dec.go

index 3d6c7e279fc6d50f64861e6d750635d77fc7e8fc26ac27fac899a128a4ccb29b..860915593f23323e0f7af06cd214148d027be7a8e449a37268fee0a7b35976c3 100644 (file)
--- a/gyac/dec.go
+++ b/gyac/dec.go
@@ -73,7 +73,6 @@ var (
         ErrMapBadKey     = errors.New("map bad key")
         ErrMapUnordered  = errors.New("map unordered")
         ErrMapNoVal      = errors.New("map no value")
-       ErrBlobBadLen    = errors.New("blob bad len")
         ErrBlobBadAtom   = errors.New("blob unexpected atom")
         ErrBlobBadTerm   = errors.New("blob bad terminator")
  )
@@ -220,14 +219,11 @@ func AtomDecode(buf []byte) (item *Item, off int, err error) {
                         return
                 }
                 chunkLen := FromBE(buf[1 : 1+8])
-               if chunkLen == 0 {
-                       err = ErrBlobBadLen
-                       return
-               }
-               if chunkLen >= (1 << 63) {
+               if chunkLen >= (1<<63)-1 {
                         err = ErrLenTooBig
                         return
                 }
+               chunkLen++
                 item.V = chunkLen
  
         case AtomFloat16, AtomFloat32, AtomFloat64, AtomFloat128, AtomFloat256:
diff --git a/gyac/enc.go b/gyac/enc.go

index 419653e59a9ede2448c99edd122974a78ff81aebc0a14d399adb699258aa839c..05ffc1375108838917df42c5c6956007fd2361eeabae2a40940d1d63510651f6 100644 (file)
--- a/gyac/enc.go
+++ b/gyac/enc.go
@@ -215,7 +215,7 @@ func AtomMapEncode(buf []byte) []byte {
  func AtomBlobEncode(buf []byte, chunkLen int) []byte {
         l := make([]byte, 9)
         l[0] = byte(AtomBlob)
-       ToBE(l[1:], uint64(chunkLen))
+       ToBE(l[1:], uint64(chunkLen-1))
         return append(buf, l...)
  }
  
diff --git a/pyac/pyac.py b/pyac/pyac.py

index 0495bf7f9f78f05bd6c43090fcff929a1de6356ca6131cbaa69c8ea255d04536..3be038c827c37ee588fc2ec6c821675d80b6587324f2d4c99e9b070c269a1128 100644 (file)
--- a/pyac/pyac.py
+++ b/pyac/pyac.py
@@ -293,12 +293,12 @@ class Blob:
          if isinstance(v, Blob):
              v = v.v
              l = v.l
-        assert (l > 0) and (l <= ((1<<64)-1))
+        assert (l > 0) and (l <= (1<<64))
          self.v = v
          self.l = l
  
      def encode(self):
-        raws = [self.tags[0].to_bytes(1, "big"), self.l.to_bytes(8, "big")]
+        raws = [self.tags[0].to_bytes(1, "big"), (self.l - 1).to_bytes(8, "big")]
          chunks = len(self.v) // (self.l)
          for i in range(chunks):
              raws.append(Nil().encode())
@@ -318,7 +318,7 @@ class Blob:
          data = data[1:]
          if len(data) < 8:
              raise NotEnoughData(8)
-        l = int.from_bytes(data[:8], "big")
+        l = 1 + int.from_bytes(data[:8], "big")
          data = data[8:]
          vs = []
          while True:
diff --git a/spec/encoding/blob.texi b/spec/encoding/blob.texi

index 76024d5b362e0d26793f042db7ceb9e5c3779996ddbcf1a4cc2481b2f38d36cf..9f3c5722335cdc224a6103a751ede3550e5328fefb3f4a3ecfc0846ba0be2147 100644 (file)
--- a/spec/encoding/blob.texi
+++ b/spec/encoding/blob.texi
@@ -6,9 +6,9 @@ Blob (binary large object) allows you to transfer binary data in chunks,
  in a streaming way, when data may not fit in memory at once.
  
  64-bit big-endian integer follows the BLOB tag, setting the following
-chunks payload size. Then come zero or more NIL tags with fixed-length
-payload after each of them. Blob is terminated by @ref{Strings, BIN},
-probably having zero length.
+chunks payload size minus one (chunk length is the stored value +1).
+Then come zero or more NIL tags, each followed by a fixed-length
+payload. Blob is terminated by @ref{Strings, BIN}, possibly of zero length.
  
  Data format definition must specify exact chunk size expected to be
  used, if it needs deterministic encoding.
@@ -19,10 +19,10 @@ BLOB len [NIL || payload0 || NIL || payload1 || ...] BIN
  
  @multitable @columnfractions .5 .5
  
-@item BLOB(5, "") @tab @code{0B 0000000000000005 80}
-@item BLOB(5, "12345") @tab @code{0B 0000000000000005 01 3132333435 80}
-@item BLOB(5, "123456") @tab @code{0B 0000000000000005 01 3132333435 81 36}
-@item BLOB(500, "123") @tab @code{0B 00000000000001F4 83 313233}
-@item BLOB(2, "12345") @tab @code{0B 0000000000000002 01 3132 01 3334 81 35}
+@item BLOB(5, "") @tab @code{0B 0000000000000004 80}
+@item BLOB(5, "12345") @tab @code{0B 0000000000000004 01 3132333435 80}
+@item BLOB(5, "123456") @tab @code{0B 0000000000000004 01 3132333435 81 36}
+@item BLOB(500, "123") @tab @code{0B 00000000000001F3 83 313233}
+@item BLOB(2, "12345") @tab @code{0B 0000000000000001 01 3132 01 3334 81 35}
  
  @end multitable
diff --git a/tyac/tyac.tcl b/tyac/tyac.tcl

index 837351ccba3eecda90cf40530dd58a1c4e8f20e0f612787ef245c42ebc13acc3..f4232fdcb1664b1816586f7cde55fbe487c0015548962899b4137c87ecc645c4 100644 (file)
--- a/tyac/tyac.tcl
+++ b/tyac/tyac.tcl
@@ -136,7 +136,7 @@ proc MAP {pairs} {
  
  proc BLOB {chunkLen v} {
      char [expr 0x0B]
-    toBE 8 $chunkLen
+    toBE 8 [expr {$chunkLen - 1}]
      set vl [string length $v]
      set chunks [expr {$vl / $chunkLen}]
      for {set i 0} {$i < $chunks} {incr i} {
author	Sergey Matveev <stargrave@stargrave.org>
	Fri, 18 Oct 2024 10:16:20 +0000 (13:16 +0300)
committer	Sergey Matveev <stargrave@stargrave.org>
	Fri, 18 Oct 2024 10:16:20 +0000 (13:16 +0300)
cyac/lib/dec.c		patch \| blob \| history
cyac/lib/enc.c		patch \| blob \| history
cyac/lib/err.c		patch \| blob \| history
cyac/lib/err.h		patch \| blob \| history
gyac/dec.go		patch \| blob \| history
gyac/enc.go		patch \| blob \| history
pyac/pyac.py		patch \| blob \| history
spec/encoding/blob.texi		patch \| blob \| history
tyac/tyac.tcl		patch \| blob \| history