Simpler blob

author Sergey Matveev <stargrave@stargrave.org>

Wed, 9 Oct 2024 11:04:50 +0000 (14:04 +0300)

committer Sergey Matveev <stargrave@stargrave.org>

Wed, 9 Oct 2024 11:05:00 +0000 (14:05 +0300)
author Sergey Matveev <stargrave@stargrave.org>
Wed, 9 Oct 2024 11:04:50 +0000 (14:04 +0300)
committer Sergey Matveev <stargrave@stargrave.org>
Wed, 9 Oct 2024 11:05:00 +0000 (14:05 +0300)
diff --git a/cyac/cmd/print.c b/cyac/cmd/print.c

index 214fc3fd129b03bb24baceacc4fcb76c4a1f18684b9941ba74745f75318d3fca..b4771d711bec6b07ea9775fbfd032e50d5f34d0d553bfa97e7b08e03e61ea98a 100644 (file)
--- a/cyac/cmd/print.c
+++ b/cyac/cmd/print.c
@@ -139,7 +139,7 @@ myCb(
          fputs("}\n", stdout);
          break;
      case YACItemBlob:
-        fputs("BLOB(\n", stdout);
+        printf("BLOB(l=%zu\n", atom->val.uint);
          state->indent++;
          err = YACIterBlob(cbState, atom, off, buf, len, myCb);
          if (err != YACErrNo) {
@@ -202,14 +202,6 @@ myCb(
          fprintf(stdout, "\"%s\"\n", hex);
          free(hex);
          break;
-    case YACItemChunk:
-        hex = HexEnc(atom->val.buf, atom->len);
-        fprintf(stdout, "%s\n", hex);
-        free(hex);
-        break;
-    case YACItemChunkLen:
-        fprintf(stdout, "l=%zu\n", atom->val.uint);
-        break;
      case YACItemRaw:
          hex = HexEnc(atom->val.buf, atom->len);
          fprintf(stdout, "(t=0x%X l=%zu v=%s)\n", atom->tag, atom->len, hex);
diff --git a/cyac/dec.c b/cyac/dec.c

index 22cc80307adc8b742d21d0254e343c273c6a2f6a2c45b22fc0faa95c5ccabc9a..88650d9893063a91676fa33e0c137a24fe9f188ec94365a8e699f10287008f5a 100644 (file)
--- a/cyac/dec.c
+++ b/cyac/dec.c
@@ -24,6 +24,9 @@
  enum YACErr
  YACAtomDecode(struct YACAtom *atom, const unsigned char *buf, const size_t len)
  {
+    atom->len = 0;
+    atom->tag = 0;
+    atom->typ = 0;
      atom->off = 1;
      if (len < 1) {
          return YACErrNotEnough;
@@ -150,6 +153,17 @@ YACAtomDecode(struct YACAtom *atom, const unsigned char *buf, const size_t len)
          break;
      case YACAtomBlob:
          atom->typ = YACItemBlob;
+        atom->off += 8;
+        if ((ptrdiff_t)len < atom->off) {
+            return YACErrNotEnough;
+        }
+        atom->val.uint = yacFromBE(buf + 1, 8);
+        if (atom->val.uint == 0) {
+            return YACErrBlobBadLen;
+        }
+        if (atom->val.uint > ((uint64_t)(1) << 60)) {
+            return YACErrLenTooBig;
+        }
          break;
  
      case YACAtomFloat16:
diff --git a/cyac/dec.h b/cyac/dec.h

index 4fa742ec30d9c93142330ed696deafaeb0daf572aa63004d1e938fbb7444bac9..0fa7ba07aa85e9982ac6bc6fbc92c3e6a17754c4341fb5bbc738005e9b707614 100644 (file)
--- a/cyac/dec.h
+++ b/cyac/dec.h
@@ -15,13 +15,11 @@ enum YACItemType {
      YACItemList,
      YACItemMap,
      YACItemBlob,
-    YACItemFloat,    // atom.val.flt
-    YACItemTAI64,    // atom.val.buf, atom.len
-    YACItemBin,      // atom.val.buf, atom.len
-    YACItemStr,      // atom.val.buf, atom.len
-    YACItemChunk,    // atom.val.buf, atom.len, thrown by YACIterBlob
-    YACItemChunkLen, // atom.val.uint, thrown by YACIterBlob
-    YACItemRaw,      // atom.tag, atom.val.buf, atom.len
+    YACItemFloat, // atom.val.flt
+    YACItemTAI64, // atom.val.buf, atom.len
+    YACItemBin,   // atom.val.buf, atom.len
+    YACItemStr,   // atom.val.buf, atom.len
+    YACItemRaw,   // atom.tag, atom.val.buf, atom.len
  };
  
  enum YACErr {
@@ -51,7 +49,7 @@ struct YACAtom {
      ptrdiff_t off; // length of the whole atom
      size_t len;    // length of the strings, TAI64, raw values
      union {
-        uint64_t uint;            // unsigned integer's value
+        uint64_t uint;            // unsigned integer's value, blob's chunk len
          int64_t sint;             // negative signed integer's value
          const unsigned char *buf; // strings, TAI64, UUID value
      } val;
diff --git a/cyac/enc.c b/cyac/enc.c

index 84e95a5d103a0812846aad7c0b99a7856bbaa526ff60268f78a3c4defe6301ff..765d4526f7841fbb52005cdff891ffae71c770acc93c69c001a4a53923a83b25 100644 (file)
--- a/cyac/enc.c
+++ b/cyac/enc.c
@@ -140,15 +140,12 @@ YACAtomMapEncode(unsigned char *buf, const size_t cap)
  ptrdiff_t
  YACAtomBlobEncode(unsigned char *buf, const size_t cap, const size_t chunkLen)
  {
-    if (cap < 1) {
-        return -1;
+    if (cap < 1 + 8) {
+        return -(1 + 8);
      }
      buf[0] = YACAtomBlob;
-    ptrdiff_t res = YACAtomUintEncode(buf + 1, cap - 1, (uint64_t)chunkLen);
-    if (res <= 0) {
-        return res;
-    }
-    return res + 1;
+    yacToBE(buf + 1, 8, chunkLen);
+    return 1 + 8;
  }
  
  static ptrdiff_t
diff --git a/cyac/iter.c b/cyac/iter.c

index 9405ccad3b660c66a60a6ada402c187a82cfea18fa2ab127797e59ff295a7a5e..9d810457cbb27c96fea6299af796f834d8478ad48d6719d22f65d54d193be6da 100644 (file)
--- a/cyac/iter.c
+++ b/cyac/iter.c
@@ -15,7 +15,6 @@
  
  #include <stdbool.h>
  #include <stddef.h>
-#include <stdint.h>
  #include <string.h>
  
  #include "dec.h"
@@ -111,26 +110,8 @@ YACIterBlob(
      const size_t len,
      YACIterCb cb)
  {
-    enum YACErr err = YACAtomDecode(atom, buf + *off, (size_t)((ptrdiff_t)len - *off));
-    if (err != YACErrNo) {
-        return err;
-    }
-    (*off) += atom->off;
-    if (atom->typ != YACItemUint) {
-        return YACErrBlobBadLen;
-    }
      const size_t chunkLen = atom->val.uint;
-    if (chunkLen == 0) {
-        return YACErrBlobBadLen;
-    }
-    if (chunkLen > ((uint64_t)(1) << 60)) {
-        return YACErrLenTooBig;
-    }
-    atom->typ = YACItemChunkLen;
-    err = cb(NULL, 0, -1, cbState, atom, off, buf, len);
-    if (err != YACErrNo) {
-        return err;
-    }
+    enum YACErr err = YACErrInvalid;
      bool eoc = false;
      for (size_t n = 0; !eoc; n++) {
          err = YACAtomDecode(atom, buf + *off, (size_t)((ptrdiff_t)len - *off));
@@ -143,7 +124,7 @@ YACIterBlob(
              if (((ptrdiff_t)len - *off) <= (ptrdiff_t)chunkLen) {
                  return YACErrBlobShortChunk;
              }
-            atom->typ = YACItemChunk;
+            atom->typ = YACItemBin;
              atom->val.buf = buf + *off;
              atom->len = chunkLen;
              (*off) += chunkLen;
@@ -166,8 +147,6 @@ YACIterBlob(
          case YACItemFloat:
          case YACItemTAI64:
          case YACItemStr:
-        case YACItemChunk:
-        case YACItemChunkLen:
          case YACItemRaw:
          default:
              return YACErrBlobBadAtom;
diff --git a/gyac/dec.go b/gyac/dec.go

index 78ed7c6cb00ce6a91ad9b1e95a5686b7e6e9e1eb024462cce1670bf82536425d..ef552b8b764493ea921ee80f30e1c4eb98cc75511cdc3e14df813cdbd24e097b 100644 (file)
--- a/gyac/dec.go
+++ b/gyac/dec.go
@@ -205,6 +205,21 @@ func AtomDecode(buf []byte) (item *Item, off int, err error) {
                 item.T = byte(ItemMap)
         case AtomBlob:
                 item.T = byte(ItemBlob)
+               off += 8
+               if len(buf) < off {
+                       err = ErrNotEnough
+                       return
+               }
+               chunkLen := FromBE(buf[1 : 1+8])
+               if chunkLen == 0 {
+                       err = ErrBlobBadLen
+                       return
+               }
+               if chunkLen > (1 << 60) {
+                       err = ErrLenTooBig
+                       return
+               }
+               item.V = chunkLen
  
         case AtomFloat16, AtomFloat32, AtomFloat64, AtomFloat128, AtomFloat256:
                 var l int
@@ -338,26 +353,9 @@ func DecodeItem(buf []byte) (item *Item, tail []byte, err error) {
                 item.V = v
                 return
         case ItemBlob:
-               var sub *Item
-               sub, buf, err = DecodeItem(buf)
-               tail = buf
-               if err != nil {
-                       return
-               }
-               if sub.T != byte(ItemUInt) {
-                       err = ErrBlobBadLen
-                       return
-               }
-               if sub.V.(uint64) > (1 << 60) {
-                       err = ErrLenTooBig
-                       return
-               }
-               chunkLen := int(sub.V.(uint64))
-               if chunkLen == 0 {
-                       err = ErrBlobBadLen
-                       return
-               }
+               chunkLen := int(item.V.(uint64))
                 v := &Blob{ChunkLen: chunkLen}
+               var sub *Item
         BlobCycle:
                 for {
                         sub, buf, err = DecodeItem(buf)
diff --git a/gyac/enc.go b/gyac/enc.go

index a9350e71f57ef8ca0647492c1a753cfd5459b81af816ffa5616c7f5b99282189..8dca15afb57fd0ad793a3f1706e49c3358bd33e1f4b59d1f319c78eb3bd0d47b 100644 (file)
--- a/gyac/enc.go
+++ b/gyac/enc.go
@@ -213,7 +213,10 @@ func AtomMapEncode(buf []byte) []byte {
  }
  
  func AtomBlobEncode(buf []byte, chunkLen int) []byte {
-       return AtomUIntEncode(append(buf, byte(AtomBlob)), uint64(chunkLen))
+       l := make([]byte, 9)
+       l[0] = byte(AtomBlob)
+       ToBE(l[1:], uint64(chunkLen))
+       return append(buf, l...)
  }
  
  func atomStrEncode(buf, data []byte, utf8 bool) []byte {
diff --git a/pyac/pyac.py b/pyac/pyac.py

index 6ac96680e740915e09fab4bd81f1727ff1eec442e165508abe428db23e0f0d41..64c7fddff7d1dee1d0985f1043944bd72b1ad7dfa05d148c6b4fb9bdf023d1c1 100644 (file)
--- a/pyac/pyac.py
+++ b/pyac/pyac.py
@@ -297,7 +297,7 @@ class Blob:
          self.l = l
  
      def encode(self):
-        raws = [self.tags[0].to_bytes(1, "big"), Int(self.l).encode()]
+        raws = [self.tags[0].to_bytes(1, "big"), self.l.to_bytes(8, "big")]
          chunks = len(self.v) // (self.l)
          for i in range(chunks):
              raws.append(Nil().encode())
@@ -315,8 +315,10 @@ class Blob:
          if data[0] != klass.tags[0]:
              raise WrongTag
          data = data[1:]
-        l, data = Int.decode(data)
-        l = l.v
+        if len(data) < 8:
+            raise NotEnoughData(8)
+        l = int.from_bytes(data[:8], "big")
+        data = data[8:]
          vs = []
          while True:
              v, data = Decode(data)
diff --git a/spec/encoding/blob.texi b/spec/encoding/blob.texi

index c0c2d6bf377310c16739194993b011ec074362d5fd4e4fa40be37e50cb08d1e8..76024d5b362e0d26793f042db7ceb9e5c3779996ddbcf1a4cc2481b2f38d36cf 100644 (file)
--- a/spec/encoding/blob.texi
+++ b/spec/encoding/blob.texi
@@ -5,24 +5,24 @@
  Blob (binary large object) allows you to transfer binary data in chunks,
  in a streaming way, when data may not fit in memory at once.
  
-Positive non-zero @ref{Integers, INT} must follow the BLOB tag, setting
-the following chunks payload size. Then come zero or more NIL tags with
-fixed-length payload after each of them. Blob is terminated by
-@ref{Strings, BIN}, probably having zero length.
+A non-zero 64-bit big-endian integer follows the BLOB tag, setting the
+payload size of the following chunks. Then come zero or more NIL tags,
+each followed by a fixed-length payload. Blob is terminated by
+@ref{Strings, BIN}, possibly having zero length.
  
  Data format definition must specify exact chunk size expected to be
  used, if it needs deterministic encoding.
  
  @verbatim
-BLOB INT [NIL || payload0 || NIL || payload1 || ...] BIN
+BLOB len [NIL || payload0 || NIL || payload1 || ...] BIN
  @end verbatim
  
  @multitable @columnfractions .5 .5
  
-@item BLOB(5, "") @tab @code{0B 45 80}
-@item BLOB(5, "12345") @tab @code{0B 45 01 3132333435 80}
-@item BLOB(5, "123456") @tab @code{0B 45 01 3132333435 81 36}
-@item BLOB(500, "123") @tab @code{0B 2101F4 83 313233}
-@item BLOB(2, "12345") @tab @code{0B 42 01 3132 01 3334 81 35}
+@item BLOB(5, "") @tab @code{0B 0000000000000005 80}
+@item BLOB(5, "12345") @tab @code{0B 0000000000000005 01 3132333435 80}
+@item BLOB(5, "123456") @tab @code{0B 0000000000000005 01 3132333435 81 36}
+@item BLOB(500, "123") @tab @code{0B 00000000000001F4 83 313233}
+@item BLOB(2, "12345") @tab @code{0B 0000000000000002 01 3132 01 3334 81 35}
  
  @end multitable
diff --git a/tyac/tyac.tcl b/tyac/tyac.tcl

index 7ed9d8bbd48b80af396812c1469570c5f0ccdebe76096549d39a7711e8c87c13..1a1286715daf3bcc57213bda4b5c461c909119816559b736d14ed6f77ae0d572 100644 (file)
--- a/tyac/tyac.tcl
+++ b/tyac/tyac.tcl
@@ -133,7 +133,7 @@ proc MAP {pairs} {
  
  proc BLOB {chunkLen v} {
      char [expr 0x0B]
-    INT $chunkLen
+    toBE 8 $chunkLen
      set vl [string length $v]
      set chunks [expr {$vl / $chunkLen}]
      for {set i 0} {$i < $chunks} {incr i} {
author	Sergey Matveev <stargrave@stargrave.org>
	Wed, 9 Oct 2024 11:04:50 +0000 (14:04 +0300)
committer	Sergey Matveev <stargrave@stargrave.org>
	Wed, 9 Oct 2024 11:05:00 +0000 (14:05 +0300)
cyac/cmd/print.c		patch \| blob \| history
cyac/dec.c		patch \| blob \| history
cyac/dec.h		patch \| blob \| history
cyac/enc.c		patch \| blob \| history
cyac/iter.c		patch \| blob \| history
gyac/dec.go		patch \| blob \| history
gyac/enc.go		patch \| blob \| history
pyac/pyac.py		patch \| blob \| history
spec/encoding/blob.texi		patch \| blob \| history
tyac/tyac.tcl		patch \| blob \| history