From: Sergey Matveev <stargrave@stargrave.org>
Date: Mon, 24 Nov 2025 16:41:52 +0000 (+0300)
Subject: Slightly more compact BLOB
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=183ea2a5be76ecec3c8524128f124f8ce76bb8492539037e598e1cb463300791;p=keks.git

Slightly more compact BLOB
---

diff --git a/c/lib/dec.c b/c/lib/dec.c
index c726d1d..a831f7a 100644
--- a/c/lib/dec.c
+++ b/c/lib/dec.c
@@ -167,11 +167,20 @@ KEKSAtomDecode( // NOLINT(misc-no-recursion)
         break;
     case KEKSAtomBlob: {
         atom->typ = KEKSItemBlob;
-        (*got) += 8;
-        if (len < (*got)) {
-            return KEKSErrNotEnough;
+        if (buf[1] != KEKSAtomPint) {
+            return KEKSErrBlobBadAtom;
+        }
+        size_t pintGot = 0;
+        struct KEKSAtom pint = {0};
+        enum KEKSErr err = KEKSAtomDecode(&pintGot, &pint, buf + 1, len - 1);
+        if (err != KEKSErrNo) {
+            return err;
         }
-        const uint64_t chunkLen = keksFromBE(buf + 1, 8);
+        (*got) += pintGot;
+        if (pint.typ != KEKSItemPint) {
+            return KEKSErrBlobBadAtom;
+        }
+        const uint64_t chunkLen = pint.v.pint;
         if (chunkLen > (SIZE_MAX - 1)) {
             return KEKSErrLenTooBig;
         }
@@ -182,11 +191,11 @@ KEKSAtomDecode( // NOLINT(misc-no-recursion)
     case KEKSAtomPint:
     case KEKSAtomNint: {
         atom->typ = (tag == KEKSAtomPint) ? KEKSItemPint : KEKSItemNint;
-        size_t binGot = 0;
-        struct KEKSAtom bin = {0};
         if ((buf[1] & (unsigned char)KEKSAtomStrings) == 0) {
             return KEKSErrIntNonBin;
         }
+        size_t binGot = 0;
+        struct KEKSAtom bin = {0};
         enum KEKSErr err = KEKSAtomDecode(&binGot, &bin, buf + 1, len - 1);
         if (err != KEKSErrNo) {
             return err;
diff --git a/c/lib/enc.c b/c/lib/enc.c
index 683a9e3..201dc3c 100644
--- a/c/lib/enc.c
+++ b/c/lib/enc.c
@@ -256,14 +256,16 @@ KEKSAtomBlobEncode(
     const size_t chunkLen)
 {
     assert(len != NULL);
-    (*len) = 1 + 8;
-    if (cap < 1 + 8) {
+    assert(chunkLen != 0);
+    (*len) = 1;
+    if (cap <= 1) {
         return false;
     }
     assert(buf != NULL);
     buf[0] = KEKSAtomBlob;
-    keksToBE(buf + 1, 8, (uint64_t)chunkLen - 1);
-    return true;
+    bool ok = KEKSAtomUintEncode(len, buf + 1, cap - 1, chunkLen - 1);
+    (*len)++;
+    return ok;
 }
 
 static bool
diff --git a/go/atom-decode.go b/go/atom-decode.go
index 0138409..cd97983 100644
--- a/go/atom-decode.go
+++ b/go/atom-decode.go
@@ -17,7 +17,6 @@ package keks
 
 import (
 	"errors"
-	"math/big"
 	"strings"
 	"unicode/utf8"
 	"unsafe"
@@ -38,6 +37,7 @@ var (
 	ErrTAINonMinimal   = errors.New("non-minimal TAI64")
 	ErrTAITooManyNsecs = errors.New("too many nanoseconds")
 	ErrTAITooManyAsecs = errors.New("too many attoseconds")
+	ErrBlobBadInt      = errors.New("blob with non Pint")
 )
 
 func (ctx *Decoder) DecodeAtom() (t types.Type, err error) {
@@ -98,12 +98,26 @@ func (ctx *Decoder) DecodeAtom() (t types.Type, err error) {
 	case AtomMap:
 		t = types.Map
 	case AtomBLOB:
-		var s string
-		s, err = ctx.getBytes(8)
+		tag, err = ctx.getByte()
+		if err != nil {
+			return
+		}
+		if AtomType(tag) != AtomPInt {
+			err = ErrBlobBadInt
+			return
+		}
+		var isBig bool
+		isBig, err = ctx.getInt(types.UInt)
 		if err != nil {
 			return
 		}
-		chunkLen := be.Get([]byte(s))
+		if isBig {
+			ctx.bigints = ctx.bigints[:len(ctx.bigints)-1]
+			err = ErrLenTooBig
+			return
+		}
+		chunkLen := ctx.uints[len(ctx.uints)-1]
+		ctx.uints = ctx.uints[:len(ctx.uints)-1]
 		if chunkLen >= (1<<63)-1 {
 			err = ErrLenTooBig
 			return
@@ -117,54 +131,13 @@ func (ctx *Decoder) DecodeAtom() (t types.Type, err error) {
 		} else {
 			t = types.Int
 		}
-		tag, err = ctx.getByte()
-		if err != nil {
-			return
-		}
-		if tag&AtomStrings == 0 || tag&AtomIsUTF8 != 0 {
-			err = ErrIntBad
-			return
-		}
-		var s string
-		s, err = ctx.getStr(tag)
+		var isBig bool
+		isBig, err = ctx.getInt(t)
 		if err != nil {
 			return
 		}
-		if len(s) == 0 {
-			if t == types.UInt {
-				ctx.uints = append(ctx.uints, 0)
-			} else {
-				ctx.ints = append(ctx.ints, -1)
-			}
-			break
-		}
-		if s[0] == 0 {
-			err = ErrIntNonMinimal
-			return
-		}
-		if len(s) > 8 {
-			bi := new(big.Int).SetBytes([]byte(s))
-			if t == types.Int {
-				bi = bi.Add(bi, big.NewInt(1))
-				bi = bi.Neg(bi)
-			}
+		if isBig {
 			t = types.BigInt
-			ctx.bigints = append(ctx.bigints, bi)
-			break
-		}
-		i := be.Get([]byte(s))
-		if t == types.UInt {
-			ctx.uints = append(ctx.uints, i)
-		} else {
-			if i >= (1 << 63) {
-				bi := new(big.Int).SetBytes([]byte(s))
-				bi = bi.Add(bi, big.NewInt(1))
-				bi = bi.Neg(bi)
-				ctx.bigints = append(ctx.bigints, bi)
-				t = types.BigInt
-			} else {
-				ctx.ints = append(ctx.ints, -1-int64(i))
-			}
 		}
 	case AtomFloatNaN:
 		t = types.Float
diff --git a/go/atom-encode.go b/go/atom-encode.go
index 7987c4f..74dd91d 100644
--- a/go/atom-encode.go
+++ b/go/atom-encode.go
@@ -155,10 +155,13 @@ func FloatEncode(w io.Writer, v *Float) (written int64, err error) {
 
 // Write an encoded BLOB atom.
 func BlobAtomEncode(w io.Writer, chunkLen int64) (written int64, err error) {
-	l := make([]byte, 9)
-	l[0] = byte(AtomBLOB)
-	be.Put(l[1:], uint64(chunkLen-1))
-	return io.Copy(w, bytes.NewReader(l))
+	_, err = io.Copy(w, bytes.NewReader([]byte{byte(AtomBLOB), byte(AtomPInt)}))
+	if err != nil {
+		return
+	}
+	written, err = atomUintEncode(w, uint64(chunkLen-1))
+	written += 2
+	return
 }
 
 // Write an encoded BLOB.
diff --git a/go/blob_test.go b/go/blob_test.go
index d8d5114..cbf4b2a 100644
--- a/go/blob_test.go
+++ b/go/blob_test.go
@@ -18,16 +18,15 @@ package keks
 
 import (
 	"bytes"
+	"encoding/hex"
 	"io"
 	"testing"
 	"testing/quick"
-
-	"go.cypherpunks.su/keks/be"
 )
 
 func TestBlobMultipleOfChunkLen(t *testing.T) {
 	bin := bytes.Join([][]byte{
-		mustHexDec("0B0000000000000003"),
+		mustHexDec("0B0C8103"),
 		{0x84},
 		[]byte("test"),
 		{0x84},
@@ -43,7 +42,7 @@ func TestBlobMultipleOfChunkLen(t *testing.T) {
 		t.Fatal(err)
 	}
 	if !bytes.Equal(encoded, bin) {
-		t.Fatal("encoded differs")
+		t.Fatal("encoded differs", hex.EncodeToString(encoded), hex.EncodeToString(bin))
 	}
 	decoder := NewDecoderFromBytes(append(encoded, Junk...), nil)
 	decoded, err := decoder.Decode()
@@ -83,7 +82,7 @@ func TestBlobMultipleOfChunkLen(t *testing.T) {
 
 func TestBlobLargerOfChunkLen(t *testing.T) {
 	bin := bytes.Join([][]byte{
-		mustHexDec("0B0000000000000003"),
+		mustHexDec("0B0C8103"),
 		{0x84},
 		[]byte("test"),
 		{0x84},
@@ -142,7 +141,7 @@ func TestBlobLargerOfChunkLen(t *testing.T) {
 }
 
 func TestBlobEmpty(t *testing.T) {
-	bin := mustHexDec("0B0000000000000003" + "80")
+	bin := mustHexDec("0B0C8103" + "80")
 	encoded, err := EncodeBuf(BlobReader{
 		ChunkLen: 4,
 		R:        bytes.NewReader(nil),
@@ -241,7 +240,7 @@ func TestBlobSymmetric(t *testing.T) {
 
 func TestBlobNotEnoughData(t *testing.T) {
 	bin := bytes.Join([][]byte{
-		mustHexDec("0B0000000000000003"),
+		mustHexDec("0B0C8103"),
 		{0x84},
 		[]byte("test"),
 		{0x84},
@@ -254,17 +253,17 @@ func TestBlobNotEnoughData(t *testing.T) {
 }
 
 func TestBlobTooLong(t *testing.T) {
-	bin := make([]byte, 1+8)
-	bin[0] = byte(AtomBLOB)
-	be.Put(bin[1:], (1<<63)-1)
-	_, err := NewDecoderFromBytes(bin, nil).Decode()
+	var buf bytes.Buffer
+	buf.Write([]byte{byte(AtomBLOB)})
+	UIntEncode(&buf, (1<<63)-1)
+	_, err := NewDecoderFromBytes(buf.Bytes(), nil).Decode()
 	if err != ErrLenTooBig {
 		t.Fatal(err)
 	}
 }
 
 func TestBlobNotEnoughDataForLength(t *testing.T) {
-	bin := mustHexDec("0B00000000")
+	bin := mustHexDec("0B0C81")
 	_, err := NewDecoderFromBytes(bin, nil).Decode()
 	if err != io.ErrUnexpectedEOF {
 		t.Fatal(err)
@@ -273,7 +272,7 @@ func TestBlobNotEnoughDataForLength(t *testing.T) {
 
 func TestBlobWrongTerminatorLength(t *testing.T) {
 	bin := bytes.Join([][]byte{
-		mustHexDec("0B0000000000000003"),
+		mustHexDec("0B0C8103"),
 		{0x84},
 		[]byte("test"),
 		{0x84},
@@ -289,7 +288,7 @@ func TestBlobWrongTerminatorLength(t *testing.T) {
 
 func TestBlobWrongTerminatorTag(t *testing.T) {
 	bin := bytes.Join([][]byte{
-		mustHexDec("0B0000000000000003"),
+		mustHexDec("0B0C8103"),
 		{0x84},
 		[]byte("test"),
 		{0x84},
@@ -304,10 +303,13 @@ func TestBlobWrongTerminatorTag(t *testing.T) {
 }
 
 func TestBlobTooDeep(t *testing.T) {
-	bin := []byte{byte(AtomBLOB)}
-	bin = append(bin, bytes.Repeat([]byte{0x01}, 8)...)
-	bin = append(bin, bytes.Repeat([]byte{byte(AtomList)}, 1000)...)
-	if _, err := NewDecoderFromBytes(bin, nil).Decode(); err != ErrBlobBadAtom {
+	var buf bytes.Buffer
+	buf.Write([]byte{byte(AtomBLOB)})
+	UIntEncode(&buf, 1)
+	for range 1000 {
+		buf.Write([]byte{byte(AtomList)})
+	}
+	if _, err := NewDecoderFromBytes(buf.Bytes(), nil).Decode(); err != ErrBlobBadAtom {
 		t.Fatal(err)
 	}
 }
diff --git a/go/int.go b/go/int.go
new file mode 100644
index 0000000..cdffc12
--- /dev/null
+++ b/go/int.go
@@ -0,0 +1,77 @@
+// KEKS -- Go KEKS codec implementation
+// Copyright (C) 2024-2025 Sergey Matveev <stargrave@stargrave.org>
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation, version 3 of the License.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package keks
+
+import (
+	"math/big"
+
+	"go.cypherpunks.su/keks/be"
+	"go.cypherpunks.su/keks/types"
+)
+
+func (ctx *Decoder) getInt(t types.Type) (isBig bool, err error) { // decodes an integer payload onto ctx.uints, ctx.ints or ctx.bigints
+	var tag byte
+	tag, err = ctx.getByte()
+	if err != nil {
+		return
+	}
+	if tag&AtomStrings == 0 || tag&AtomIsUTF8 != 0 { // payload must be a string atom without the UTF-8 flag
+		err = ErrIntBad
+		return
+	}
+	var s string
+	s, err = ctx.getStr(tag)
+	if err != nil {
+		return
+	}
+	if len(s) == 0 { // empty payload: 0 for types.UInt, -1 otherwise
+		if t == types.UInt {
+			ctx.uints = append(ctx.uints, 0)
+		} else {
+			ctx.ints = append(ctx.ints, -1)
+		}
+		return
+	}
+	if s[0] == 0 { // a leading zero byte is rejected as non-minimal
+		err = ErrIntNonMinimal
+		return
+	}
+	if len(s) > 8 { // more than 8 bytes: the value is stored as a *big.Int
+		isBig = true
+		bi := new(big.Int).SetBytes([]byte(s))
+		if t == types.Int { // for types.Int a magnitude n denotes -(n+1)
+			bi = bi.Add(bi, big.NewInt(1))
+			bi = bi.Neg(bi)
+		}
+		ctx.bigints = append(ctx.bigints, bi)
+		return
+	}
+	i := be.Get([]byte(s))
+	if t == types.UInt {
+		ctx.uints = append(ctx.uints, i)
+	} else {
+		if i >= (1 << 63) { // -1-i would not fit in int64, so fall back to big.Int
+			isBig = true
+			bi := new(big.Int).SetBytes([]byte(s))
+			bi = bi.Add(bi, big.NewInt(1))
+			bi = bi.Neg(bi)
+			ctx.bigints = append(ctx.bigints, bi)
+		} else {
+			ctx.ints = append(ctx.ints, -1-int64(i))
+		}
+	}
+	return // isBig is true only when the value was appended to ctx.bigints
+}
diff --git a/py3/keks.py b/py3/keks.py
index d0e2ee6..0cad765 100755
--- a/py3/keks.py
+++ b/py3/keks.py
@@ -323,7 +323,7 @@ def dumps(v):
     if isinstance(v, Blob):
         assert (v.l > 0) and (v.l <= (1 << 64))
         l, v = v.l, v.v
-        raws = [TagBlobb, (l-1).to_bytes(8, "big")]
+        raws = [TagBlobb, dumps(l-1)]
         append = raws.append
         chunks = len(v) // l
         for i in range(chunks):
@@ -527,10 +527,12 @@ def _loads(v, sets=False, leapsecUTCAllow=False, _allowContainers=True):
             ret = set(ret.keys())
         return ret, v
     if b == TagBlob:
-        if len(v) < 1+8:
-            raise NotEnoughData(1+8-len(v))
-        l = 1 + int.from_bytes(v[1:1+8], "big")
-        v = v[1+8:]
+        if len(v) < 3:
+            raise NotEnoughData(3-len(v))
+        if v[1] != TagPInt:
+            raise DecodeError("blob without Pint")
+        l, v = _loads(v[1:])
+        l += 1
         raws = []
         while True:
             i, v = _loads(v, _allowContainers=False)
diff --git a/py3/tests/test_blob.py b/py3/tests/test_blob.py
index 2ee62ec..b080581 100644
--- a/py3/tests/test_blob.py
+++ b/py3/tests/test_blob.py
@@ -36,7 +36,7 @@ class TestBlob(TestCase):
         self.assertSequenceEqual(
             encoded,
             b"".join((
-                bytes.fromhex("0B0000000000000003"),
+                bytes.fromhex("0B0C8103"),
                 bytes.fromhex("84"), b"test",
                 bytes.fromhex("84"), b"data",
                 bytes.fromhex("80"),
@@ -54,7 +54,7 @@ class TestBlob(TestCase):
         self.assertSequenceEqual(
             encoded,
             b"".join((
-                bytes.fromhex("0B0000000000000003"),
+                bytes.fromhex("0B0C8103"),
                 bytes.fromhex("84"), b"test",
                 bytes.fromhex("84"), b"data",
                 bytes.fromhex("81"), b"2",
@@ -72,7 +72,7 @@ class TestBlob(TestCase):
         self.assertSequenceEqual(
             encoded,
             b"".join((
-                bytes.fromhex("0B0000000000000003"),
+                bytes.fromhex("0B0C8103"),
                 bytes.fromhex("80"),
             )),
         )
@@ -90,7 +90,7 @@ class TestBlob(TestCase):
         chunks = [urandom(chunkLen) for _ in range(chunks)]
         encoded = b"".join((
             b"\x0b",
-            (chunkLen-1).to_bytes(8, "big"),
+            dumps(chunkLen-1),
             b"".join(dumps(chunk) for chunk in chunks),
             b"\x80",
             junk,
@@ -102,7 +102,7 @@ class TestBlob(TestCase):
 
     def test_throws_when_not_enough_data(self) -> None:
         encoded = b"".join((
-            bytes.fromhex("0B0000000000000003"),
+            bytes.fromhex("0B0C8103"),
             bytes.fromhex("84"), b"test",
             bytes.fromhex("84"), b"da",
         ))
@@ -111,14 +111,14 @@ class TestBlob(TestCase):
         self.assertEqual(err.exception.n, 2)
 
     def test_throws_when_not_enough_data_for_length(self) -> None:
-        encoded = bytes.fromhex("0B00000000")
+        encoded = bytes.fromhex("0B0C81")
         with self.assertRaises(NotEnoughData) as err:
             loads(encoded)
-        self.assertEqual(err.exception.n, 8-4)
+        self.assertEqual(err.exception.n, 1)
 
     def test_throws_when_wrong_terminator_length(self) -> None:
         encoded = b"".join((
-            bytes.fromhex("0B0000000000000003"),
+            bytes.fromhex("0B0C8103"),
             bytes.fromhex("84"), b"test",
             bytes.fromhex("84"), b"data",
             bytes.fromhex("8A"), b"terminator",
@@ -129,7 +129,7 @@ class TestBlob(TestCase):
 
     def test_throws_when_wrong_terminator_tag(self) -> None:
         encoded = b"".join((
-            bytes.fromhex("0B0000000000000003"),
+            bytes.fromhex("0B0C8103"),
             bytes.fromhex("84"), b"test",
             bytes.fromhex("84"), b"data",
             bytes.fromhex("04"), b"that was a wrong tag",
diff --git a/py3/tests/test_recursion.py b/py3/tests/test_recursion.py
index da628b0..b17fc4c 100644
--- a/py3/tests/test_recursion.py
+++ b/py3/tests/test_recursion.py
@@ -18,6 +18,7 @@ from unittest import TestCase
 
 from keks import _byte
 from keks import DecodeError
+from keks import dumps
 from keks import loads
 from keks import TagBlob
 from keks import TagList
@@ -34,7 +35,7 @@ class TestTooDeepInt(TestCase):
 class TestTooDeepBlob(TestCase):
     def runTest(self) -> None:
         with self.assertRaises(DecodeError) as err:
-            loads(_byte(TagBlob) + (8 * b"\x01") + _byte(TagList) * 1000)
+            loads(_byte(TagBlob) + dumps(1) + _byte(TagList) * 1000)
         self.assertEqual(str(err.exception), "unknown tag")
 
 
diff --git a/spec/encoding/BLOB b/spec/encoding/BLOB
index e7eb48d..22dddc1 100644
--- a/spec/encoding/BLOB
+++ b/spec/encoding/BLOB
@@ -1,8 +1,8 @@
 BLOB (binary large object) allows you to transfer binary data in chunks,
 in a streaming way, when data may not fit in memory.
 
-64-bit big-endian integer follows the BLOB tag, setting the following
-chunks payload size (+1). Then come one or more binary [encoding/String]
+A positive [encoding/INT] follows the BLOB tag, holding the chunk
+payload size minus one. Then come one or more binary [encoding/String]
 with the chunk-length payload. All of them, except for the last
 one, must have fixed chunk length payload. Last terminating string's
 payload must be shorter.
@@ -10,12 +10,12 @@ payload must be shorter.
 Data format definition must specify exact chunk size expected to be
 used, if it needs deterministic encoding.
 
-    BLOB chunk-len [BIN(len=chunk-len) || ...] BIN(len<chunk-len)
+    BLOB INT(chunk-len-1) [BIN(len=chunk-len) || ...] BIN(len<chunk-len)
 
 Example representations:
 
-BLOB {5 ""}       | 0B 0000000000000004 80
-BLOB {5 "12345"}  | 0B 0000000000000004 85 3132333435 80
-BLOB {5 "123456"} | 0B 0000000000000004 85 3132333435 81 36
-BLOB {500 "123"}  | 0B 00000000000001F3 83 313233
-BLOB {2 "12345"}  | 0B 0000000000000001 82 3132 82 3334 81 35
+BLOB {5 ""}       | 0B 0C8104   80
+BLOB {5 "12345"}  | 0B 0C8104   85 3132333435 80
+BLOB {5 "123456"} | 0B 0C8104   85 3132333435 81 36
+BLOB {500 "123"}  | 0B 0C8201F3 83 313233
+BLOB {2 "12345"}  | 0B 0C8101   82 3132 82 3334 81 35
diff --git a/spec/encoding/FullTable b/spec/encoding/FullTable
index 2999c36..e355c4d 100644
--- a/spec/encoding/FullTable
+++ b/spec/encoding/FullTable
@@ -10,7 +10,7 @@ dec | hex | bin      | vlen |
 008 | 08  | 00001000 | 0    | [encoding/LIST]
 009 | 09  | 00001001 | 0    | [encoding/MAP]
 010 | 0A  | 00001010 | 0    |
-011 | 0B  | 00001011 | 8+~  | [encoding/BLOB]
+011 | 0B  | 00001011 | 3+~  | [encoding/BLOB]
 012 | 0C  | 00001100 | 1+~  | + [encoding/INT]
 013 | 0D  | 00001101 | 1+~  | - [encoding/INT]
 014 | 0E  | 00001110 | 0    |
diff --git a/spec/encoding/index b/spec/encoding/index
index b7f37ba..6d3fe92 100644
--- a/spec/encoding/index
+++ b/spec/encoding/index
@@ -13,7 +13,7 @@ dec | hex | bin      | vlen |
 008 | 08  | 00001000 | 0    | [encoding/LIST]
 009 | 09  | 00001001 | 0    | [encoding/MAP]
 010 | 0A  | 00001010 |
-011 | 0B  | 00001011 | 8+~  | [encoding/BLOB]
+011 | 0B  | 00001011 | 3+~  | [encoding/BLOB]
 012 | 0C  | 00001100 | 1+~  | + [encoding/INT]
 013 | 0D  | 00001101 | 1+~  | - [encoding/INT]
 ... | ... | ...      | ...  | ...
diff --git a/tcl/keks.tcl b/tcl/keks.tcl
index af5501e..4062a09 100755
--- a/tcl/keks.tcl
+++ b/tcl/keks.tcl
@@ -179,7 +179,7 @@ proc STR {v} {
 proc BLOB {chunkLen v} {
     upvar buf buf
     char [expr 0x0B]
-    toBE 8 [expr {$chunkLen - 1}]
+    INT [expr {$chunkLen - 1}]
     set vl [string length $v]
     set chunks [expr {$vl / $chunkLen}]
     for {set i 0} {$i < $chunks} {incr i} {