break;
case KEKSAtomBlob: {
atom->typ = KEKSItemBlob;
- (*got) += 8;
- if (len < (*got)) {
- return KEKSErrNotEnough;
+ if (buf[1] != KEKSAtomPint) {
+ return KEKSErrBlobBadAtom;
+ }
+ size_t pintGot = 0;
+ struct KEKSAtom pint = {0};
+ enum KEKSErr err = KEKSAtomDecode(&pintGot, &pint, buf + 1, len - 1);
+ if (err != KEKSErrNo) {
+ return err;
}
- const uint64_t chunkLen = keksFromBE(buf + 1, 8);
+ (*got) += pintGot;
+ if (pint.typ != KEKSItemPint) {
+ return KEKSErrBlobBadAtom;
+ }
+ const uint64_t chunkLen = pint.v.pint;
if (chunkLen > (SIZE_MAX - 1)) {
return KEKSErrLenTooBig;
}
case KEKSAtomPint:
case KEKSAtomNint: {
atom->typ = (tag == KEKSAtomPint) ? KEKSItemPint : KEKSItemNint;
- size_t binGot = 0;
- struct KEKSAtom bin = {0};
if ((buf[1] & (unsigned char)KEKSAtomStrings) == 0) {
return KEKSErrIntNonBin;
}
+ size_t binGot = 0;
+ struct KEKSAtom bin = {0};
enum KEKSErr err = KEKSAtomDecode(&binGot, &bin, buf + 1, len - 1);
if (err != KEKSErrNo) {
return err;
const size_t chunkLen)
{
assert(len != NULL);
- (*len) = 1 + 8;
- if (cap < 1 + 8) {
+ assert(chunkLen != 0);
+ (*len) = 1;
+ if (cap <= 1) {
return false;
}
assert(buf != NULL);
buf[0] = KEKSAtomBlob;
- keksToBE(buf + 1, 8, (uint64_t)chunkLen - 1);
- return true;
+ bool ok = KEKSAtomUintEncode(len, buf + 1, cap - 1, chunkLen - 1);
+ (*len)++;
+ return ok;
}
static bool
import (
"errors"
- "math/big"
"strings"
"unicode/utf8"
"unsafe"
ErrTAINonMinimal = errors.New("non-minimal TAI64")
ErrTAITooManyNsecs = errors.New("too many nanoseconds")
ErrTAITooManyAsecs = errors.New("too many attoseconds")
+ ErrBlobBadInt = errors.New("blob with non Pint")
)
func (ctx *Decoder) DecodeAtom() (t types.Type, err error) {
case AtomMap:
t = types.Map
case AtomBLOB:
- var s string
- s, err = ctx.getBytes(8)
+ tag, err = ctx.getByte()
+ if err != nil {
+ return
+ }
+ if AtomType(tag) != AtomPInt {
+ err = ErrBlobBadInt
+ return
+ }
+ var isBig bool
+ isBig, err = ctx.getInt(types.UInt)
if err != nil {
return
}
- chunkLen := be.Get([]byte(s))
+ if isBig {
+ ctx.bigints = ctx.bigints[:len(ctx.bigints)-1]
+ err = ErrLenTooBig
+ return
+ }
+ chunkLen := ctx.uints[len(ctx.uints)-1]
+ ctx.uints = ctx.uints[:len(ctx.uints)-1]
if chunkLen >= (1<<63)-1 {
err = ErrLenTooBig
return
} else {
t = types.Int
}
- tag, err = ctx.getByte()
- if err != nil {
- return
- }
- if tag&AtomStrings == 0 || tag&AtomIsUTF8 != 0 {
- err = ErrIntBad
- return
- }
- var s string
- s, err = ctx.getStr(tag)
+ var isBig bool
+ isBig, err = ctx.getInt(t)
if err != nil {
return
}
- if len(s) == 0 {
- if t == types.UInt {
- ctx.uints = append(ctx.uints, 0)
- } else {
- ctx.ints = append(ctx.ints, -1)
- }
- break
- }
- if s[0] == 0 {
- err = ErrIntNonMinimal
- return
- }
- if len(s) > 8 {
- bi := new(big.Int).SetBytes([]byte(s))
- if t == types.Int {
- bi = bi.Add(bi, big.NewInt(1))
- bi = bi.Neg(bi)
- }
+ if isBig {
t = types.BigInt
- ctx.bigints = append(ctx.bigints, bi)
- break
- }
- i := be.Get([]byte(s))
- if t == types.UInt {
- ctx.uints = append(ctx.uints, i)
- } else {
- if i >= (1 << 63) {
- bi := new(big.Int).SetBytes([]byte(s))
- bi = bi.Add(bi, big.NewInt(1))
- bi = bi.Neg(bi)
- ctx.bigints = append(ctx.bigints, bi)
- t = types.BigInt
- } else {
- ctx.ints = append(ctx.ints, -1-int64(i))
- }
}
case AtomFloatNaN:
t = types.Float
// Write an encoded BLOB atom.
func BlobAtomEncode(w io.Writer, chunkLen int64) (written int64, err error) {
- l := make([]byte, 9)
- l[0] = byte(AtomBLOB)
- be.Put(l[1:], uint64(chunkLen-1))
- return io.Copy(w, bytes.NewReader(l))
+ _, err = io.Copy(w, bytes.NewReader([]byte{byte(AtomBLOB), byte(AtomPInt)}))
+ if err != nil {
+ return
+ }
+ written, err = atomUintEncode(w, uint64(chunkLen-1))
+ written += 2
+ return
}
// Write an encoded BLOB.
import (
"bytes"
+ "encoding/hex"
"io"
"testing"
"testing/quick"
-
- "go.cypherpunks.su/keks/be"
)
func TestBlobMultipleOfChunkLen(t *testing.T) {
bin := bytes.Join([][]byte{
- mustHexDec("0B0000000000000003"),
+ mustHexDec("0B0C8103"),
{0x84},
[]byte("test"),
{0x84},
t.Fatal(err)
}
if !bytes.Equal(encoded, bin) {
- t.Fatal("encoded differs")
+ t.Fatal("encoded differs", hex.EncodeToString(encoded), hex.EncodeToString(bin))
}
decoder := NewDecoderFromBytes(append(encoded, Junk...), nil)
decoded, err := decoder.Decode()
func TestBlobLargerOfChunkLen(t *testing.T) {
bin := bytes.Join([][]byte{
- mustHexDec("0B0000000000000003"),
+ mustHexDec("0B0C8103"),
{0x84},
[]byte("test"),
{0x84},
}
func TestBlobEmpty(t *testing.T) {
- bin := mustHexDec("0B0000000000000003" + "80")
+ bin := mustHexDec("0B0C8103" + "80")
encoded, err := EncodeBuf(BlobReader{
ChunkLen: 4,
R: bytes.NewReader(nil),
func TestBlobNotEnoughData(t *testing.T) {
bin := bytes.Join([][]byte{
- mustHexDec("0B0000000000000003"),
+ mustHexDec("0B0C8103"),
{0x84},
[]byte("test"),
{0x84},
}
func TestBlobTooLong(t *testing.T) {
- bin := make([]byte, 1+8)
- bin[0] = byte(AtomBLOB)
- be.Put(bin[1:], (1<<63)-1)
- _, err := NewDecoderFromBytes(bin, nil).Decode()
+ var buf bytes.Buffer
+ buf.Write([]byte{byte(AtomBLOB)})
+ UIntEncode(&buf, (1<<63)-1)
+ _, err := NewDecoderFromBytes(buf.Bytes(), nil).Decode()
if err != ErrLenTooBig {
t.Fatal(err)
}
}
func TestBlobNotEnoughDataForLength(t *testing.T) {
- bin := mustHexDec("0B00000000")
+ bin := mustHexDec("0B0C81")
_, err := NewDecoderFromBytes(bin, nil).Decode()
if err != io.ErrUnexpectedEOF {
t.Fatal(err)
func TestBlobWrongTerminatorLength(t *testing.T) {
bin := bytes.Join([][]byte{
- mustHexDec("0B0000000000000003"),
+ mustHexDec("0B0C8103"),
{0x84},
[]byte("test"),
{0x84},
func TestBlobWrongTerminatorTag(t *testing.T) {
bin := bytes.Join([][]byte{
- mustHexDec("0B0000000000000003"),
+ mustHexDec("0B0C8103"),
{0x84},
[]byte("test"),
{0x84},
}
func TestBlobTooDeep(t *testing.T) {
- bin := []byte{byte(AtomBLOB)}
- bin = append(bin, bytes.Repeat([]byte{0x01}, 8)...)
- bin = append(bin, bytes.Repeat([]byte{byte(AtomList)}, 1000)...)
- if _, err := NewDecoderFromBytes(bin, nil).Decode(); err != ErrBlobBadAtom {
+ var buf bytes.Buffer
+ buf.Write([]byte{byte(AtomBLOB)})
+ UIntEncode(&buf, 1)
+ for range 1000 {
+ buf.Write([]byte{byte(AtomList)})
+ }
+ if _, err := NewDecoderFromBytes(buf.Bytes(), nil).Decode(); err != ErrBlobBadAtom {
t.Fatal(err)
}
}
--- /dev/null
+// KEKS -- Go KEKS codec implementation
+// Copyright (C) 2024-2025 Sergey Matveev <stargrave@stargrave.org>
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation, version 3 of the License.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package keks
+
+import (
+ "math/big"
+
+ "go.cypherpunks.su/keks/be"
+ "go.cypherpunks.su/keks/types"
+)
+
+func (ctx *Decoder) getInt(t types.Type) (isBig bool, err error) {
+ var tag byte
+ tag, err = ctx.getByte()
+ if err != nil {
+ return
+ }
+ if tag&AtomStrings == 0 || tag&AtomIsUTF8 != 0 {
+ err = ErrIntBad
+ return
+ }
+ var s string
+ s, err = ctx.getStr(tag)
+ if err != nil {
+ return
+ }
+ if len(s) == 0 {
+ if t == types.UInt {
+ ctx.uints = append(ctx.uints, 0)
+ } else {
+ ctx.ints = append(ctx.ints, -1)
+ }
+ return
+ }
+ if s[0] == 0 {
+ err = ErrIntNonMinimal
+ return
+ }
+ if len(s) > 8 {
+ isBig = true
+ bi := new(big.Int).SetBytes([]byte(s))
+ if t == types.Int {
+ bi = bi.Add(bi, big.NewInt(1))
+ bi = bi.Neg(bi)
+ }
+ ctx.bigints = append(ctx.bigints, bi)
+ return
+ }
+ i := be.Get([]byte(s))
+ if t == types.UInt {
+ ctx.uints = append(ctx.uints, i)
+ } else {
+ if i >= (1 << 63) {
+ isBig = true
+ bi := new(big.Int).SetBytes([]byte(s))
+ bi = bi.Add(bi, big.NewInt(1))
+ bi = bi.Neg(bi)
+ ctx.bigints = append(ctx.bigints, bi)
+ } else {
+ ctx.ints = append(ctx.ints, -1-int64(i))
+ }
+ }
+ return
+}
if isinstance(v, Blob):
assert (v.l > 0) and (v.l <= (1 << 64))
l, v = v.l, v.v
- raws = [TagBlobb, (l-1).to_bytes(8, "big")]
+ raws = [TagBlobb, dumps(l-1)]
append = raws.append
chunks = len(v) // l
for i in range(chunks):
ret = set(ret.keys())
return ret, v
if b == TagBlob:
- if len(v) < 1+8:
- raise NotEnoughData(1+8-len(v))
- l = 1 + int.from_bytes(v[1:1+8], "big")
- v = v[1+8:]
+ if len(v) < 3:
+ raise NotEnoughData(3-len(v))
+ if v[1] != TagPInt:
+ raise DecodeError("blob without Pint")
+ l, v = _loads(v[1:])
+ l += 1
raws = []
while True:
i, v = _loads(v, _allowContainers=False)
self.assertSequenceEqual(
encoded,
b"".join((
- bytes.fromhex("0B0000000000000003"),
+ bytes.fromhex("0B0C8103"),
bytes.fromhex("84"), b"test",
bytes.fromhex("84"), b"data",
bytes.fromhex("80"),
self.assertSequenceEqual(
encoded,
b"".join((
- bytes.fromhex("0B0000000000000003"),
+ bytes.fromhex("0B0C8103"),
bytes.fromhex("84"), b"test",
bytes.fromhex("84"), b"data",
bytes.fromhex("81"), b"2",
self.assertSequenceEqual(
encoded,
b"".join((
- bytes.fromhex("0B0000000000000003"),
+ bytes.fromhex("0B0C8103"),
bytes.fromhex("80"),
)),
)
chunks = [urandom(chunkLen) for _ in range(chunks)]
encoded = b"".join((
b"\x0b",
- (chunkLen-1).to_bytes(8, "big"),
+ dumps(chunkLen-1),
b"".join(dumps(chunk) for chunk in chunks),
b"\x80",
junk,
def test_throws_when_not_enough_data(self) -> None:
encoded = b"".join((
- bytes.fromhex("0B0000000000000003"),
+ bytes.fromhex("0B0C8103"),
bytes.fromhex("84"), b"test",
bytes.fromhex("84"), b"da",
))
self.assertEqual(err.exception.n, 2)
def test_throws_when_not_enough_data_for_length(self) -> None:
- encoded = bytes.fromhex("0B00000000")
+ encoded = bytes.fromhex("0B0C81")
with self.assertRaises(NotEnoughData) as err:
loads(encoded)
- self.assertEqual(err.exception.n, 8-4)
+ self.assertEqual(err.exception.n, 1)
def test_throws_when_wrong_terminator_length(self) -> None:
encoded = b"".join((
- bytes.fromhex("0B0000000000000003"),
+ bytes.fromhex("0B0C8103"),
bytes.fromhex("84"), b"test",
bytes.fromhex("84"), b"data",
bytes.fromhex("8A"), b"terminator",
def test_throws_when_wrong_terminator_tag(self) -> None:
encoded = b"".join((
- bytes.fromhex("0B0000000000000003"),
+ bytes.fromhex("0B0C8103"),
bytes.fromhex("84"), b"test",
bytes.fromhex("84"), b"data",
bytes.fromhex("04"), b"that was a wrong tag",
from keks import _byte
from keks import DecodeError
+from keks import dumps
from keks import loads
from keks import TagBlob
from keks import TagList
class TestTooDeepBlob(TestCase):
def runTest(self) -> None:
with self.assertRaises(DecodeError) as err:
- loads(_byte(TagBlob) + (8 * b"\x01") + _byte(TagList) * 1000)
+ loads(_byte(TagBlob) + dumps(1) + _byte(TagList) * 1000)
self.assertEqual(str(err.exception), "unknown tag")
BLOB (binary large object) allows you to transfer binary data in chunks,
in a streaming way, when data may not fit in memory.
-64-bit big-endian integer follows the BLOB tag, setting the following
-chunks payload size (+1). Then come one or more binary [encoding/String]
+[encoding/INT] follows the BLOB tag; the chunks payload size equals
+that integer's value plus one. Then come one or more binary [encoding/String]
with the chunk-length payload. All of them, except for the last
one, must have fixed chunk length payload. Last terminating string's
payload must be shorter.
Data format definition must specify exact chunk size expected to be
used, if it needs deterministic encoding.
- BLOB chunk-len [BIN(len=chunk-len) || ...] BIN(len<chunk-len)
+ BLOB INT(chunk-len) [BIN(len=chunk-len) || ...] BIN(len<chunk-len)
Example representations:
-BLOB {5 ""} | 0B 0000000000000004 80
-BLOB {5 "12345"} | 0B 0000000000000004 85 3132333435 80
-BLOB {5 "123456"} | 0B 0000000000000004 85 3132333435 81 36
-BLOB {500 "123"} | 0B 00000000000001F3 83 313233
-BLOB {2 "12345"} | 0B 0000000000000001 82 3132 82 3334 81 35
+BLOB {5 ""} | 0B 0C8104 80
+BLOB {5 "12345"} | 0B 0C8104 85 3132333435 80
+BLOB {5 "123456"} | 0B 0C8104 85 3132333435 81 36
+BLOB {500 "123"} | 0B 0C8201F3 83 313233
+BLOB {2 "12345"} | 0B 0C8101 82 3132 82 3334 81 35
008 | 08 | 00001000 | 0 | [encoding/LIST]
009 | 09 | 00001001 | 0 | [encoding/MAP]
010 | 0A | 00001010 | 0 |
-011 | 0B | 00001011 | 8+~ | [encoding/BLOB]
+011 | 0B | 00001011 | 3+~ | [encoding/BLOB]
012 | 0C | 00001100 | 1+~ | + [encoding/INT]
013 | 0D | 00001101 | 1+~ | - [encoding/INT]
014 | 0E | 00001110 | 0 |
008 | 08 | 00001000 | 0 | [encoding/LIST]
009 | 09 | 00001001 | 0 | [encoding/MAP]
010 | 0A | 00001010 |
-011 | 0B | 00001011 | 8+~ | [encoding/BLOB]
+011 | 0B | 00001011 | 3+~ | [encoding/BLOB]
012 | 0C | 00001100 | 1+~ | + [encoding/INT]
013 | 0D | 00001101 | 1+~ | - [encoding/INT]
... | ... | ... | ... | ...
proc BLOB {chunkLen v} {
upvar buf buf
char [expr 0x0B]
- toBE 8 [expr {$chunkLen - 1}]
+ INT [expr {$chunkLen - 1}]
set vl [string length $v]
set chunks [expr {$vl / $chunkLen}]
for {set i 0} {$i < $chunks} {incr i} {