From: Sergey Matveev Date: Wed, 18 Dec 2024 14:27:43 +0000 (+0300) Subject: Highly optimised and revised Go's decoding/encoding X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=ef7cf2083f2605bc3f229079f8874353bba13b10f4c79c895d731ce8cb6a3074;p=keks.git Highly optimised and revised Go's decoding/encoding Much higher decoding performance. Simplified encoding/decoding interfaces. Get rid of atom/ and mapstruct/ packages. --- diff --git a/go/atom/dec.go b/go/atom-decode.go similarity index 50% rename from go/atom/dec.go rename to go/atom-decode.go index 971c96c..bc4096e 100644 --- a/go/atom/dec.go +++ b/go/atom-decode.go @@ -13,7 +13,7 @@ // You should have received a copy of the GNU Lesser General Public // License along with this program. If not, see . -package atom +package keks import ( "errors" @@ -23,9 +23,9 @@ import ( "unsafe" "github.com/google/uuid" - - "go.cypherpunks.su/keks/atom/be" + "go.cypherpunks.su/keks/be" "go.cypherpunks.su/keks/types" + "go.cypherpunks.su/tai64n/v4" ) var ( @@ -36,228 +36,181 @@ var ( ErrBadInt = errors.New("bad int value") ) -func (ctx *Decoder) strDecode(tag byte) (read int64, v []byte, err error) { - l := int64(tag & 63) - var ll int - switch l { - case 61: - ll = 1 - case 62: - ll = 2 - l += ((1 << 8) - 1) - case 63: - ll = 8 - l += ((1 << 8) - 1) + ((1 << 16) - 1) - } - if ll != 0 { - read += int64(ll) - v, err = ctx.Want(ll) - if err != nil { - return - } - ul := be.Get(v) - if ul > (1<<63)-(63+((1<<8)-1)+((1<<16)-1)) { - err = ErrLenTooBig - return - } - l += int64(ul) - } - read += l - if read < 0 { // overflowed - err = ErrLenTooBig - return - } - if ctx.MaxStrLen > 0 && l > ctx.MaxStrLen { - err = ErrLenTooBig - return - } - v, err = ctx.Want(int(l)) - return -} - -// Decode a single KEKS-encoded atom. Atom means that it does not decode -// full lists, maps, blobs and may return types.EOC. 
-func (ctx *Decoder) Decode() (t types.Type, v any, read int64, err error) { - var buf []byte - buf, err = ctx.Want(1) +func (ctx *Decoder) DecodeAtom() (t types.Type, err error) { + var tag byte + tag, err = ctx.getByte() if err != nil { return } - read = 1 - tag := buf[0] - if (tag & Strings) > 0 { - if (tag & IsUTF8) == 0 { + if (tag & AtomStrings) > 0 { + if (tag & AtomIsUTF8) == 0 { t = types.Bin } else { t = types.Str } - var strRead int64 - strRead, buf, err = ctx.strDecode(tag) - read += strRead + var s string + s, err = ctx.getStr(tag) if err != nil { return } - if t == types.Bin { - v = buf - } else { - s := unsafe.String(unsafe.SliceData(buf), len(buf)) - v = s - if !ctx.DisableUTF8Check { - if !utf8.ValidString(s) { - err = ErrBadUTF8 - } - if strings.Contains(s, "\x00") { - err = ErrBadUTF8 - } + ctx.types = append(ctx.types, t) + ctx.depths = append(ctx.depths, ctx.depth) + ctx.strs = append(ctx.strs, s) + if t == types.Str && ctx.opts != nil && !ctx.opts.DisableUTF8Check { + if !utf8.ValidString(s) { + err = ErrBadUTF8 + } + if strings.Contains(s, "\x00") { + err = ErrBadUTF8 } } return } - switch Type(tag) { - case EOC: + switch AtomType(tag) { + case AtomEOC: t = types.EOC - case NIL: + case AtomNIL: t = types.NIL - case False: + case AtomFalse: t = types.Bool - v = false - case True: + ctx.bools = append(ctx.bools, false) + case AtomTrue: t = types.Bool - v = true - case UUID: - t = types.UUID - read += 16 - buf, err = ctx.Want(16) + ctx.bools = append(ctx.bools, true) + case AtomUUID: + var s string + s, err = ctx.getBytes(16) + if err != nil { + return + } + var v uuid.UUID + v, err = uuid.FromBytes([]byte(s)) if err != nil { return } - v, err = uuid.FromBytes(buf) - case List: + t = types.UUID + ctx.uuids = append(ctx.uuids, v) + case AtomList: t = types.List - case Map: + case AtomMap: t = types.Map - case Blob: - t = types.Blob - read += 8 - buf, err = ctx.Want(8) + case AtomBLOB: + var s string + s, err = ctx.getBytes(8) if err != nil { 
return } - chunkLen := be.Get(buf) + chunkLen := be.Get([]byte(s)) if chunkLen >= (1<<63)-1 { err = ErrLenTooBig return } chunkLen++ - v = chunkLen - - case PInt, NInt: - if Type(tag) == PInt { + t = types.Blob + ctx.blobChunkLens = append(ctx.blobChunkLens, int64(chunkLen)) + case AtomPInt, AtomNInt: + if AtomType(tag) == AtomPInt { t = types.UInt } else { t = types.Int } - read += 1 - buf, err = ctx.Want(1) + tag, err = ctx.getByte() if err != nil { return } - if buf[0]&Strings == 0 || buf[0]&IsUTF8 != 0 { + if tag&AtomStrings == 0 || tag&AtomIsUTF8 != 0 { err = ErrBadInt return } - var binRead int64 - binRead, buf, err = ctx.strDecode(buf[0]) - read += binRead + var s string + s, err = ctx.getStr(tag) if err != nil { return } - if len(buf) == 0 { + if len(s) == 0 { if t == types.UInt { - v = uint64(0) + ctx.uints = append(ctx.uints, 0) } else { - v = int64(-1) + ctx.ints = append(ctx.ints, -1) } - return + break } - if buf[0] == 0 { + if s[0] == 0 { err = ErrIntNonMinimal return } - if len(buf) > 8 { + if len(s) > 8 { bi := big.NewInt(0) - bi = bi.SetBytes(buf) + bi = bi.SetBytes([]byte(s)) if t == types.Int { n1 := big.NewInt(-1) bi = bi.Sub(n1, bi) } t = types.BigInt - v = bi - return + ctx.bigints = append(ctx.bigints, bi) + break } - i := be.Get(buf) + i := be.Get([]byte(s)) if t == types.UInt { - v = i + ctx.uints = append(ctx.uints, i) } else { if i >= (1 << 63) { bi := big.NewInt(0) - bi = bi.SetBytes(buf) + bi = bi.SetBytes([]byte(s)) n1 := big.NewInt(-1) bi = bi.Sub(n1, bi) + ctx.bigints = append(ctx.bigints, bi) t = types.BigInt - v = bi } else { - v = -1 - int64(i) + ctx.ints = append(ctx.ints, -1-int64(i)) } } - return - - case Float16, Float32, Float64, Float128, Float256: + case AtomFloat16, AtomFloat32, AtomFloat64, AtomFloat128, AtomFloat256: var l int - switch Type(tag) { - case Float16: + switch AtomType(tag) { + case AtomFloat16: l = 2 - case Float32: + case AtomFloat32: l = 4 - case Float64: + case AtomFloat64: l = 8 - case Float128: + case 
AtomFloat128: l = 16 - case Float256: + case AtomFloat256: l = 32 } - read += int64(l) - buf, err = ctx.Want(l) + var s string + s, err = ctx.getBytes(l) if err != nil { - t = types.Float return } t = types.Raw - v = Raw{T: Type(tag), V: buf} - - case TAI64, TAI64N, TAI64NA: + ctx.rawTypes = append(ctx.rawTypes, AtomType(tag)) + ctx.strs = append(ctx.strs, s) + case AtomTAI64, AtomTAI64N, AtomTAI64NA: var l int - switch Type(tag) { - case TAI64: + switch AtomType(tag) { + case AtomTAI64: + t = types.TAI64 l = 8 - case TAI64N: + case AtomTAI64N: + t = types.TAI64N l = 12 - case TAI64NA: + case AtomTAI64NA: + t = types.TAI64NA l = 16 } - t = types.TAI64 - read += int64(l) - buf, err = ctx.Want(l) + var s string + s, err = ctx.getBytes(l) if err != nil { return } - v = buf - if be.Get(buf[:8]) > (1 << 63) { + if be.Get([]byte(s)[:8]) > (1 << 63) { err = errors.New("reserved TAI64 values in use") return } if l > 8 { - nsecs := be.Get(buf[8 : 8+4]) + nsecs := be.Get([]byte(s)[8 : 8+4]) if l == 12 && nsecs == 0 { err = errors.New("non-minimal TAI64N") return @@ -268,7 +221,7 @@ func (ctx *Decoder) Decode() (t types.Type, v any, read int64, err error) { } } if l > 12 { - asecs := be.Get(buf[8+4 : 8+4+4]) + asecs := be.Get([]byte(s)[8+4 : 8+4+4]) if asecs == 0 { err = errors.New("non-minimal TAI64NA") return @@ -278,9 +231,22 @@ func (ctx *Decoder) Decode() (t types.Type, v any, read int64, err error) { return } } - + switch t { + case types.TAI64: + tai := tai64n.TAI64(unsafe.Slice(unsafe.StringData(s), 8)) + ctx.tai64s = append(ctx.tai64s, tai) + case types.TAI64N: + tai := tai64n.TAI64N(unsafe.Slice(unsafe.StringData(s), 12)) + ctx.tai64ns = append(ctx.tai64ns, tai) + case types.TAI64NA: + tai := tai64n.TAI64NA(unsafe.Slice(unsafe.StringData(s), 16)) + ctx.tai64nas = append(ctx.tai64nas, tai) + } default: err = ErrUnknownType + return } + ctx.types = append(ctx.types, t) + ctx.depths = append(ctx.depths, ctx.depth) return } diff --git a/go/atom/enc.go 
b/go/atom-encode.go similarity index 52% rename from go/atom/enc.go rename to go/atom-encode.go index 5242b16..baaa5d1 100644 --- a/go/atom/enc.go +++ b/go/atom-encode.go @@ -13,7 +13,7 @@ // You should have received a copy of the GNU Lesser General Public // License along with this program. If not, see . -package atom +package keks import ( "bytes" @@ -21,61 +21,37 @@ import ( "math/big" "github.com/google/uuid" - - "go.cypherpunks.su/keks/atom/be" -) - -var bigIntZero = big.NewInt(0) - -type Type byte - -//go:generate stringer -type=Type -const ( - EOC Type = 0x00 - NIL Type = 0x01 - False Type = 0x02 - True Type = 0x03 - UUID Type = 0x04 - List Type = 0x08 - Map Type = 0x09 - Blob Type = 0x0B - PInt Type = 0x0C - NInt Type = 0x0D - Float16 Type = 0x10 - Float32 Type = 0x11 - Float64 Type = 0x12 - Float128 Type = 0x13 - Float256 Type = 0x14 - TAI64 Type = 0x18 - TAI64N Type = 0x19 - TAI64NA Type = 0x1A - - Strings = 0x80 - IsUTF8 = 0x40 + "go.cypherpunks.su/keks/be" + "go.cypherpunks.su/tai64n/v4" ) -// Write an encoded EOC atom. -func EOCEncode(w io.Writer) (written int64, err error) { - return io.Copy(w, bytes.NewReader([]byte{byte(EOC)})) -} - -// Write an encoded NIL atom. -func NILEncode(w io.Writer) (written int64, err error) { - return io.Copy(w, bytes.NewReader([]byte{byte(NIL)})) +// Just write single byte. Can be convenient for writing LIST/MAP atoms. +func ByteEncode(w io.Writer, b byte) (written int64, err error) { + _, err = w.Write([]byte{b}) + if err != nil { + return + } + written = 1 + return } // Write an encoded TRUE/FALSE atom. func BoolEncode(w io.Writer, v bool) (written int64, err error) { - data := []byte{byte(False)} + data := []byte{byte(AtomFalse)} if v { - data[0] = byte(True) + data[0] = byte(AtomTrue) } - return io.Copy(w, bytes.NewReader(data)) + _, err = w.Write(data) + if err != nil { + return + } + written = 1 + return } // Write an encoded UUID atom. 
-func UUIDEncode(w io.Writer, v uuid.UUID) (written int64, err error) { - return io.Copy(w, bytes.NewReader(append([]byte{byte(UUID)}, v[:]...))) +func UUIDEncode(w io.Writer, v *uuid.UUID) (written int64, err error) { + return io.Copy(w, bytes.NewReader(append([]byte{byte(AtomUUID)}, v[:]...))) } func atomUintEncode(w io.Writer, v uint64) (written int64, err error) { @@ -95,7 +71,7 @@ func atomUintEncode(w io.Writer, v uint64) (written int64, err error) { // Write an encoded +INT atom. func UIntEncode(w io.Writer, v uint64) (written int64, err error) { - written, err = io.Copy(w, bytes.NewReader([]byte{byte(PInt)})) + _, err = w.Write([]byte{byte(AtomPInt)}) if err != nil { return } @@ -110,7 +86,7 @@ func IntEncode(w io.Writer, v int64) (written int64, err error) { if v >= 0 { return UIntEncode(w, uint64(v)) } - written, err = io.Copy(w, bytes.NewReader([]byte{byte(NInt)})) + _, err = w.Write([]byte{byte(AtomNInt)}) if err != nil { return } @@ -122,18 +98,13 @@ func IntEncode(w io.Writer, v int64) (written int64, err error) { // Write an encoded ±INT atom. func BigIntEncode(w io.Writer, v *big.Int) (written int64, err error) { if v.Cmp(bigIntZero) >= 0 { - written, err = io.Copy(w, bytes.NewReader([]byte{byte(PInt)})) - if err != nil { - return - } - written, err = BinEncode(w, v.Bytes()) - written++ - return + _, err = w.Write([]byte{byte(AtomPInt)}) + } else { + n1 := big.NewInt(-1) + v = v.Abs(v) + v = v.Add(v, n1) + _, err = w.Write([]byte{byte(AtomNInt)}) } - n1 := big.NewInt(-1) - v = v.Abs(v) - v = v.Add(v, n1) - written, err = io.Copy(w, bytes.NewReader([]byte{byte(NInt)})) if err != nil { return } @@ -142,32 +113,55 @@ func BigIntEncode(w io.Writer, v *big.Int) (written int64, err error) { return } -// Write an encoded LIST atom. -// You have to manually terminate it with EOCEncode. -func ListEncode(w io.Writer) (written int64, err error) { - return io.Copy(w, bytes.NewReader([]byte{byte(List)})) -} - -// Write an encoded MAP atom. 
-// You have to manually terminate it with EOCEncode. -func MapEncode(w io.Writer) (written int64, err error) { - return io.Copy(w, bytes.NewReader([]byte{byte(Map)})) -} - // Write an encoded BLOB atom. // You have to manually provide necessary chunks and // properly terminate it with BinEncode. -func BlobEncode(w io.Writer, chunkLen int) (written int64, err error) { - l := make([]byte, 9) - l[0] = byte(Blob) - be.Put(l[1:], uint64(chunkLen-1)) - return io.Copy(w, bytes.NewReader(l)) +func BlobEncode( + w io.Writer, + chunkLen int64, + r io.Reader, +) (written int64, err error) { + { + l := make([]byte, 9) + l[0] = byte(AtomBLOB) + be.Put(l[1:], uint64(chunkLen-1)) + written, err = io.Copy(w, bytes.NewReader(l)) + } + if err != nil { + return + } + chunk := make([]byte, chunkLen) + var n int + var n64 int64 + for { + n, err = io.ReadFull(r, chunk) + if err != nil { + if err == io.ErrUnexpectedEOF || err == io.EOF { + chunk = chunk[:n] + break + } + return + } + n64, err = ByteEncode(w, byte(AtomNIL)) + if err != nil { + return + } + written += n64 + n64, err = io.Copy(w, bytes.NewReader(chunk)) + written += n64 + if err != nil { + return + } + } + n64, err = BinEncode(w, chunk) + written += n64 + return } func atomStrEncode(w io.Writer, data []byte, utf8 bool) (written int64, err error) { - tag := byte(Strings) + tag := byte(AtomStrings) if utf8 { - tag |= IsUTF8 + tag |= AtomIsUTF8 } var hdr []byte if len(data) >= 63+((1<<8)-1)+((1<<16)-1) { @@ -204,47 +198,35 @@ func BinEncode(w io.Writer, bin []byte) (written int64, err error) { return atomStrEncode(w, bin, false) } -// Write an encoded CHUNK atom. -// That is basically NIL with the chunk value. -func ChunkEncode(w io.Writer, chunk []byte) (written int64, err error) { - written, err = NILEncode(w) +// Write an encoded TAI64 atom. 
+func TAI64Encode(w io.Writer, tai *tai64n.TAI64) (written int64, err error) { + _, err = w.Write([]byte{byte(AtomTAI64)}) if err != nil { return } - written, err = io.Copy(w, bytes.NewReader(chunk)) + written, err = io.Copy(w, bytes.NewReader(tai[:])) written++ return } -// Write an encoded TAI64* atom. -func TAI64Encode(w io.Writer, tai []byte) (written int64, err error) { - var tag []byte - switch len(tai) { - case 8: - tag = []byte{byte(TAI64)} - case 12: - tag = []byte{byte(TAI64N)} - case 16: - tag = []byte{byte(TAI64NA)} - default: - panic("wrong TAI64 value") - } - written, err = io.Copy(w, bytes.NewReader(tag)) +// Write an encoded TAI64N atom. +func TAI64NEncode(w io.Writer, tai *tai64n.TAI64N) (written int64, err error) { + _, err = w.Write([]byte{byte(AtomTAI64N)}) if err != nil { return } - written, err = io.Copy(w, bytes.NewReader(tai)) + written, err = io.Copy(w, bytes.NewReader(tai[:])) written++ return } -// Write an encoded raw atom's value. -func RawEncode(w io.Writer, raw Raw) (written int64, err error) { - written, err = io.Copy(w, bytes.NewReader([]byte{byte(raw.T)})) +// Write an encoded TAI64NA atom. +func TAI64NAEncode(w io.Writer, tai *tai64n.TAI64NA) (written int64, err error) { + _, err = w.Write([]byte{byte(AtomTAI64NA)}) if err != nil { return } - written, err = io.Copy(w, bytes.NewReader(raw.V)) + written, err = io.Copy(w, bytes.NewReader(tai[:])) written++ return } diff --git a/go/atom/raw.go b/go/atom/raw.go deleted file mode 100644 index 0148d86..0000000 --- a/go/atom/raw.go +++ /dev/null @@ -1,32 +0,0 @@ -// GoKEKS -- Go KEKS codec implementation -// Copyright (C) 2024-2025 Sergey Matveev -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation, version 3 of the License. 
-// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this program. If not, see . - -package atom - -import ( - "encoding/hex" - "fmt" -) - -// Raw atom storage, keeping the tag T and contents V after it. -// Used for keeping data that can not be represented in native Go types. -type Raw struct { - V []byte - T Type -} - -func (raw *Raw) String() string { - return fmt.Sprintf("RAW(%v, %s)", raw.T, hex.EncodeToString(raw.V)) -} diff --git a/go/atom/type_string.go b/go/atom/type_string.go deleted file mode 100644 index b35d04b..0000000 --- a/go/atom/type_string.go +++ /dev/null @@ -1,66 +0,0 @@ -// Code generated by "stringer -type=Type"; DO NOT EDIT. - -package atom - -import "strconv" - -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. 
- var x [1]struct{} - _ = x[EOC-0] - _ = x[NIL-1] - _ = x[False-2] - _ = x[True-3] - _ = x[UUID-4] - _ = x[List-8] - _ = x[Map-9] - _ = x[Blob-11] - _ = x[PInt-12] - _ = x[NInt-13] - _ = x[Float16-16] - _ = x[Float32-17] - _ = x[Float64-18] - _ = x[Float128-19] - _ = x[Float256-20] - _ = x[TAI64-24] - _ = x[TAI64N-25] - _ = x[TAI64NA-26] -} - -const ( - _Type_name_0 = "EOCNILFalseTrueUUID" - _Type_name_1 = "ListMap" - _Type_name_2 = "BlobPIntNInt" - _Type_name_3 = "Float16Float32Float64Float128Float256" - _Type_name_4 = "TAI64TAI64NTAI64NA" -) - -var ( - _Type_index_0 = [...]uint8{0, 3, 6, 11, 15, 19} - _Type_index_1 = [...]uint8{0, 4, 7} - _Type_index_2 = [...]uint8{0, 4, 8, 12} - _Type_index_3 = [...]uint8{0, 7, 14, 21, 29, 37} - _Type_index_4 = [...]uint8{0, 5, 11, 18} -) - -func (i Type) String() string { - switch { - case i <= 4: - return _Type_name_0[_Type_index_0[i]:_Type_index_0[i+1]] - case 8 <= i && i <= 9: - i -= 8 - return _Type_name_1[_Type_index_1[i]:_Type_index_1[i+1]] - case 11 <= i && i <= 13: - i -= 11 - return _Type_name_2[_Type_index_2[i]:_Type_index_2[i+1]] - case 16 <= i && i <= 20: - i -= 16 - return _Type_name_3[_Type_index_3[i]:_Type_index_3[i+1]] - case 24 <= i && i <= 26: - i -= 24 - return _Type_name_4[_Type_index_4[i]:_Type_index_4[i+1]] - default: - return "Type(" + strconv.FormatInt(int64(i), 10) + ")" - } -} diff --git a/go/atomtype_string.go b/go/atomtype_string.go new file mode 100644 index 0000000..8a29a79 --- /dev/null +++ b/go/atomtype_string.go @@ -0,0 +1,66 @@ +// Code generated by "stringer -type=AtomType"; DO NOT EDIT. + +package keks + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[AtomEOC-0] + _ = x[AtomNIL-1] + _ = x[AtomFalse-2] + _ = x[AtomTrue-3] + _ = x[AtomUUID-4] + _ = x[AtomList-8] + _ = x[AtomMap-9] + _ = x[AtomBLOB-11] + _ = x[AtomPInt-12] + _ = x[AtomNInt-13] + _ = x[AtomFloat16-16] + _ = x[AtomFloat32-17] + _ = x[AtomFloat64-18] + _ = x[AtomFloat128-19] + _ = x[AtomFloat256-20] + _ = x[AtomTAI64-24] + _ = x[AtomTAI64N-25] + _ = x[AtomTAI64NA-26] +} + +const ( + _AtomType_name_0 = "AtomEOCAtomNILAtomFalseAtomTrueAtomUUID" + _AtomType_name_1 = "AtomListAtomMap" + _AtomType_name_2 = "AtomBLOBAtomPIntAtomNInt" + _AtomType_name_3 = "AtomFloat16AtomFloat32AtomFloat64AtomFloat128AtomFloat256" + _AtomType_name_4 = "AtomTAI64AtomTAI64NAtomTAI64NA" +) + +var ( + _AtomType_index_0 = [...]uint8{0, 7, 14, 23, 31, 39} + _AtomType_index_1 = [...]uint8{0, 8, 15} + _AtomType_index_2 = [...]uint8{0, 8, 16, 24} + _AtomType_index_3 = [...]uint8{0, 11, 22, 33, 45, 57} + _AtomType_index_4 = [...]uint8{0, 9, 19, 30} +) + +func (i AtomType) String() string { + switch { + case i <= 4: + return _AtomType_name_0[_AtomType_index_0[i]:_AtomType_index_0[i+1]] + case 8 <= i && i <= 9: + i -= 8 + return _AtomType_name_1[_AtomType_index_1[i]:_AtomType_index_1[i+1]] + case 11 <= i && i <= 13: + i -= 11 + return _AtomType_name_2[_AtomType_index_2[i]:_AtomType_index_2[i+1]] + case 16 <= i && i <= 20: + i -= 16 + return _AtomType_name_3[_AtomType_index_3[i]:_AtomType_index_3[i+1]] + case 24 <= i && i <= 26: + i -= 24 + return _AtomType_name_4[_AtomType_index_4[i]:_AtomType_index_4[i+1]] + default: + return "AtomType(" + strconv.FormatInt(int64(i), 10) + ")" + } +} diff --git a/go/atom/be/be.go b/go/be/be.go similarity index 100% rename from go/atom/be/be.go rename to go/be/be.go diff --git a/go/blob.go b/go/blob.go index cac2cd1..ed27cdf 100644 --- a/go/blob.go +++ b/go/blob.go @@ -18,15 +18,38 @@ package keks import ( "fmt" "io" + "strings" ) -// BLOB object. You have to set its ChunkLen > 0 and data is read from R. 
-type Blob struct { - R io.Reader - ChunkLen int - DecodedLen int64 // filled up after decoding +type BlobChunked struct { + Chunks []string + ChunkLen int64 } -func (blob *Blob) String() string { - return fmt.Sprintf("BLOB(%d, %d)", blob.ChunkLen, blob.DecodedLen) +func (blob *BlobChunked) Len() (sum int64) { + for _, s := range blob.Chunks { + sum += int64(len(s)) + } + return +} + +func (blob *BlobChunked) Reader() io.Reader { + rs := make([]io.Reader, 0, len(blob.Chunks)) + for _, s := range blob.Chunks { + rs = append(rs, strings.NewReader(s)) + } + return io.MultiReader(rs...) +} + +func (blob *BlobChunked) String() string { + return fmt.Sprintf("BLOB(%d, %d)", blob.ChunkLen, blob.Len()) +} + +type BlobReader struct { + R io.Reader + ChunkLen int64 +} + +func (blob *BlobReader) String() string { + return fmt.Sprintf("BLOB(%d, ~)", blob.ChunkLen) } diff --git a/go/cmd/iter-print/main.go b/go/cmd/iter-print/main.go new file mode 100644 index 0000000..ffce5cb --- /dev/null +++ b/go/cmd/iter-print/main.go @@ -0,0 +1,119 @@ +// GoKEKS -- Go KEKS codec implementation +// Copyright (C) 2024-2025 Sergey Matveev +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation, version 3 of the License. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this program. If not, see . 
+ +package main + +import ( + "bufio" + "encoding/hex" + "fmt" + "log" + "os" + "strings" + "time" + + "go.cypherpunks.su/keks" + "go.cypherpunks.su/keks/types" +) + +func prindent(depth int) { + fmt.Print(strings.Repeat(" ", depth)) +} + +func printer(iter *keks.Iterator, count int, inList, inMap bool) { + for i := 0; i < count; i++ { + if !iter.Next() { + panic("unexpected") + } + depth := iter.Depth + prindent(depth) + if inList { + fmt.Printf("%d: ", i) + } else if inMap { + fmt.Print(iter.Str()) + fmt.Print(": ") + if !iter.Next() { + panic("unexpected") + } + } + switch iter.T { + case types.List: + fmt.Printf("[ %d\n", iter.Len()) + printer(iter, iter.Len(), true, false) + prindent(depth) + fmt.Println("]") + case types.Map: + fmt.Printf("{ %d\n", iter.Len()) + printer(iter, iter.Len(), false, true) + prindent(depth) + fmt.Println("}") + + case types.NIL: + fmt.Println("NIL") + case types.Bool: + if iter.Bool() { + fmt.Println("TRUE") + } else { + fmt.Println("FALSE") + } + case types.UUID: + fmt.Println(iter.UUID()) + case types.UInt: + fmt.Println(iter.UInt()) + case types.Int: + fmt.Println(iter.Int()) + case types.BigInt: + fmt.Println(iter.BigInt()) + case types.Blob: + blob := iter.Blob() + fmt.Printf("BLOB[ %d l=%d\n", len(blob.Chunks), blob.ChunkLen) + for i, chunk := range blob.Chunks { + fmt.Print(strings.Repeat(" ", iter.Depth+1)) + fmt.Printf("%d: %d:%s\n", + i, len(chunk), hex.EncodeToString([]byte(chunk))) + } + case types.TAI64: + t := iter.TAI64() + fmt.Printf("%s TAI\n", t.Time().Format(time.DateTime)) + case types.TAI64N: + t := iter.TAI64N() + fmt.Printf("%s TAI\n", t.Time().Format(time.DateTime+".000000000")) + case types.TAI64NA: + t := iter.TAI64NA() + fmt.Printf("TAI64NA[%s]\n", hex.EncodeToString(t[:])) + case types.Bin: + s := iter.Bin() + fmt.Printf("%d:%s\n", len(s), hex.EncodeToString([]byte(s))) + case types.Str: + fmt.Print(`"`) + fmt.Print(iter.Str()) + fmt.Println(`"`) + case types.Raw: + fmt.Printf("RAW[%s]\n", 
hex.EncodeToString(iter.Raw())) + default: + fmt.Println("???") + } + } +} + +func main() { + ctx := keks.NewDecoderFromReader(bufio.NewReader(os.Stdin), nil) + _, err := ctx.Parse() + if err != nil { + log.Fatal(err) + } + iter := ctx.Iter() + printer(iter, 1, false, false) +} diff --git a/go/cmd/print/main.go b/go/cmd/print/main.go index 36258d8..78f4ce9 100644 --- a/go/cmd/print/main.go +++ b/go/cmd/print/main.go @@ -7,17 +7,13 @@ import ( "os" "go.cypherpunks.su/keks" - "go.cypherpunks.su/keks/atom" ) func main() { - item, read, err := keks.Decode(&atom.Decoder{R: bufio.NewReader(os.Stdin)}) + decoder := keks.NewDecoderFromReader(bufio.NewReader(os.Stdin), nil) + v, err := decoder.Decode() if err != nil { log.Fatal(err) } - e, err := item.ToGo() - if err != nil { - log.Fatal(err) - } - fmt.Printf("%v\n%d bytes\n", e, read) + fmt.Printf("%v\n%d bytes\n", v, decoder.Read) } diff --git a/go/cmd/test-vector-anys/main.go b/go/cmd/test-vector-anys/main.go index 48fb67b..a5d232a 100644 --- a/go/cmd/test-vector-anys/main.go +++ b/go/cmd/test-vector-anys/main.go @@ -10,9 +10,9 @@ import ( "time" "github.com/google/uuid" + "go.cypherpunks.su/tai64n/v4" "go.cypherpunks.su/keks" - "go.cypherpunks.su/keks/atom" ) func mustHexDec(s string) []byte { @@ -80,38 +80,31 @@ func main() { "utf8": "привет мир", }, "blob": []any{ - keks.Blob{ChunkLen: 12, R: strings.NewReader("5")}, - keks.Blob{ChunkLen: 12, R: strings.NewReader(strings.Repeat("6", 12))}, - keks.Blob{ChunkLen: 12, R: strings.NewReader(strings.Repeat("7", 13))}, - keks.Blob{ChunkLen: 5, R: strings.NewReader("1234567890-")}, + keks.BlobReader{ChunkLen: 12, R: strings.NewReader("5")}, + keks.BlobReader{ChunkLen: 12, R: strings.NewReader(strings.Repeat("6", 12))}, + keks.BlobReader{ChunkLen: 12, R: strings.NewReader(strings.Repeat("7", 13))}, + keks.BlobReader{ChunkLen: 5, R: strings.NewReader("1234567890-")}, }, "empties": []any{ []any{}, map[string]any{}, - keks.Blob{ChunkLen: 123, R: bytes.NewReader(nil)}, + 
keks.BlobReader{ChunkLen: 123, R: bytes.NewReader(nil)}, uuid.Nil, - atom.Raw{T: atom.TAI64, V: mustHexDec("0000000000000000")}, + tai64n.TAI64(mustHexDec("0000000000000000")), }, "dates": []any{ time.Unix(1234567890, 0), time.Unix(1234567890, 456*1000), time.Unix(1234567890, 456789), - atom.Raw{ - T: atom.TAI64NA, - V: mustHexDec("40000000499602F40006F855075BCD15"), - }, + tai64n.TAI64NA(mustHexDec("40000000499602F40006F855075BCD15")), }, "floats": []any{ - atom.Raw{T: atom.Float32, V: []byte("\x01\x02\x03\x04")}, + keks.Raw(append([]byte{byte(keks.AtomFloat32)}, mustHexDec("01020304")...)), }, "uuid": uuid.MustParse("0e875e3f-d385-49eb-87b4-be42d641c367"), } - item, err := keks.FromGo(data) - if err != nil { - log.Fatal(err) - } var buf bytes.Buffer - _, err = item.Encode(&buf) + _, err := keks.Encode(&buf, data) if err != nil { log.Fatal(err) } diff --git a/go/cmd/test-vector-manual/main.go b/go/cmd/test-vector-manual/main.go index 79c4a3a..2d933a9 100644 --- a/go/cmd/test-vector-manual/main.go +++ b/go/cmd/test-vector-manual/main.go @@ -4,13 +4,14 @@ import ( "bytes" "encoding/hex" "fmt" + "io" "math/big" "time" "github.com/google/uuid" "go.cypherpunks.su/tai64n/v4" - "go.cypherpunks.su/keks/atom" + "go.cypherpunks.su/keks" ) func mustHexDec(s string) []byte { @@ -33,180 +34,161 @@ func mustEncode(n int64, err error) { func main() { var buf bytes.Buffer { - mustEncode(atom.MapEncode(&buf)) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomMap))) { - mustEncode(atom.StrEncode(&buf, "nil")) - mustEncode(atom.NILEncode(&buf)) + mustEncode(keks.StrEncode(&buf, "nil")) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomNIL))) } { - mustEncode(atom.StrEncode(&buf, "str")) - mustEncode(atom.MapEncode(&buf)) + mustEncode(keks.StrEncode(&buf, "str")) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomMap))) { - mustEncode(atom.StrEncode(&buf, "bin")) - mustEncode(atom.ListEncode(&buf)) + mustEncode(keks.StrEncode(&buf, "bin")) + mustEncode(keks.ByteEncode(&buf, 
byte(keks.AtomList))) { - mustEncode(atom.BinEncode(&buf, []byte(""))) - mustEncode(atom.BinEncode(&buf, bytes.Repeat([]byte{'0'}, 60))) - mustEncode(atom.BinEncode(&buf, bytes.Repeat([]byte{'1'}, 61))) - mustEncode(atom.BinEncode(&buf, bytes.Repeat([]byte{'2'}, 255))) - mustEncode(atom.BinEncode(&buf, bytes.Repeat([]byte{'A'}, 61+255))) - mustEncode(atom.BinEncode(&buf, bytes.Repeat([]byte{'B'}, 62+255))) - mustEncode(atom.BinEncode(&buf, bytes.Repeat([]byte{'3'}, 1024))) - mustEncode(atom.BinEncode(&buf, bytes.Repeat([]byte{'4'}, 63+255+65535+1))) + mustEncode(keks.BinEncode(&buf, []byte(""))) + mustEncode(keks.BinEncode(&buf, bytes.Repeat([]byte{'0'}, 60))) + mustEncode(keks.BinEncode(&buf, bytes.Repeat([]byte{'1'}, 61))) + mustEncode(keks.BinEncode(&buf, bytes.Repeat([]byte{'2'}, 255))) + mustEncode(keks.BinEncode(&buf, bytes.Repeat([]byte{'A'}, 61+255))) + mustEncode(keks.BinEncode(&buf, bytes.Repeat([]byte{'B'}, 62+255))) + mustEncode(keks.BinEncode(&buf, bytes.Repeat([]byte{'3'}, 1024))) + mustEncode(keks.BinEncode(&buf, bytes.Repeat([]byte{'4'}, 63+255+65535+1))) } - mustEncode(atom.EOCEncode(&buf)) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomEOC))) { - mustEncode(atom.StrEncode(&buf, "utf8")) - mustEncode(atom.StrEncode(&buf, "привет мир")) + mustEncode(keks.StrEncode(&buf, "utf8")) + mustEncode(keks.StrEncode(&buf, "привет мир")) } } - mustEncode(atom.EOCEncode(&buf)) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomEOC))) } { - mustEncode(atom.StrEncode(&buf, "blob")) - mustEncode(atom.ListEncode(&buf)) - { - mustEncode(atom.BlobEncode(&buf, 12)) - mustEncode(atom.BinEncode(&buf, []byte{'5'})) - } - { - mustEncode(atom.BlobEncode(&buf, 12)) - mustEncode(atom.ChunkEncode(&buf, bytes.Repeat([]byte{'6'}, 12))) - mustEncode(atom.BinEncode(&buf, []byte{})) - } - { - mustEncode(atom.BlobEncode(&buf, 12)) - mustEncode(atom.ChunkEncode(&buf, bytes.Repeat([]byte{'7'}, 12))) - mustEncode(atom.BinEncode(&buf, []byte{'7'})) - } - { - 
mustEncode(atom.BlobEncode(&buf, 5)) - mustEncode(atom.ChunkEncode(&buf, []byte("12345"))) - mustEncode(atom.ChunkEncode(&buf, []byte("67890"))) - mustEncode(atom.BinEncode(&buf, []byte{'-'})) - } - mustEncode(atom.EOCEncode(&buf)) + mustEncode(keks.StrEncode(&buf, "blob")) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomList))) + mustEncode(keks.BlobEncode(&buf, 12, bytes.NewReader([]byte{'5'}))) + mustEncode(keks.BlobEncode(&buf, 12, bytes.NewReader(bytes.Repeat([]byte{'6'}, 12)))) + mustEncode(keks.BlobEncode(&buf, 12, bytes.NewReader(bytes.Repeat([]byte{'7'}, 13)))) + mustEncode(keks.BlobEncode(&buf, 5, bytes.NewReader([]byte("1234567890-")))) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomEOC))) } { - mustEncode(atom.StrEncode(&buf, "bool")) - mustEncode(atom.ListEncode(&buf)) - mustEncode(atom.BoolEncode(&buf, true)) - mustEncode(atom.BoolEncode(&buf, false)) - mustEncode(atom.EOCEncode(&buf)) + mustEncode(keks.StrEncode(&buf, "bool")) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomList))) + mustEncode(keks.BoolEncode(&buf, true)) + mustEncode(keks.BoolEncode(&buf, false)) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomEOC))) } { - mustEncode(atom.StrEncode(&buf, "ints")) - mustEncode(atom.MapEncode(&buf)) + mustEncode(keks.StrEncode(&buf, "ints")) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomMap))) { - mustEncode(atom.StrEncode(&buf, "neg")) - mustEncode(atom.ListEncode(&buf)) - mustEncode(atom.IntEncode(&buf, -1)) - mustEncode(atom.IntEncode(&buf, -2)) - mustEncode(atom.IntEncode(&buf, -32)) - mustEncode(atom.IntEncode(&buf, -33)) - mustEncode(atom.IntEncode(&buf, -123)) - mustEncode(atom.IntEncode(&buf, -1234)) - mustEncode(atom.IntEncode(&buf, -12345678)) + mustEncode(keks.StrEncode(&buf, "neg")) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomList))) + mustEncode(keks.IntEncode(&buf, -1)) + mustEncode(keks.IntEncode(&buf, -2)) + mustEncode(keks.IntEncode(&buf, -32)) + mustEncode(keks.IntEncode(&buf, -33)) + mustEncode(keks.IntEncode(&buf, 
-123)) + mustEncode(keks.IntEncode(&buf, -1234)) + mustEncode(keks.IntEncode(&buf, -12345678)) b := big.NewInt(0) b.SetBytes(mustHexDec("0100000000000000000000")) b = b.Neg(b) - mustEncode(atom.BigIntEncode(&buf, b)) + mustEncode(keks.BigIntEncode(&buf, b)) b.SetBytes(mustHexDec("0100000000000000000000000000000001")) b = b.Neg(b) - mustEncode(atom.BigIntEncode(&buf, b)) + mustEncode(keks.BigIntEncode(&buf, b)) b.SetBytes(mustHexDec("e5a461280341856d4ad908a69ea5f3ccc10c7882142bb7d801cc380f26b6b4d69632024ee521f8cfafb443d49a2a3d0cc73bb4757e882f5396ed302b418210d0d49d71be86ca699cf5ee3bd6d57ed658e69316229644ba650c92d7f0d4db29c3ad1dfa9979166f4c6e79561a58f8e2c63d08df4e2246ed1f64d2d613a19d8c9a6870e6188e2f3ad40c038fda30452f8ddfcd212a6a974bc25ec6a0564c66a7d28750ff9db458b74441e49ee5e82dbf4974d645678e0ad031f97aaba855451eef17a89b42821e530816dd5793a83b7a82e8ede81e7f3395691f761784f8bc627961cd40845ee908a40b9d1f01927b38eb1a7d4efd60db0944f7ec1b832b7e6eb1833f9a351576ad5de571fae8865da7514f06b0fbf38c1f2a8538f5d38b4e18001ccbb9ddcb488530f6086d14744d8b5672166e48e9ef93772575db66b6f257c6ffad6e2c291510c5ed02e1a8b24b44ec1e2a91686238e8defd18c01998634a5076a6b7f85fc81a1d61a15b2c528dfa082ce3e3e2ca649ac04817ec5c123e0b761ab103f780c014f021bbeb7ea3b86e0ca1c833e38ef5c897a6d7e1f4a2398c490b3d65e2f45c7fae402d1df1698b6fddb185481664871c2664bfd1686b2b3372783f1856f6247a3f8437a2818f68b7c4ea13a5f57b73c72870b684045f15")) b = b.Neg(b) - mustEncode(atom.BigIntEncode(&buf, b)) - mustEncode(atom.EOCEncode(&buf)) + mustEncode(keks.BigIntEncode(&buf, b)) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomEOC))) } { - mustEncode(atom.StrEncode(&buf, "pos")) - mustEncode(atom.ListEncode(&buf)) - mustEncode(atom.UIntEncode(&buf, 0)) - mustEncode(atom.UIntEncode(&buf, 1)) - mustEncode(atom.UIntEncode(&buf, 31)) - mustEncode(atom.UIntEncode(&buf, 32)) - mustEncode(atom.UIntEncode(&buf, 123)) - mustEncode(atom.UIntEncode(&buf, 1234)) - mustEncode(atom.UIntEncode(&buf, 12345678)) + mustEncode(keks.StrEncode(&buf, "pos")) + 
mustEncode(keks.ByteEncode(&buf, byte(keks.AtomList))) + mustEncode(keks.UIntEncode(&buf, 0)) + mustEncode(keks.UIntEncode(&buf, 1)) + mustEncode(keks.UIntEncode(&buf, 31)) + mustEncode(keks.UIntEncode(&buf, 32)) + mustEncode(keks.UIntEncode(&buf, 123)) + mustEncode(keks.UIntEncode(&buf, 1234)) + mustEncode(keks.UIntEncode(&buf, 12345678)) b := big.NewInt(0) b.SetBytes(mustHexDec("0100000000000000000000")) - mustEncode(atom.BigIntEncode(&buf, b)) + mustEncode(keks.BigIntEncode(&buf, b)) b.SetBytes(mustHexDec("0100000000000000000000000000000000")) - mustEncode(atom.BigIntEncode(&buf, b)) - mustEncode(atom.EOCEncode(&buf)) + mustEncode(keks.BigIntEncode(&buf, b)) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomEOC))) } - mustEncode(atom.EOCEncode(&buf)) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomEOC))) } { - mustEncode(atom.StrEncode(&buf, "uuid")) - mustEncode(atom.UUIDEncode(&buf, - uuid.MustParse("0e875e3f-d385-49eb-87b4-be42d641c367"))) + u := uuid.MustParse("0e875e3f-d385-49eb-87b4-be42d641c367") + mustEncode(keks.StrEncode(&buf, "uuid")) + mustEncode(keks.UUIDEncode(&buf, &u)) } { - mustEncode(atom.StrEncode(&buf, "dates")) - mustEncode(atom.ListEncode(&buf)) + mustEncode(keks.StrEncode(&buf, "dates")) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomList))) { var tai tai64n.TAI64 t := time.Unix(1234567890, 0) t = tai64n.Leapsecs.Add(t) tai.FromTime(t) - mustEncode(atom.TAI64Encode(&buf, tai[:])) + mustEncode(keks.TAI64Encode(&buf, &tai)) } { var tai tai64n.TAI64N t := time.Unix(1234567890, 456*1000) t = tai64n.Leapsecs.Add(t) tai.FromTime(t) - mustEncode(atom.TAI64Encode(&buf, tai[:])) + mustEncode(keks.TAI64NEncode(&buf, &tai)) } { var tai tai64n.TAI64N t := time.Unix(1234567890, 456789) t = tai64n.Leapsecs.Add(t) tai.FromTime(t) - mustEncode(atom.TAI64Encode(&buf, tai[:])) + mustEncode(keks.TAI64NEncode(&buf, &tai)) } - mustEncode(atom.RawEncode(&buf, atom.Raw{ - T: atom.TAI64NA, - V: 
[]byte("\x40\x00\x00\x00\x49\x96\x02\xF4\x00\x06\xF8\x55\x07\x5B\xCD\x15"), - })) - mustEncode(atom.EOCEncode(&buf)) + mustEncode(io.Copy(&buf, bytes.NewReader(append( + []byte{byte(keks.AtomTAI64NA)}, + []byte("\x40\x00\x00\x00\x49\x96\x02\xF4\x00\x06\xF8\x55\x07\x5B\xCD\x15")..., + )))) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomEOC))) } { - mustEncode(atom.StrEncode(&buf, "floats")) - mustEncode(atom.ListEncode(&buf)) - mustEncode(atom.RawEncode(&buf, atom.Raw{ - T: atom.Float32, - V: []byte("\x01\x02\x03\x04"), - })) - mustEncode(atom.EOCEncode(&buf)) + mustEncode(keks.StrEncode(&buf, "floats")) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomList))) + mustEncode(io.Copy(&buf, bytes.NewReader(append( + []byte{byte(keks.AtomFloat32)}, + []byte("\x01\x02\x03\x04")..., + )))) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomEOC))) } { - mustEncode(atom.StrEncode(&buf, "empties")) - mustEncode(atom.ListEncode(&buf)) - { - mustEncode(atom.ListEncode(&buf)) - mustEncode(atom.EOCEncode(&buf)) - } + mustEncode(keks.StrEncode(&buf, "empties")) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomList))) { - mustEncode(atom.MapEncode(&buf)) - mustEncode(atom.EOCEncode(&buf)) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomList))) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomEOC))) } { - mustEncode(atom.BlobEncode(&buf, 123)) - mustEncode(atom.BinEncode(&buf, []byte{})) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomMap))) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomEOC))) } - mustEncode(atom.UUIDEncode(&buf, uuid.Nil)) - mustEncode(atom.RawEncode(&buf, atom.Raw{ - T: atom.TAI64, - V: []byte("\x00\x00\x00\x00\x00\x00\x00\x00"), - })) - mustEncode(atom.EOCEncode(&buf)) + mustEncode(keks.BlobEncode(&buf, 123, bytes.NewReader([]byte{}))) + mustEncode(keks.UUIDEncode(&buf, &uuid.Nil)) + mustEncode(io.Copy(&buf, bytes.NewReader(append( + []byte{byte(keks.AtomTAI64)}, + []byte("\x00\x00\x00\x00\x00\x00\x00\x00")..., + )))) + mustEncode(keks.ByteEncode(&buf, 
byte(keks.AtomEOC))) } - mustEncode(atom.EOCEncode(&buf)) + mustEncode(keks.ByteEncode(&buf, byte(keks.AtomEOC))) } fmt.Println(hex.EncodeToString(buf.Bytes())) } diff --git a/go/ctx.go b/go/ctx.go new file mode 100644 index 0000000..3e7f903 --- /dev/null +++ b/go/ctx.go @@ -0,0 +1,84 @@ +// GoKEKS -- Go KEKS codec implementation +// Copyright (C) 2024-2025 Sergey Matveev +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation, version 3 of the License. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this program. If not, see . + +package keks + +import ( + "io" + "math/big" + + "github.com/google/uuid" + "go.cypherpunks.su/keks/types" + "go.cypherpunks.su/tai64n/v4" +) + +type DecodeOpts struct { + // Maximal allowable string length. 0 means no limits, but pay + // attention that if there is no sufficient memory available, + // then Go may panic. + MaxStrLen int64 + + // Maximal allowable lists/maps length. 0 means no limits. + MaxContLen int64 + + // Disable UTF-8 codepoints validation check. + DisableUTF8Check bool + + // Leave TAI64* values as is, do not convert to time.Time during unmarshal. + LeaveTAI64 bool +} + +type Decoder struct { + R io.Reader + B []byte + + // After successful parsing of the data, it tells how many bytes + // were read. 
+ Read int64 + + opts *DecodeOpts + + depth int8 + types []types.Type + depths []int8 + + bigints []*big.Int + bools []bool + ints []int64 + lens []int + rawTypes []AtomType + strs []string + tai64nas []tai64n.TAI64NA + tai64ns []tai64n.TAI64N + tai64s []tai64n.TAI64 + uints []uint64 + uuids []uuid.UUID + + blobChunkLens []int64 + blobChunkses [][]string +} + +// Initialise decoder that will read from b bytes. After the parse, +// d.B will keep the remaining unread portion of unprocessed data. +// Pay attention, that decoded strings and byte slices will reference +// the original bytes buffer, that is why it is much more faster decoder. +func NewDecoderFromBytes(b []byte, opts *DecodeOpts) (d *Decoder) { + return &Decoder{B: b, opts: opts} +} + +// Initialise decoder that will read from r reader. +func NewDecoderFromReader(r io.Reader, opts *DecodeOpts) (d *Decoder) { + return &Decoder{R: r, opts: opts} +} diff --git a/go/dec.go b/go/dec.go deleted file mode 100644 index 0d0fd23..0000000 --- a/go/dec.go +++ /dev/null @@ -1,205 +0,0 @@ -// GoKEKS -- Go KEKS codec implementation -// Copyright (C) 2024-2025 Sergey Matveev -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation, version 3 of the License. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this program. If not, see . 
- -package keks - -import ( - "bytes" - "errors" - "io" - - "go.cypherpunks.su/keks/atom" - "go.cypherpunks.su/keks/types" -) - -const parseMaxRecursionDepth = 1 << 10 - -var ( - ErrMapBadKey = errors.New("map bad key") - ErrMapUnordered = errors.New("map unordered") - ErrBlobBadAtom = errors.New("blob unexpected atom") - ErrBlobBadTerm = errors.New("blob bad terminator") - ErrUnexpectedEOC = errors.New("unexpected EOC") -) - -// Item is the base object, holding the type and corresponding value. -// Depending on the type, you have to type assert the value. -// - EOC, NIL holds nothing -// - Bool holds bool -// - UUID holds uuid.UUID -// - UInt holds uint64 -// - Int holds negative int64 -// - BigInt holds *big.Int -// - List holds []Item -// - Map holds map[string]Item -// - Blob holds Blob -// - Float (currently) holds atom.Raw -// - TAI64 holds []byte with with the TAI64 in external format. -// Look at its length to determine exact value -// - Bin holds []byte -// - Str holds string -// - Raw holds the atom.Raw -type Item struct { - V any - T types.Type -} - -func decode( - ctx *atom.Decoder, - allowContainers bool, - recursionDepth int, -) (item Item, read int64, err error) { - if recursionDepth > parseMaxRecursionDepth { - err = errors.New("deep recursion") - return - } - item.T, item.V, read, err = ctx.Decode() - if err != nil { - return - } - switch item.T { - case types.List: - if !allowContainers { - err = atom.ErrUnknownType - return - } - var sub Item - var subRead int64 - var v []Item - for { - sub, subRead, err = decode(ctx, true, recursionDepth+1) - read += subRead - if err != nil { - return - } - if sub.T == types.EOC { - break - } - v = append(v, sub) - } - item.V = v - return - case types.Map: - if !allowContainers { - err = atom.ErrUnknownType - return - } - v := make(map[string]Item) - var sub Item - var subRead int64 - var keyPrev string - for { - sub, subRead, err = decode(ctx, false, recursionDepth+1) - read += subRead - if err != nil { - 
return - } - if sub.T == types.EOC { - break - } - if sub.T != types.Str { - err = ErrMapBadKey - return - } - { - s := sub.V.(string) - if len(s) == 0 { - err = ErrMapBadKey - return - } - if len(s) < len(keyPrev) { - err = ErrMapUnordered - return - } else if (len(s) == len(keyPrev)) && s <= keyPrev { - err = ErrMapUnordered - return - } - keyPrev = s - } - sub, subRead, err = decode(ctx, true, recursionDepth+1) - read += subRead - if err != nil { - return - } - if sub.T == types.EOC { - err = ErrUnexpectedEOC - return - } - v[keyPrev] = sub - } - item.V = v - return - case types.Blob: - if !allowContainers { - err = atom.ErrUnknownType - return - } - chunkLen := int(item.V.(uint64)) - if ctx.MaxStrLen != 0 && int64(chunkLen) > ctx.MaxStrLen { - err = atom.ErrLenTooBig - return - } - v := Blob{ChunkLen: chunkLen} - var sub Item - var subRead int64 - var chunks []io.Reader - BlobCycle: - for { - sub, subRead, err = decode(ctx, false, recursionDepth+1) - read += subRead - if err != nil { - return - } - switch sub.T { - case types.NIL: - var buf []byte - buf, err = ctx.Want(chunkLen) - if err != nil { - return - } - read += int64(chunkLen) - chunks = append(chunks, bytes.NewReader(buf)) - v.DecodedLen += int64(chunkLen) - case types.Bin: - b := sub.V.([]byte) - if len(b) >= chunkLen { - err = ErrBlobBadTerm - return - } - if len(b) != 0 { - chunks = append(chunks, bytes.NewReader(b)) - v.DecodedLen += int64(len(b)) - } - break BlobCycle - default: - err = ErrBlobBadAtom - return - } - } - v.R = io.MultiReader(chunks...) - item.V = v - return - } - return -} - -// Decode single KEKS-encoded data item. 
-func Decode(ctx *atom.Decoder) (item Item, read int64, err error) { - item, read, err = decode(ctx, true, 0) - if item.T == types.EOC { - err = ErrUnexpectedEOC - } - return -} diff --git a/go/enc.go b/go/enc.go deleted file mode 100644 index 13c8309..0000000 --- a/go/enc.go +++ /dev/null @@ -1,137 +0,0 @@ -// GoKEKS -- Go KEKS codec implementation -// Copyright (C) 2024-2025 Sergey Matveev -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation, version 3 of the License. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this program. If not, see . - -package keks - -import ( - "bytes" - "fmt" - "io" - "math/big" - "sort" - - "github.com/google/uuid" - - "go.cypherpunks.su/keks/atom" - "go.cypherpunks.su/keks/types" -) - -// Encode an item. 
-func (item Item) Encode(w io.Writer) (written int64, err error) { - switch item.T { - case types.Invalid: - panic("invalid item's type met") - case types.NIL: - return atom.NILEncode(w) - case types.Bool: - return atom.BoolEncode(w, item.V.(bool)) - case types.UUID: - return atom.UUIDEncode(w, item.V.(uuid.UUID)) - case types.UInt: - return atom.UIntEncode(w, item.V.(uint64)) - case types.Int: - return atom.IntEncode(w, item.V.(int64)) - case types.BigInt: - return atom.BigIntEncode(w, item.V.(*big.Int)) - case types.List: - written, err = atom.ListEncode(w) - if err != nil { - return - } - var n int64 - for _, v := range item.V.([]Item) { - n, err = v.Encode(w) - written += n - if err != nil { - return - } - } - n, err = atom.EOCEncode(w) - written += n - case types.Map: - m := item.V.(map[string]Item) - keys := make([]string, 0, len(m)) - for k := range m { - keys = append(keys, k) - } - sort.Sort(ByLenFirst(keys)) - written, err = atom.MapEncode(w) - if err != nil { - return - } - var n int64 - for _, k := range keys { - n, err = atom.StrEncode(w, k) - written += n - if err != nil { - return - } - n, err = m[k].Encode(w) - written += n - if err != nil { - return - } - } - n, err = atom.EOCEncode(w) - written += n - case types.Blob: - blob := item.V.(Blob) - written, err = atom.BlobEncode(w, blob.ChunkLen) - if err != nil { - return - } - chunk := make([]byte, blob.ChunkLen) - var n int - var n64 int64 - for { - n, err = io.ReadFull(blob.R, chunk) - if err != nil { - if err == io.ErrUnexpectedEOF || err == io.EOF { - chunk = chunk[:n] - break - } - return - } - n64, err = atom.ChunkEncode(w, chunk) - written += n64 - if err != nil { - return - } - } - n64, err = atom.BinEncode(w, chunk) - written += n64 - case types.TAI64: - return atom.TAI64Encode(w, item.V.([]byte)) - case types.Bin: - return atom.BinEncode(w, item.V.([]byte)) - case types.Str: - return atom.StrEncode(w, item.V.(string)) - case types.Raw: - return atom.RawEncode(w, item.V.(atom.Raw)) - 
default: - panic(fmt.Errorf("unhandled type: %v", item.T)) - } - return -} - -// Append an encoded item to the provided buf. -func (item Item) EncodeBuf(buf []byte) ([]byte, error) { - var b bytes.Buffer - _, err := item.Encode(&b) - if err != nil { - return nil, err - } - return append(buf, b.Bytes()...), nil -} diff --git a/go/encode.go b/go/encode.go new file mode 100644 index 0000000..6dcbc29 --- /dev/null +++ b/go/encode.go @@ -0,0 +1,241 @@ +// GoKEKS -- Go KEKS codec implementation +// Copyright (C) 2024-2025 Sergey Matveev +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation, version 3 of the License. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this program. If not, see . 
+ +package keks + +import ( + "bytes" + "fmt" + "io" + "math/big" + "reflect" + "sort" + "strings" + "time" + + "github.com/google/uuid" + + keksort "go.cypherpunks.su/keks/internal/sort" + "go.cypherpunks.su/tai64n/v4" +) + +var bigIntZero = big.NewInt(0) + +func Encode(w io.Writer, v any) (written int64, err error) { + if v == nil { + return ByteEncode(w, byte(AtomNIL)) + } + switch v := v.(type) { + case []byte: + return BinEncode(w, v) + case *BlobChunked: + return BlobEncode(w, v.ChunkLen, v.Reader()) + case BlobChunked: + return BlobEncode(w, v.ChunkLen, v.Reader()) + case *BlobReader: + return BlobEncode(w, v.ChunkLen, v.R) + case BlobReader: + return BlobEncode(w, v.ChunkLen, v.R) + case time.Time: + v = tai64n.Leapsecs.Add(v) + if v.Nanosecond() > 0 { + var tai tai64n.TAI64N + tai.FromTime(v) + return TAI64NEncode(w, &tai) + } else { + var tai tai64n.TAI64 + tai.FromTime(v) + return TAI64Encode(w, &tai) + } + case Raw: + return io.Copy(w, bytes.NewReader(v)) + case *big.Int: + return BigIntEncode(w, v) + case bool: + return BoolEncode(w, v) + case uuid.UUID: + return UUIDEncode(w, &v) + case tai64n.TAI64: + return TAI64Encode(w, &v) + case *tai64n.TAI64: + return TAI64Encode(w, v) + case tai64n.TAI64N: + return TAI64NEncode(w, &v) + case *tai64n.TAI64N: + return TAI64NEncode(w, v) + case tai64n.TAI64NA: + return TAI64NAEncode(w, &v) + case *tai64n.TAI64NA: + return TAI64NAEncode(w, v) + case uint: + return UIntEncode(w, uint64(v)) + case uint8: + return UIntEncode(w, uint64(v)) + case uint16: + return UIntEncode(w, uint64(v)) + case uint32: + return UIntEncode(w, uint64(v)) + case uint64: + return UIntEncode(w, v) + case int: + return IntEncode(w, int64(v)) + case int8: + return IntEncode(w, int64(v)) + case int16: + return IntEncode(w, int64(v)) + case int32: + return IntEncode(w, int64(v)) + case int64: + return IntEncode(w, v) + case string: + return StrEncode(w, v) + } + vv := reflect.ValueOf(v) + switch reflect.TypeOf(v).Kind() { + case 
reflect.Pointer: + if vv.IsNil() { + return ByteEncode(w, byte(AtomNIL)) + } + return Encode(w, vv.Elem().Interface()) + case reflect.Slice: + _, err = ByteEncode(w, byte(AtomList)) + if err != nil { + return + } + written++ + var n64 int64 + if anys, ok := v.([]any); ok { + for _, v := range anys { + n64, err = Encode(w, v) + written += n64 + if err != nil { + return + } + } + } else { + for i := 0; i < vv.Len(); i++ { + n64, err = Encode(w, vv.Index(i).Interface()) + written += n64 + if err != nil { + return + } + } + } + _, err = ByteEncode(w, byte(AtomEOC)) + if err != nil { + return + } + written++ + return + case reflect.Map: + _, err = ByteEncode(w, byte(AtomMap)) + if err != nil { + return + } + written++ + keys := vv.MapKeys() + sort.Sort(keksort.ByLenFirstRV(keys)) + var n64 int64 + for _, k := range keys { + n64, err = StrEncode(w, k.String()) + written += n64 + if err != nil { + return + } + n64, err = Encode(w, vv.MapIndex(k).Interface()) + written += n64 + if err != nil { + return + } + } + _, err = ByteEncode(w, byte(AtomEOC)) + if err != nil { + return + } + written++ + return + } + if t := vv.Type(); t.Kind() == reflect.Struct { + fields := reflect.VisibleFields(t) + omits := make(map[string]struct{}) + for i := range fields { + if tag, ok := fields[i].Tag.Lookup("keks"); ok { + opts := strings.Split(tag, ",") + if opts[0] != "" { + fields[i].Name = opts[0] + if len(opts) == 2 && opts[1] == "omitempty" { + omits[opts[0]] = struct{}{} + } + } + } + } + sort.Sort(keksort.ByKEKSName(fields)) + _, err = ByteEncode(w, byte(AtomMap)) + if err != nil { + return + } + written++ + var n64 int64 + for i := range fields { + fv := vv.FieldByIndex(fields[i].Index) + { + var empty bool + switch fv.Type().Kind() { + case reflect.Pointer: + if fv.IsNil() { + empty = true + } + case reflect.Slice: + if fv.Len() == 0 { + empty = true + } + case reflect.Map: + if fv.Len() == 0 { + empty = true + } + } + if empty { + if _, ok := omits[fields[i].Name]; ok { + continue 
+ } + } + } + n64, err = StrEncode(w, fields[i].Name) + written += n64 + if err != nil { + return + } + n64, err = Encode(w, fv.Interface()) + written += n64 + if err != nil { + return + } + } + _, err = ByteEncode(w, byte(AtomEOC)) + if err != nil { + return + } + written++ + } else { + err = fmt.Errorf("unhandled type: %+v", v) + } + return +} + +func EncodeBuf(v any) ([]byte, error) { + var b bytes.Buffer + _, err := Encode(&b, v) + return b.Bytes(), err +} diff --git a/go/fromgo.go b/go/fromgo.go deleted file mode 100644 index f357583..0000000 --- a/go/fromgo.go +++ /dev/null @@ -1,218 +0,0 @@ -// GoKEKS -- Go KEKS codec implementation -// Copyright (C) 2024-2025 Sergey Matveev -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation, version 3 of the License. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this program. If not, see . - -package keks - -import ( - "fmt" - "math/big" - "reflect" - "strings" - "time" - - "github.com/google/uuid" - "go.cypherpunks.su/tai64n/v4" - - "go.cypherpunks.su/keks/atom" - "go.cypherpunks.su/keks/types" -) - -func structTagRead(f reflect.StructField) (name string, omit bool) { - name = f.Name - v, ok := f.Tag.Lookup("keks") - if !ok { - return - } - opts := strings.Split(v, ",") - if opts[0] != "" { - name = opts[0] - } - if len(opts) == 2 && opts[1] == "omitempty" { - omit = true - } - return -} - -// Create an Item from native Go type, for its future encoding. 
-// Allowable types: -// - [*]atom.Raw -// - [*]Blob -// - *big.Int -// - []any -// - []byte -- will be converted to binary string -// - bool -// - int, int8, int16, int32, int64 -// - map[string]any -// - nil -// - string -// - struct -- will be interpreted as map[string]any -// - time.Time -- will be converted either to -// TAI64 (if nanoseconds=0), or TAI64N -// - uint, uint8, uint16, uint32, uint64 -// - uuid.UUID -func FromGo(v any) (Item, error) { - if v == nil { - return Item{T: types.NIL}, nil - } - rv := reflect.ValueOf(v) - if b, ok := v.([]byte); ok { - return Item{T: types.Bin, V: b}, nil - } - switch v := v.(type) { - case *Blob: - return Item{T: types.Blob, V: *v}, nil - case Blob: - return Item{T: types.Blob, V: v}, nil - case time.Time: - t := tai64n.Leapsecs.Add(v) - var taiRaw []byte - if t.Nanosecond() > 0 { - var tai tai64n.TAI64N - tai.FromTime(t) - taiRaw = tai[:] - } else { - var tai tai64n.TAI64 - tai.FromTime(t) - taiRaw = tai[:] - } - return Item{T: types.TAI64, V: taiRaw}, nil - case *atom.Raw: - return Item{T: types.Raw, V: *v}, nil - case atom.Raw: - return Item{T: types.Raw, V: v}, nil - case *big.Int: - return Item{T: types.BigInt, V: v}, nil - } - switch reflect.TypeOf(v).Kind() { - case reflect.Pointer: - if rv.IsNil() { - return Item{T: types.NIL}, nil - } - return FromGo(rv.Elem().Interface()) - case reflect.Slice: - var ret []Item - var err error - if anys, ok := v.([]any); ok { - for _, v := range anys { - var item Item - item, err = FromGo(v) - if err != nil { - return item, err - } - ret = append(ret, item) - } - } else { - rv = reflect.ValueOf(v) - for i := 0; i < rv.Len(); i++ { - var item Item - item, err = FromGo(rv.Index(i).Interface()) - if err != nil { - return item, err - } - ret = append(ret, item) - } - } - return Item{T: types.List, V: ret}, nil - case reflect.Map: - ret := make(map[string]Item, rv.Len()) - iter := rv.MapRange() - var err error - for iter.Next() { - var item Item - item, err = 
FromGo(iter.Value().Interface()) - if err != nil { - return item, err - } - ret[iter.Key().String()] = item - } - return Item{T: types.Map, V: ret}, nil - } - { - t := rv.Type() - if t.Kind() == reflect.Struct { - ret := make(map[string]Item) - for _, f := range reflect.VisibleFields(t) { - fv := rv.FieldByIndex(f.Index) - name, omit := structTagRead(f) - var empty bool - item, err := FromGo(fv.Interface()) - if err != nil { - return item, err - } - switch item.T { - case types.NIL: - empty = true - case types.List: - if len(item.V.([]Item)) == 0 { - empty = true - } - case types.Map: - if len(item.V.(map[string]Item)) == 0 { - empty = true - } - } - if !(omit && empty) { - ret[name] = item - } - } - return Item{T: types.Map, V: ret}, nil - } - } - switch v := v.(type) { - case bool: - return Item{T: types.Bool, V: v}, nil - case uuid.UUID: - return Item{T: types.UUID, V: v}, nil - case uint: - return Item{T: types.UInt, V: uint64(v)}, nil - case uint8: - return Item{T: types.UInt, V: uint64(v)}, nil - case uint16: - return Item{T: types.UInt, V: uint64(v)}, nil - case uint32: - return Item{T: types.UInt, V: uint64(v)}, nil - case uint64: - return Item{T: types.UInt, V: v}, nil - case int: - if v >= 0 { - return Item{T: types.UInt, V: uint64(v)}, nil - } - return Item{T: types.Int, V: int64(v)}, nil - case int8: - if v >= 0 { - return Item{T: types.UInt, V: uint64(v)}, nil - } - return Item{T: types.Int, V: int64(v)}, nil - case int16: - if v >= 0 { - return Item{T: types.UInt, V: uint64(v)}, nil - } - return Item{T: types.Int, V: int64(v)}, nil - case int32: - if v >= 0 { - return Item{T: types.UInt, V: uint64(v)}, nil - } - return Item{T: types.Int, V: int64(v)}, nil - case int64: - if v >= 0 { - return Item{T: types.UInt, V: uint64(v)}, nil - } - return Item{T: types.Int, V: v}, nil - case string: - return Item{T: types.Str, V: v}, nil - default: - return Item{}, fmt.Errorf("unhandled type: %+v", v) - } -} diff --git a/go/fuzz_test.go b/go/fuzz_test.go index 
1640a65..b7f4c4a 100644 --- a/go/fuzz_test.go +++ b/go/fuzz_test.go @@ -3,33 +3,32 @@ package keks import ( "bytes" "testing" - - "go.cypherpunks.su/keks/atom" ) -func FuzzItemDecode(f *testing.F) { - var item Item +func FuzzDecode(f *testing.F) { var err error - var e any + var v any var buf bytes.Buffer + var d *Decoder + opts := DecodeOpts{MaxStrLen: 1 << 20} f.Fuzz(func(t *testing.T, b []byte) { - item, _, err = Decode(&atom.Decoder{B: b, MaxStrLen: 1 << 20}) + d = NewDecoderFromBytes(b, &opts) + _, err = d.Parse() if err == nil { - e, err = item.ToGo() - if err != nil { - t.Fail() - } - item, err = FromGo(e) + v, err = d.Unmarshal() if err != nil { - t.Fail() + if err == ErrLeapSecond { + return + } + t.Fatal(err) } buf.Reset() - _, err = item.Encode(&buf) + _, err = Encode(&buf, v) if err != nil { - t.Fail() + t.Fatal(err) } if !bytes.Equal(buf.Bytes(), b[:buf.Len()]) { - t.Fail() + t.Fatal("not equal") } } }) diff --git a/go/atom/ctx.go b/go/getter.go similarity index 60% rename from go/atom/ctx.go rename to go/getter.go index 138d442..f34bf23 100644 --- a/go/atom/ctx.go +++ b/go/getter.go @@ -13,38 +13,51 @@ // You should have received a copy of the GNU Lesser General Public // License along with this program. If not, see . -package atom +package keks -import "io" +import ( + "io" + "unsafe" +) -type Decoder struct { - // You have to set one of R or B as a data source. Decoding from the - // B buffer takes less allocations, it is faster. - R io.Reader - B []byte - - // Maximal allowable string length. 0 means no limits, but pay - // attention that if there is no sufficient memory available, - // then Go may panic. - MaxStrLen int64 - - // Disable UTF-8 codepoints validation check. 
- DisableUTF8Check bool +func (ctx *Decoder) getByte() (b byte, err error) { + if ctx.B == nil { + var buf [1]byte + _, err = ctx.R.Read(buf[:]) + b = buf[0] + ctx.Read++ + return + } + if len(ctx.B) < 1 { + err = io.ErrUnexpectedEOF + return + } + b, ctx.B = ctx.B[0], ctx.B[1:] + ctx.Read++ + return } // Read n bytes from the data source. If data source is R, then buf is // allocated for each new read. If data source is B, then buf is a slice // of the original B buffer. -func (ctx *Decoder) Want(n int) (buf []byte, err error) { +func (ctx *Decoder) getBytes(n int) (s string, err error) { + var read int if ctx.B == nil { - buf = make([]byte, n) - _, err = io.ReadFull(ctx.R, buf) + buf := make([]byte, n) + read, err = io.ReadFull(ctx.R, buf) + ctx.Read += int64(read) + if err != nil { + return + } + s = unsafe.String(unsafe.SliceData(buf), len(buf)) return } if len(ctx.B) < n { err = io.ErrUnexpectedEOF return } - buf, ctx.B = ctx.B[:n], ctx.B[n:] + s = unsafe.String(unsafe.SliceData(ctx.B[:n]), n) + ctx.B = ctx.B[n:] + ctx.Read += int64(n) return } diff --git a/go/internal/sort/name.go b/go/internal/sort/name.go new file mode 100644 index 0000000..305b141 --- /dev/null +++ b/go/internal/sort/name.go @@ -0,0 +1,25 @@ +package sort + +import "reflect" + +type ByKEKSName []reflect.StructField + +func (a ByKEKSName) Len() int { + return len(a) +} + +func (a ByKEKSName) Swap(i, j int) { + a[i], a[j] = a[j], a[i] +} + +func (a ByKEKSName) Less(i, j int) bool { + ai := a[i].Name + aj := a[j].Name + if len(ai) < len(aj) { + return true + } + if len(ai) > len(aj) { + return false + } + return ai < aj +} diff --git a/go/internal/sort/rv.go b/go/internal/sort/rv.go new file mode 100644 index 0000000..151903e --- /dev/null +++ b/go/internal/sort/rv.go @@ -0,0 +1,25 @@ +package sort + +import "reflect" + +type ByLenFirstRV []reflect.Value + +func (a ByLenFirstRV) Len() int { + return len(a) +} + +func (a ByLenFirstRV) Swap(i, j int) { + a[i], a[j] = a[j], a[i] +} + +func (a 
ByLenFirstRV) Less(i, j int) bool { + ai := a[i].String() + aj := a[j].String() + if len(ai) < len(aj) { + return true + } + if len(ai) > len(aj) { + return false + } + return ai < aj +} diff --git a/go/iter.go b/go/iter.go new file mode 100644 index 0000000..bb327f7 --- /dev/null +++ b/go/iter.go @@ -0,0 +1,152 @@ +// GoKEKS -- Go KEKS codec implementation +// Copyright (C) 2024-2025 Sergey Matveev +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation, version 3 of the License. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this program. If not, see . + +package keks + +import ( + "math/big" + + "github.com/google/uuid" + "go.cypherpunks.su/keks/types" + "go.cypherpunks.su/tai64n/v4" +) + +// Iterate over parsed elements of Decoder. You call iter.Next() function to +// proceed to the next decoded element. Then you look at iter.T to determine +// the type of the element. Then you call necessary method to get the +// native Go's type. If the element is either list or map, then +// iter.Len() will tell you how many elements it contains and how many +// times you have to call Next to get them. +type Iterator struct { + ctx *Decoder + + T types.Type + Depth int + + i int + bigints int + blobs int + bools int + ints int + lens int + raws int + strs int + tai64nas int + tai64ns int + tai64s int + uints int + uuids int +} + +func (ctx *Decoder) Iter() *Iterator { + return &Iterator{ctx: ctx, i: -1, T: types.Invalid} +} + +// Proceed to the next parsed element. Returns false if end is reached. 
+func (iter *Iterator) Next() bool {
+	// Before moving on, advance the cursor of the per-type slice the
+	// current element was served from, so that the accessors below read
+	// the slot that belongs to the next element of that type. Types
+	// without a case here have no cursor to advance.
+	switch iter.T {
+	case types.Bool:
+		iter.bools++
+	case types.UUID:
+		iter.uuids++
+	case types.UInt:
+		iter.uints++
+	case types.Int:
+		iter.ints++
+	case types.BigInt:
+		iter.bigints++
+	case types.List, types.Map:
+		// Lists and maps share the lens slice.
+		iter.lens++
+	case types.Blob:
+		iter.blobs++
+	case types.TAI64:
+		iter.tai64s++
+	case types.TAI64N:
+		iter.tai64ns++
+	case types.TAI64NA:
+		iter.tai64nas++
+	case types.Bin, types.Str:
+		// Binary and UTF-8 strings share the strs slice.
+		iter.strs++
+	case types.Raw:
+		// A raw element occupies both a rawTypes slot and a strs slot.
+		iter.raws++
+		iter.strs++
+	}
+	iter.i++
+	if iter.i >= len(iter.ctx.types) {
+		iter.T = types.Invalid
+		return false
+	}
+	iter.T = iter.ctx.types[iter.i]
+	iter.Depth = int(iter.ctx.depths[iter.i])
+	return true
+}
+
+// Bool returns the decoder's bools entry at the current cursor.
+// Meant to be called while iter.T is types.Bool.
+func (iter *Iterator) Bool() bool {
+	return iter.ctx.bools[iter.bools]
+}
+
+// UUID returns the decoder's uuids entry at the current cursor.
+// Meant to be called while iter.T is types.UUID.
+func (iter *Iterator) UUID() uuid.UUID {
+	return iter.ctx.uuids[iter.uuids]
+}
+
+// UInt returns the decoder's uints entry at the current cursor.
+// Meant to be called while iter.T is types.UInt.
+func (iter *Iterator) UInt() uint64 {
+	return iter.ctx.uints[iter.uints]
+}
+
+// Int returns the decoder's ints entry at the current cursor.
+// Meant to be called while iter.T is types.Int.
+func (iter *Iterator) Int() int64 {
+	return iter.ctx.ints[iter.ints]
+}
+
+// BigInt returns the decoder's bigints entry at the current cursor.
+// Meant to be called while iter.T is types.BigInt.
+func (iter *Iterator) BigInt() *big.Int {
+	return iter.ctx.bigints[iter.bigints]
+}
+
+// Blob assembles a BlobChunked from the chunk length and the chunk list
+// stored at the current blobs cursor.
+// Meant to be called while iter.T is types.Blob.
+func (iter *Iterator) Blob() BlobChunked {
+	return BlobChunked{
+		ChunkLen: iter.ctx.blobChunkLens[iter.blobs],
+		Chunks:   iter.ctx.blobChunkses[iter.blobs],
+	}
+}
+
+// TAI64 returns a pointer to the element stored inside the decoder's
+// tai64s slice, not to a copy.
+func (iter *Iterator) TAI64() *tai64n.TAI64 {
+	return &iter.ctx.tai64s[iter.tai64s]
+}
+
+// TAI64N returns a pointer to the element stored inside the decoder's
+// tai64ns slice, not to a copy.
+func (iter *Iterator) TAI64N() *tai64n.TAI64N {
+	return &iter.ctx.tai64ns[iter.tai64ns]
+}
+
+// TAI64NA returns a pointer to the element stored inside the decoder's
+// tai64nas slice, not to a copy.
+func (iter *Iterator) TAI64NA() *tai64n.TAI64NA {
+	return &iter.ctx.tai64nas[iter.tai64nas]
+}
+
+// Bin returns the current strs entry converted to a []byte; the
+// conversion copies the data.
+func (iter *Iterator) Bin() []byte {
+	return []byte(iter.ctx.strs[iter.strs])
+}
+
+// Str returns the current strs entry as is.
+func (iter *Iterator) Str() string {
+	return iter.ctx.strs[iter.strs]
+}
+
+// Raw builds a new Raw value: the byte of the current rawTypes entry
+// followed by the bytes of the current strs entry.
+func (iter *Iterator) Raw() Raw {
+	return Raw(append(
+		[]byte{byte(iter.ctx.rawTypes[iter.raws])},
+		iter.ctx.strs[iter.strs]...,
+	))
+}
+
+// Len returns the decoder's lens entry at the current cursor.
+// Meant to be called while iter.T is types.List or types.Map.
+func (iter *Iterator) Len() int {
+	return iter.ctx.lens[iter.lens]
+}
diff --git a/go/mapstruct/map.go b/go/mapstruct/map.go
deleted
file mode 100644 index 640f40e..0000000 --- a/go/mapstruct/map.go +++ /dev/null @@ -1,31 +0,0 @@ -// GoKEKS -- Go KEKS codec implementation -// Copyright (C) 2024-2025 Sergey Matveev -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation, version 3 of the License. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this program. If not, see . - -package mapstruct - -import ( - "github.com/mitchellh/mapstructure" -) - -// Fill up dst structure with the contents taken from the src map. -func FromMap(dst any, src map[string]any) error { - decoder, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{ - Result: dst, TagName: "keks", - }) - if err != nil { - return err - } - return decoder.Decode(src) -} diff --git a/go/mk-fuzz-testdata b/go/mk-fuzz-testdata index 32e91d0..af9e3f3 100755 --- a/go/mk-fuzz-testdata +++ b/go/mk-fuzz-testdata @@ -7,7 +7,7 @@ cd $tmp PATH="$root/../tcl:$PATH" mk-fuzz-inputs cd "$root" -dst=testdata/fuzz/FuzzItemDecode +dst=testdata/fuzz/FuzzDecode mkdir -p $dst # go install golang.org/x/tools/cmd/file2fuzz@latest file2fuzz -o $dst $tmp/* diff --git a/go/parse.go b/go/parse.go new file mode 100644 index 0000000..8b4107b --- /dev/null +++ b/go/parse.go @@ -0,0 +1,179 @@ +// GoKEKS -- Go KEKS codec implementation +// Copyright (C) 2024-2025 Sergey Matveev +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation, version 3 of the License. 
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this program. If not, see .
+
+package keks
+
+import (
+	"errors"
+
+	"go.cypherpunks.su/keks/types"
+)
+
+var (
+	ErrMapBadKey     = errors.New("map bad key")
+	ErrMapUnordered  = errors.New("map unordered")
+	ErrBlobBadAtom   = errors.New("blob unexpected atom")
+	ErrBlobBadTerm   = errors.New("blob bad terminator")
+	ErrUnexpectedEOC = errors.New("unexpected EOC")
+	ErrTooDeep       = errors.New("too deep structure")
+)
+
+// deTail drops the most recently recorded entry from both ctx.types and
+// ctx.depths. parse uses it to forget structural atoms (EOC, blob chunk
+// markers, blob terminator) right after they were decoded.
+// NOTE(review): presumably DecodeAtom records every atom it decodes in
+// these two slices -- confirm against DecodeAtom.
+func (ctx *Decoder) deTail() {
+	ctx.types = ctx.types[:len(ctx.types)-1]
+	ctx.depths = ctx.depths[:len(ctx.depths)-1]
+}
+
+// parse decodes one element. For a list, map or blob it also consumes
+// everything the element contains. It returns the type of the element
+// it started with; types.EOC is returned to the caller as is.
+func (ctx *Decoder) parse() (t types.Type, err error) {
+	t, err = ctx.DecodeAtom()
+	if err != nil {
+		return
+	}
+	switch t {
+	case types.List:
+		ctx.depth++
+		// NOTE(review): this only fires if ctx.depth is a signed
+		// integer that wraps around -- confirm its declared type.
+		if ctx.depth < 0 {
+			err = ErrTooDeep
+			return
+		}
+		// Reserve the length slot now; it is filled in once the
+		// terminating EOC is seen.
+		ctx.lens = append(ctx.lens, 0)
+		idx := len(ctx.lens) - 1
+		var count int
+		var sub types.Type
+		for {
+			sub, err = ctx.parse()
+			if err != nil {
+				return
+			}
+			if sub == types.EOC {
+				ctx.deTail()
+				break
+			}
+			count++
+			// With >= the list is rejected as soon as count reaches
+			// MaxContLen, so at most MaxContLen-1 elements pass.
+			// NOTE(review): confirm that this is the intended bound.
+			if ctx.opts != nil &&
+				ctx.opts.MaxContLen > 0 &&
+				int64(count) >= ctx.opts.MaxContLen {
+				err = ErrLenTooBig
+				return
+			}
+		}
+		ctx.lens[idx] = count
+		ctx.depth--
+	case types.Map:
+		ctx.depth++
+		if ctx.depth < 0 {
+			err = ErrTooDeep
+			return
+		}
+		ctx.lens = append(ctx.lens, 0)
+		idx := len(ctx.lens) - 1
+		var count int
+		var sub types.Type
+		var keyPrev string
+		// NOTE(review): unlike the list branch, MaxContLen is not
+		// consulted here -- confirm whether maps should be limited.
+		for {
+			// The key is decoded as a bare atom: it must be a Str.
+			sub, err = ctx.DecodeAtom()
+			if err != nil {
+				return
+			}
+			if sub == types.EOC {
+				ctx.deTail()
+				break
+			}
+			if sub != types.Str {
+				err = ErrMapBadKey
+				return
+			}
+			{
+				// Keys must be non-empty and strictly increasing:
+				// shorter first, equal lengths compared bytewise.
+				// An equal key (a duplicate) is rejected as well.
+				s := ctx.strs[len(ctx.strs)-1]
+				if len(s) == 0 {
+					err = ErrMapBadKey
+					return
+				}
+				if len(s) < len(keyPrev) {
+					err = ErrMapUnordered
+					return
+				} else if (len(s) == len(keyPrev)) && s <= keyPrev {
+					err = ErrMapUnordered
+					return
+				}
+				keyPrev = s
+			}
+			// The value may be any element, but not an EOC: that
+			// would be a key without a value.
+			sub, err = ctx.parse()
+			if err != nil {
+				return
+			}
+			if sub == types.EOC {
+				err = ErrUnexpectedEOC
+				return
+			}
+			count++
+		}
+		// count is the number of key/value pairs.
+		ctx.lens[idx] = count
+		ctx.depth--
+	case types.Blob:
+		chunkLen := ctx.blobChunkLens[len(ctx.blobChunkLens)-1]
+		if ctx.opts != nil && ctx.opts.MaxStrLen != 0 && chunkLen > ctx.opts.MaxStrLen {
+			err = ErrLenTooBig
+			return
+		}
+		var chunks []string
+		var sub types.Type
+		var s string
+	BlobCycle:
+		for {
+			sub, err = ctx.DecodeAtom()
+			if err != nil {
+				return
+			}
+			switch sub {
+			case types.NIL:
+				// NIL announces one full chunk of chunkLen bytes.
+				// NOTE(review): without opts nothing here bounds
+				// chunkLen -- confirm that DecodeAtom validates it.
+				ctx.deTail()
+				s, err = ctx.getBytes(int(chunkLen))
+				if err != nil {
+					return
+				}
+				chunks = append(chunks, s)
+			case types.Bin:
+				// A Bin shorter than chunkLen terminates the blob;
+				// an empty one contributes no chunk.
+				s = ctx.strs[len(ctx.strs)-1]
+				if int64(len(s)) >= chunkLen {
+					err = ErrBlobBadTerm
+					return
+				}
+				if len(s) != 0 {
+					chunks = append(chunks, s)
+				}
+				// Forget the terminator: both its type/depth
+				// record and its entry in ctx.strs.
+				ctx.deTail()
+				ctx.strs = ctx.strs[:len(ctx.strs)-1]
+				break BlobCycle
+			default:
+				err = ErrBlobBadAtom
+				return
+			}
+		}
+		ctx.blobChunkses = append(ctx.blobChunkses, chunks)
+		return
+	}
+	return
+}
+
+// Parse raw data into internal structures.
+// t is the type of the decoded element.
+// You will use ctx.Iter or ctx.Unmarshal functions after that.
+func (ctx *Decoder) Parse() (t types.Type, err error) {
+	t, err = ctx.parse()
+	// parse hands an EOC back to its caller; list and map parsing
+	// consume it as their terminator. An EOC that reaches this point
+	// has no container to terminate.
+	if t == types.EOC {
+		err = ErrUnexpectedEOC
+	}
+	return
+}
diff --git a/go/pki/av.go b/go/pki/av.go
index d4614c1..f8bf2e0 100644
--- a/go/pki/av.go
+++ b/go/pki/av.go
@@ -45,11 +45,10 @@ func (av *AV) Id() (id uuid.UUID) {
 		id = uuid.Nil
 		return
 	}
-	item, err := keks.FromGo(av)
+	_, err := keks.Encode(hasher, av)
 	if err != nil {
 		panic(err)
 	}
-	item.Encode(hasher)
 	id, err = uuid.NewRandomFromReader(bytes.NewReader(hasher.Sum(nil)))
 	if err != nil {
 		panic(err)
diff --git a/go/pki/cer.go b/go/pki/cer.go
index eb43aec..cdb5ac8 100644
--- a/go/pki/cer.go
+++ b/go/pki/cer.go
@@ -24,7 +24,6 @@ import (
 
 	"github.com/google/uuid"
 	"go.cypherpunks.su/keks"
-	"go.cypherpunks.su/keks/mapstruct"
 	ed25519blake2b "go.cypherpunks.su/keks/pki/ed25519-blake2b"
 	"go.cypherpunks.su/keks/pki/gost"
 )
@@ -73,7 +72,7 @@ func (sd *SignedData) CerParse() error {
 	var load CerLoad
 	var err error
 	if v, ok := sd.Load.V.(map[string]any); ok {
-		err = mapstruct.FromMap(&load, v)
+		err = keks.Map2Struct(&load, v)
 	} else {
 		err = errors.New("CerParse: wrong /load/v")
 	}
@@ -191,12 +190,7 @@ func (sd *SignedData) CerCheckSignatureFrom(parent *CerLoad) (err error) {
 		return
 	}
 	tbs := SignedDataTBS{T: sd.Load.T, V: sd.Load.V, TBS: sig.TBS}
-	var item keks.Item
-	item, err = keks.FromGo(tbs)
-	if err != nil {
-		return
-	}
-	buf, err := item.EncodeBuf(nil)
+	buf, err := keks.EncodeBuf(tbs)
 	if err != nil {
 		return
 	}
diff --git a/go/pki/cmd/kekscertool/main.go b/go/pki/cmd/kekscertool/main.go
index d6bfba4..d09e97b 100644
--- a/go/pki/cmd/kekscertool/main.go
+++ b/go/pki/cmd/kekscertool/main.go
@@ -136,7 +136,6 @@ func main() {
 		log.Fatal("no -prv is set")
 	}
 
-	var item keks.Item
 	var prv crypto.Signer
 	var prvRaw []byte
 	var pub []byte
@@ -157,12 +156,8 @@ func main() {
 		if err != nil {
 			log.Fatal(err)
 		}
-		item, err = keks.FromGo(pki.AV{A: *algo, V: prvRaw})
-		if err != nil {
-			log.Fatal(err)
-		}
 		var data []byte
-		data, err =
item.EncodeBuf(nil) + data, err = keks.EncodeBuf(pki.AV{A: *algo, V: prvRaw}) if err != nil { log.Fatal(err) } @@ -194,12 +189,8 @@ func main() { log.Fatal(err) } - item, err = keks.FromGo(sd) - if err != nil { - log.Fatal(err) - } var data []byte - data, err = item.EncodeBuf(nil) + data, err = keks.EncodeBuf(sd) if err != nil { log.Fatal(err) } diff --git a/go/pki/cmd/kekssdtool/main.go b/go/pki/cmd/kekssdtool/main.go index 52e56de..9ff790e 100644 --- a/go/pki/cmd/kekssdtool/main.go +++ b/go/pki/cmd/kekssdtool/main.go @@ -113,13 +113,8 @@ func main() { if err != nil { log.Fatal(err) } - var item keks.Item - item, err = keks.FromGo(sd) - if err != nil { - log.Fatal(err) - } var data []byte - data, err = item.EncodeBuf(nil) + data, err = keks.EncodeBuf(sd) if err != nil { log.Fatal(err) } diff --git a/go/pki/prv.go b/go/pki/prv.go index 527e33f..6bead41 100644 --- a/go/pki/prv.go +++ b/go/pki/prv.go @@ -20,8 +20,7 @@ import ( "errors" "fmt" - "go.cypherpunks.su/keks/atom" - "go.cypherpunks.su/keks/mapstruct" + "go.cypherpunks.su/keks" ed25519blake2b "go.cypherpunks.su/keks/pki/ed25519-blake2b" "go.cypherpunks.su/keks/pki/gost" ) @@ -29,12 +28,12 @@ import ( // Parse private key contained in AV KEKS-encoded structure. 
func PrvParse(data []byte) (prv crypto.Signer, pub []byte, err error) { var av AV - var tail []byte - err = mapstruct.Decode(&av, &atom.Decoder{B: data, MaxStrLen: 1<<16}) + d := keks.NewDecoderFromBytes(data, &keks.DecodeOpts{MaxStrLen: 1 << 16}) + err = d.DecodeStruct(&av) if err != nil { return } - if len(tail) != 0 { + if len(d.B) != 0 { err = errors.New("trailing data") return } diff --git a/go/pki/signed-data.go b/go/pki/signed-data.go index 3e81ff2..e95c30b 100644 --- a/go/pki/signed-data.go +++ b/go/pki/signed-data.go @@ -24,9 +24,6 @@ import ( "github.com/google/uuid" "go.cypherpunks.su/keks" - "go.cypherpunks.su/keks/atom" - "go.cypherpunks.su/keks/mapstruct" - "go.cypherpunks.su/keks/types" ) type SignedDataLoad struct { @@ -61,24 +58,8 @@ type SignedData struct { Sigs []*Sig `keks:"sigs"` } -// Parse signed-data from decoded item. -func SignedDataParseItem(item keks.Item) (sd *SignedData, err error) { - if item.T != types.Map { - err = errors.New("SignedDataParse: non-map") - return - } - var _sd SignedData - var e any - e, err = item.ToGo() - if err != nil { - return - } - err = mapstruct.FromMap(&_sd, e.(map[string]any)) - if err != nil { - return - } - sd = &_sd - +// Validate parsed signed-data structure. +func SignedDataValidate(sd *SignedData) (err error) { if sd.Hashes != nil && len(*sd.Hashes) == 0 { err = errors.New("SignedDataParse: empty /hash") return @@ -139,14 +120,15 @@ func SignedDataParseItem(item keks.Item) (sd *SignedData, err error) { // Parse signed-data from KEKS-encoded data. This is just a wrapper over // SignedDataParseItem. 
-func SignedDataParse(data []byte) (sd *SignedData, err error) { - var item keks.Item - item, _, err = keks.Decode(&atom.Decoder{B: data}) +func SignedDataParse(data []byte) (*SignedData, error) { + d := keks.NewDecoderFromBytes(data, nil) + var sd SignedData + err := d.DecodeStruct(&sd) if err != nil { - return + return nil, err } - sd, err = SignedDataParseItem(item) - return + err = SignedDataValidate(&sd) + return &sd, err } // Sign SignedData's contents and sigTBS corresponding data with the @@ -164,18 +146,14 @@ func (sd *SignedData) SignWith( sdTBS := SignedDataTBS{T: sd.Load.T, V: sd.Load.V, TBS: sigTBS} sig := Sig{TBS: sigTBS} sig.Sign.A = parent.Pub[0].A - var item keks.Item - item, err = keks.FromGo(sdTBS) - if err != nil { - return - } - buf, err := item.EncodeBuf(nil) + var buf []byte + buf, err = keks.EncodeBuf(sdTBS) if err != nil { return } sig.Sign.V, err = prv.Sign(rand.Reader, buf, crypto.Hash(0)) if err != nil { - return err + return } sd.Sigs = append(sd.Sigs, &sig) return nil diff --git a/go/raw.go b/go/raw.go new file mode 100644 index 0000000..22555bf --- /dev/null +++ b/go/raw.go @@ -0,0 +1,3 @@ +package keks + +type Raw []byte diff --git a/go/sort.go b/go/sort.go deleted file mode 100644 index 6859711..0000000 --- a/go/sort.go +++ /dev/null @@ -1,22 +0,0 @@ -package keks - -// Bitewise sorting by length first. -type ByLenFirst []string - -func (a ByLenFirst) Len() int { - return len(a) -} - -func (a ByLenFirst) Swap(i, j int) { - a[i], a[j] = a[j], a[i] -} - -func (a ByLenFirst) Less(i, j int) bool { - if len(a[i]) < len(a[j]) { - return true - } - if len(a[i]) > len(a[j]) { - return false - } - return a[i] < a[j] -} diff --git a/go/mapstruct/dec.go b/go/str.go similarity index 56% rename from go/mapstruct/dec.go rename to go/str.go index 42ac5d5..d73e89e 100644 --- a/go/mapstruct/dec.go +++ b/go/str.go @@ -13,33 +13,42 @@ // You should have received a copy of the GNU Lesser General Public // License along with this program. If not, see . 
-package mapstruct
+package keks
 
-import (
-	"errors"
+import "go.cypherpunks.su/keks/be"
 
-	"go.cypherpunks.su/keks"
-	"go.cypherpunks.su/keks/atom"
-	"go.cypherpunks.su/keks/types"
-)
-
-// Decode KEKS-encoded data to the dst structure.
-// It will return an error if decoded data is not map.
-func Decode(dst any, ctx *atom.Decoder) (err error) {
-	var item keks.Item
-	item, _, err = keks.Decode(ctx)
-	if err != nil {
-		return
+// getStr reads the payload of a string atom whose tag byte is tag.
+//
+// The low six bits of tag select the length encoding:
+//   - 0..60: the length itself;
+//   - 61: a 1-byte length follows, 61 is added to it;
+//   - 62: a 2-byte length follows, 62+255 is added to it;
+//   - 63: an 8-byte length follows, 63+255+65535 is added to it.
+//
+// The explicit length bytes are decoded with be.Get (presumably as a
+// big-endian unsigned integer -- confirm in package be).
+//
+// ErrLenTooBig is returned when the length does not fit into int64 or
+// int, or exceeds a positive ctx.opts.MaxStrLen. Errors of getBytes are
+// passed through.
+func (ctx *Decoder) getStr(tag byte) (s string, err error) {
+	l := int64(tag & 63)
+	var ll int
+	switch l {
+	case 61:
+		ll = 1
+	case 62:
+		ll = 2
+		l += ((1 << 8) - 1)
+	case 63:
+		ll = 8
+		l += ((1 << 8) - 1) + ((1 << 16) - 1)
+	}
+	if ll != 0 {
+		s, err = ctx.getBytes(ll)
+		if err != nil {
+			return
+		}
+		ul := be.Get([]byte(s))
+		// Reject every ul for which l+ul does not fit into int64.
+		// With a plain > the value equal to the bound slipped through
+		// for the 8-byte form and wrapped l to a negative length.
+		if ul >= (1<<63)-(63+((1<<8)-1)+((1<<16)-1)) {
+			err = ErrLenTooBig
+			return
+		}
+		l += int64(ul)
 	}
-	if item.T != types.Map {
-		err = errors.New("non-map")
+	if ctx.opts != nil && ctx.opts.MaxStrLen > 0 && l > ctx.opts.MaxStrLen {
+		err = ErrLenTooBig
 		return
 	}
-	var e any
-	e, err = item.ToGo()
+	n := int(l)
+	if int64(n) != l {
+		// int is narrower than the encoded length (32-bit platforms).
+		err = ErrLenTooBig
+		return
+	}
+	s, err = ctx.getBytes(n)
 	if err != nil {
 		return
 	}
-	err = FromMap(dst, e.(map[string]any))
 	return
 }
diff --git a/go/togo.go b/go/togo.go
deleted file mode 100644
index 6ced137..0000000
--- a/go/togo.go
+++ /dev/null
@@ -1,104 +0,0 @@
-// GoKEKS -- Go KEKS codec implementation
-// Copyright (C) 2024-2025 Sergey Matveev
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Lesser General Public License as
-// published by the Free Software Foundation, version 3 of the License.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License along with this program. If not, see .
- -package keks - -import ( - "fmt" - "math/big" - - "github.com/google/uuid" - "go.cypherpunks.su/tai64n/v4" - - "go.cypherpunks.su/keks/atom" - "go.cypherpunks.su/keks/types" -) - -// Convert an item to various native Go types, atom.Raw, Blob, uuid.UUID. -// Pay attention that f TAI equals to leap second, then it is converted to Raw. -func (item Item) ToGo() (any, error) { - switch item.T { - case types.NIL: - return nil, nil - case types.Bool: - return item.V.(bool), nil - case types.UUID: - return item.V.(uuid.UUID), nil - case types.UInt: - return item.V.(uint64), nil - case types.Int: - return item.V.(int64), nil - case types.List: - var ret []any - var err error - for _, v := range item.V.([]Item) { - var e any - e, err = v.ToGo() - if err != nil { - return nil, err - } - ret = append(ret, e) - } - return ret, nil - case types.Map: - ret := make(map[string]any) - var err error - for k, v := range item.V.(map[string]Item) { - var e any - e, err = v.ToGo() - if err != nil { - return nil, err - } - ret[k] = e - } - return ret, nil - case types.Blob: - return item.V.(Blob), nil - case types.BigInt: - return item.V.(*big.Int), nil - case types.Float: - panic("float is unsupported") - case types.TAI64: - raw := item.V.([]byte) - switch len(raw) { - case tai64n.TAI64Size: - tai := tai64n.TAI64(raw) - t, isLeap := tai64n.Leapsecs.Sub(tai.Time()) - if isLeap { - return atom.Raw{T: atom.TAI64, V: raw}, nil - } - return t, nil - case tai64n.TAI64NSize: - tai := tai64n.TAI64N(raw) - t, isLeap := tai64n.Leapsecs.Sub(tai.Time()) - if isLeap { - return atom.Raw{T: atom.TAI64N, V: raw}, nil - } - return t, nil - case tai64n.TAI64NASize: - return atom.Raw{T: atom.TAI64NA, V: raw}, nil - default: - panic("wrong TAI64 value") - } - case types.Bin: - return item.V.([]byte), nil - case types.Str: - return item.V.(string), nil - case types.Raw: - return item.V.(atom.Raw), nil - default: - return nil, fmt.Errorf("unhandled type: %+v", item) - } -} diff --git a/go/type.go 
b/go/type.go new file mode 100644 index 0000000..f094e08 --- /dev/null +++ b/go/type.go @@ -0,0 +1,28 @@ +package keks + +type AtomType byte + +//go:generate stringer -type=AtomType +const ( + AtomEOC AtomType = 0x00 + AtomNIL AtomType = 0x01 + AtomFalse AtomType = 0x02 + AtomTrue AtomType = 0x03 + AtomUUID AtomType = 0x04 + AtomList AtomType = 0x08 + AtomMap AtomType = 0x09 + AtomBLOB AtomType = 0x0B + AtomPInt AtomType = 0x0C + AtomNInt AtomType = 0x0D + AtomFloat16 AtomType = 0x10 + AtomFloat32 AtomType = 0x11 + AtomFloat64 AtomType = 0x12 + AtomFloat128 AtomType = 0x13 + AtomFloat256 AtomType = 0x14 + AtomTAI64 AtomType = 0x18 + AtomTAI64N AtomType = 0x19 + AtomTAI64NA AtomType = 0x1A + + AtomStrings = 0x80 + AtomIsUTF8 = 0x40 +) diff --git a/go/types/type.go b/go/types/type.go index fcfea0e..e333044 100644 --- a/go/types/type.go +++ b/go/types/type.go @@ -17,6 +17,8 @@ const ( Blob Float TAI64 + TAI64N + TAI64NA Bin Str Raw diff --git a/go/types/type_string.go b/go/types/type_string.go index 2cbecdf..a1ed2f9 100644 --- a/go/types/type_string.go +++ b/go/types/type_string.go @@ -21,14 +21,16 @@ func _() { _ = x[Blob-10] _ = x[Float-11] _ = x[TAI64-12] - _ = x[Bin-13] - _ = x[Str-14] - _ = x[Raw-15] + _ = x[TAI64N-13] + _ = x[TAI64NA-14] + _ = x[Bin-15] + _ = x[Str-16] + _ = x[Raw-17] } -const _Type_name = "InvalidEOCNILBoolUUIDUIntIntBigIntListMapBlobFloatTAI64BinStrRaw" +const _Type_name = "InvalidEOCNILBoolUUIDUIntIntBigIntListMapBlobFloatTAI64TAI64NTAI64NABinStrRaw" -var _Type_index = [...]uint8{0, 7, 10, 13, 17, 21, 25, 28, 34, 38, 41, 45, 50, 55, 58, 61, 64} +var _Type_index = [...]uint8{0, 7, 10, 13, 17, 21, 25, 28, 34, 38, 41, 45, 50, 55, 61, 68, 71, 74, 77} func (i Type) String() string { if i >= Type(len(_Type_index)-1) { diff --git a/go/unmarshal.go b/go/unmarshal.go new file mode 100644 index 0000000..88f10a2 --- /dev/null +++ b/go/unmarshal.go @@ -0,0 +1,179 @@ +// GoKEKS -- Go KEKS codec implementation +// Copyright (C) 2024-2025 Sergey Matveev +// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as
+// published by the Free Software Foundation, version 3 of the License.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this program. If not, see .
+
+package keks
+
+import (
+	"errors"
+	"fmt"
+	"time"
+
+	"github.com/mitchellh/mapstructure"
+	"go.cypherpunks.su/keks/types"
+	"go.cypherpunks.su/tai64n/v4"
+)
+
+// ErrLeapSecond is returned when a TAI timestamp falls on a leap second
+// (as reported by tai64n.Leapsecs.Sub).
+var ErrLeapSecond = errors.New("leap second")
+
+// toUTC passes t through tai64n.Leapsecs.Sub and returns its result.
+// err is ErrLeapSecond when Sub flags t as a leap second; ret is still
+// set to whatever Sub returned in that case.
+func toUTC(t time.Time) (ret time.Time, err error) {
+	var isLeap bool
+	ret, isLeap = tai64n.Leapsecs.Sub(t)
+	if isLeap {
+		err = ErrLeapSecond
+	}
+	return
+}
+
+// unmarshal converts the element iter currently points at to a native
+// Go value, consuming the contained elements of lists and maps by
+// advancing iter. Lists become []any, maps become map[string]any.
+//
+// A float element yields an error. The remaining panics guard
+// invariants of the parsed data itself (element counts, map key type).
+func (ctx *Decoder) unmarshal(iter *Iterator) (v any, err error) {
+	switch iter.T {
+	case types.NIL:
+		return nil, nil
+	case types.Bool:
+		return iter.Bool(), nil
+	case types.UUID:
+		return iter.UUID(), nil
+	case types.UInt:
+		return iter.UInt(), nil
+	case types.Int:
+		return iter.Int(), nil
+	case types.List:
+		ret := make([]any, 0, iter.Len())
+		for range iter.Len() {
+			if !iter.Next() {
+				panic("less list elements than expected")
+			}
+			var e any
+			e, err = ctx.unmarshal(iter)
+			if err != nil {
+				return
+			}
+			ret = append(ret, e)
+		}
+		v = ret
+		return
+	case types.Map:
+		ret := make(map[string]any, iter.Len())
+		var key string
+		// Every pair takes two steps: the key, then the value.
+		for range iter.Len() {
+			if !iter.Next() {
+				panic("less map elements than expected")
+			}
+			if iter.T != types.Str {
+				panic("unexpected non-str key")
+			}
+			key = iter.Str()
+			if !iter.Next() {
+				panic("less map elements than expected")
+			}
+			var e any
+			e, err = ctx.unmarshal(iter)
+			if err != nil {
+				return
+			}
+			ret[key] = e
+		}
+		v = ret
+		return
+	case types.Blob:
+		return iter.Blob(), nil
+	case types.BigInt:
+		return iter.BigInt(), nil
+	case types.Float:
+		// An unsupported value type is reported to the caller; a
+		// library must not panic because of it.
+		return nil, errors.New("float is unsupported")
+	case types.TAI64:
+		t := iter.TAI64()
+		if ctx.opts != nil && ctx.opts.LeaveTAI64 {
+			return t, nil
+		}
+		return toUTC(t.Time())
+	case types.TAI64N:
+		t := iter.TAI64N()
+		if ctx.opts != nil && ctx.opts.LeaveTAI64 {
+			return t, nil
+		}
+		return toUTC(t.Time())
+	case types.TAI64NA:
+		return iter.TAI64NA(), nil
+	case types.Bin:
+		return iter.Bin(), nil
+	case types.Str:
+		return iter.Str(), nil
+	case types.Raw:
+		return iter.Raw(), nil
+	default:
+		return nil, fmt.Errorf("unhandled type: %+v", iter.T)
+	}
+}
+
+// Unmarshal previously parsed data (ctx.Parse) to native Go type.
+//
+// An error is returned when nothing has been parsed. The call panics if
+// elements remain after the first top-level one has been converted.
+func (ctx *Decoder) Unmarshal() (any, error) {
+	iter := ctx.Iter()
+	if !iter.Next() {
+		return nil, errors.New("no parsed data")
+	}
+	v, err := ctx.unmarshal(iter)
+	if err != nil {
+		return v, err
+	}
+	if iter.Next() {
+		panic("unread elements left")
+	}
+	return v, err
+}
+
+// Fill up dst structure with the contents taken from the src map.
+// Fields are matched through their "keks" struct tags by mapstructure.
+func Map2Struct(dst any, src map[string]any) error {
+	decoder, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
+		Result: dst, TagName: "keks",
+	})
+	if err != nil {
+		return err
+	}
+	return decoder.Decode(src)
+}
+
+// Unmarshal previously parsed data (ctx.Parse) to dst structure.
+// Data must be an encoded map to succeed in that.
+func (ctx *Decoder) UnmarshalStruct(dst any) error {
+	srcAny, err := ctx.Unmarshal()
+	if err != nil {
+		return err
+	}
+	src, ok := srcAny.(map[string]any)
+	if !ok {
+		return errors.New("non-map decoded")
+	}
+	return Map2Struct(dst, src)
+}
+
+// Just a convenient call to ctx.Parse and ctx.Unmarshal.
+func (ctx *Decoder) Decode() (any, error) {
+	_, err := ctx.Parse()
+	if err != nil {
+		return nil, err
+	}
+	return ctx.Unmarshal()
+}
+
+// Just a convenient call to ctx.Parse and ctx.UnmarshalStruct.
+func (ctx *Decoder) DecodeStruct(dst any) error {
+	// Parse first; only successfully parsed input is unmarshalled.
+	if _, err := ctx.Parse(); err != nil {
+		return err
+	}
+	return ctx.UnmarshalStruct(dst)
+}