From 7d2b110b9ee547b88950dc6c5ca3aec7d383123fced7948bfbe74a3e4dc351a9 Mon Sep 17 00:00:00 2001 From: Sergey Matveev Date: Fri, 20 Jun 2025 23:21:08 +0300 Subject: [PATCH] FLOAT support --- c/cmd/deatomiser/deatomiser.c | 2 +- c/cmd/pp/pp.c | 2 +- c/cmd/test-vector/test-vector.c | 26 +++- c/lib/atom.h | 14 ++- c/lib/dec.c | 96 +++++++++----- c/lib/enc.c | 64 ++++++++++ c/lib/enc.h | 26 ++++ c/lib/err.c | 2 + c/lib/err.h | 1 + c/lib/fp.c | 119 ++++++++++++++++++ c/lib/fp.h | 13 ++ c/lib/items.c | 5 +- c/lib/o.list | 1 + go/README | 2 +- go/atom-decode.go | 34 ++--- go/atom-encode.go | 27 ++++ go/atomtype_string.go | 15 ++- go/cmd/pp/printer.go | 11 ++ go/cmd/test-vector-anys/main.go | 9 +- go/cmd/test-vector-manual/main.go | 12 +- go/ctx.go | 1 + go/encode.go | 4 + go/float.go | 62 +++++++++ go/float_test.go | 201 +++++++++++++++++++++--------- go/iter.go | 7 ++ go/parse.go | 39 ++++++ go/type.go | 37 +++--- go/unmarshal.go | 2 +- py3/README | 3 +- py3/keks.py | 70 +++++++++-- py3/test-vector.py | 11 +- py3/tests/test_float.py | 83 ++++++------ spec/encoding/FLOAT | 33 +++-- spec/encoding/FullTable | 10 +- spec/encoding/index | 5 +- tcl/README | 2 - tcl/keks.tcl | 75 ++++++++--- tcl/mk-fuzz-inputs | 6 + tcl/test-vector.tcl | 9 +- 39 files changed, 893 insertions(+), 248 deletions(-) create mode 100644 c/lib/fp.c create mode 100644 c/lib/fp.h create mode 100644 go/float.go diff --git a/c/cmd/deatomiser/deatomiser.c b/c/cmd/deatomiser/deatomiser.c index ba3c21d..6b18868 100644 --- a/c/cmd/deatomiser/deatomiser.c +++ b/c/cmd/deatomiser/deatomiser.c @@ -108,7 +108,7 @@ main(void) printf("BLOB(l=%zu\n", atom.v.blob.chunkLen); break; case KEKSItemFloat: - fputs("FLOAT: TODO\n", stdout); + printf("FLOAT(%f)\n", atom.v.fp); break; case KEKSItemTAI64: err = PrintTAI64(atom.v.str.ptr, atom.v.str.len); diff --git a/c/cmd/pp/pp.c b/c/cmd/pp/pp.c index 5cdd4bd..6891d34 100644 --- a/c/cmd/pp/pp.c +++ b/c/cmd/pp/pp.c @@ -213,7 +213,7 @@ printer( // NOLINT(misc-no-recursion) 
fputs("]\n", stdout); break; case KEKSItemFloat: - fputs("FLOAT: TODO\n", stdout); + fprintf(stdout, "FLOAT(%f)\n", item->atom.v.fp); break; case KEKSItemTAI64: { err = PrintTAI64(item->atom.v.str.ptr, item->atom.v.str.len); diff --git a/c/cmd/test-vector/test-vector.c b/c/cmd/test-vector/test-vector.c index ecb204c..ea8c5af 100644 --- a/c/cmd/test-vector/test-vector.c +++ b/c/cmd/test-vector/test-vector.c @@ -14,6 +14,7 @@ // License along with this program. If not, see . #include +#include #include #include #include @@ -228,6 +229,7 @@ main(void) { struct timespec ts; ts.tv_sec = 1234567890; + ts.tv_nsec = 0; assert(KEKSTimespecToTAI(&ts)); unsigned char tai[12] = {0}; assert(KEKSTimespecToTAI64(tai, &ts)); @@ -266,11 +268,25 @@ main(void) &Got, buf + Off, len - Off, (const unsigned char *)"floats", 6)); adder(KEKSAtomListEncode(&Got, buf + Off, len - Off)); // .floats { - buf[Off] = KEKSAtomFloat32; - Off++; - size_t l = 4; - memcpy(buf + Off, (const unsigned char *)"\x01\x02\x03\x04", l); - Off += l; + adder(KEKSAtomFloatEncode(&Got, buf + Off, len - Off, (double)NAN)); + adder(KEKSAtomFloatEncode(&Got, buf + Off, len - Off, (double)INFINITY)); + adder(KEKSAtomFloatEncode(&Got, buf + Off, len - Off, -(double)INFINITY)); + adder(KEKSAtomFloatEncode(&Got, buf + Off, len - Off, 0.0)); + adder(KEKSAtomFloatEncode(&Got, buf + Off, len - Off, -45.25)); + adder(KEKSAtomFloatEncode(&Got, buf + Off, len - Off, 0.15625)); + adder(KEKSAtomFloatMEEncode( + &Got, buf + Off, len - Off, -8687443681197687, -46)); // -123.456 + { + buf[Off] = KEKSAtomFloat; + Off++; + size_t l = 18; + memcpy( + buf + Off, + (const unsigned char + *)"\x0C\x8C\x27\xE4\x1B\x32\x46\xBE\xC9\xB1\x6E\x39\x81\x15\x0D\x82\x30\x38", + l); + Off += l; + } } adder(KEKSAtomEOCEncode(&Got, buf + Off, len - Off)); // .floats diff --git a/c/lib/atom.h b/c/lib/atom.h index 74b7c56..173916f 100644 --- a/c/lib/atom.h +++ b/c/lib/atom.h @@ -16,11 +16,10 @@ enum KEKSAtomType { KEKSAtomBlob = 0x0B, KEKSAtomPint = 
0x0C, KEKSAtomNint = 0x0D, - KEKSAtomFloat16 = 0x10, - KEKSAtomFloat32 = 0x11, - KEKSAtomFloat64 = 0x12, - KEKSAtomFloat128 = 0x13, - KEKSAtomFloat256 = 0x14, + KEKSAtomFloatNaN = 0x10, + KEKSAtomFloatPinf = 0x11, + KEKSAtomFloatNinf = 0x12, + KEKSAtomFloat = 0x13, KEKSAtomTAI64 = 0x18, KEKSAtomTAI64N = 0x19, KEKSAtomTAI64NA = 0x1A, @@ -83,6 +82,8 @@ enum KEKSItemType { // Value of the positive integer. // @item .v.nint // Value of the negative integer. +// @item .v.fp +// Value of the floating point. // @item .v.list // That value is filled only when dealing with @ref{Items, items} // for lists and maps. @@ -95,7 +96,7 @@ enum KEKSItemType { // @code{.chunkLen} is the length of the chunk. @code{.chunks} is // the number of chunks, including the terminating binary string. // @item .v.str -// @code{.ptr} points to the start of the binary/UTF-8/TAI64*/Magic +// @code{.ptr} points to the start of the binary/UTF-8/TAI64*/Magic/hexlet // string. @code{.len} is its length in bytes. // Raw values use it as a payload. // @end table @@ -104,6 +105,7 @@ struct KEKSAtom { union { uint64_t pint; int64_t nint; + double fp; struct { size_t head; size_t len; diff --git a/c/lib/dec.c b/c/lib/dec.c index 8df815f..8ee2794 100644 --- a/c/lib/dec.c +++ b/c/lib/dec.c @@ -14,6 +14,7 @@ // License along with this program. If not, see . 
#include +#include #include #include #include @@ -22,9 +23,25 @@ #include "atom.h" #include "dec.h" #include "err.h" +#include "fp.h" #include "frombe.h" #include "utf8.h" +static bool +atomIsInt(struct KEKSAtom *atom) +{ + if ((atom->typ == KEKSItemPint) || (atom->typ == KEKSItemNint)) { + return true; + } + if (atom->typ != KEKSItemRaw) { + return false; + } + if (atom->v.str.len == 0) { + return false; + } + return (atom->v.str.ptr[0] == KEKSAtomPint) || (atom->v.str.ptr[0] == KEKSAtomNint); +} + enum KEKSErr KEKSAtomDecode( // NOLINT(misc-no-recursion) size_t *got, @@ -209,38 +226,59 @@ KEKSAtomDecode( // NOLINT(misc-no-recursion) return KEKSErrNo; } - case KEKSAtomFloat16: - case KEKSAtomFloat32: - case KEKSAtomFloat64: - case KEKSAtomFloat128: - case KEKSAtomFloat256: { - size_t l = 0; - switch (tag) { - case KEKSAtomFloat16: - l = 2; - break; - case KEKSAtomFloat32: - l = 4; - break; - case KEKSAtomFloat64: - l = 8; - break; - case KEKSAtomFloat128: - l = 16; - break; - case KEKSAtomFloat256: - l = 32; - break; - default: - assert(false); - } + case KEKSAtomFloatNaN: atom->typ = KEKSItemFloat; - (*got) += l; - if (len < (*got)) { - return KEKSErrNotEnough; + atom->v.fp = (double)NAN; + break; + case KEKSAtomFloatPinf: + atom->typ = KEKSItemFloat; + atom->v.fp = (double)INFINITY; + break; + case KEKSAtomFloatNinf: + atom->typ = KEKSItemFloat; + atom->v.fp = -((double)INFINITY); + break; + case KEKSAtomFloat: { + atom->typ = KEKSItemFloat; + size_t mGot = 0; + struct KEKSAtom m; + memset(&m, 0, sizeof(struct KEKSAtom)); + enum KEKSErr err = KEKSAtomDecode(&mGot, &m, buf + 1, len - 1); + if (err != KEKSErrNo) { + return err; + } + (*got) += mGot; + if (!atomIsInt(&m)) { + return KEKSErrFloatNonInt; + } + + size_t eGot = 0; + struct KEKSAtom e; + memset(&e, 0, sizeof(struct KEKSAtom)); + err = KEKSAtomDecode(&eGot, &e, buf + 1 + mGot, len - 1 - mGot); + if (err != KEKSErrNo) { + return err; + } + (*got) += eGot; + if (!atomIsInt(&e)) { + return 
KEKSErrFloatNonInt; + } + + if (((m.typ == KEKSItemPint) && ((m.v.pint >> 63) > 0)) || + ((e.typ == KEKSItemPint) && ((e.v.pint >> 63) > 0))) { + goto FloatRaw; } + + if (keksMEToDouble( + &(atom->v.fp), + (m.typ == KEKSItemPint) ? (int64_t)m.v.pint : m.v.nint, + (e.typ == KEKSItemPint) ? (int64_t)e.v.pint : e.v.nint)) { + break; + } + + FloatRaw: atom->typ = KEKSItemRaw; - atom->v.str.len = l + 1; + atom->v.str.len = 1 + mGot + eGot; atom->v.str.ptr = buf; break; } diff --git a/c/lib/enc.c b/c/lib/enc.c index 5d2cabc..683a9e3 100644 --- a/c/lib/enc.c +++ b/c/lib/enc.c @@ -14,6 +14,7 @@ // License along with this program. If not, see . #include +#include #include #include #include @@ -21,6 +22,7 @@ #include "atom.h" #include "enc.h" +#include "fp.h" #include "tobe.h" bool @@ -158,6 +160,68 @@ KEKSAtomSintEncode(size_t *len, unsigned char *buf, const size_t cap, const int6 return ok; } +bool +KEKSAtomFloatMEEncode( + size_t *len, + unsigned char *buf, + const size_t cap, + const int64_t m, + const int e) +{ + if (cap < 1) { + return false; + } + assert(len != NULL); + assert(buf != NULL); + (*len) = 1; + buf[0] = KEKSAtomFloat; + size_t mLen = 0; + if (!KEKSAtomSintEncode(&mLen, buf + 1, cap - 1, m)) { + return false; + } + (*len) += mLen; + size_t eLen = 0; + if (!KEKSAtomSintEncode(&eLen, buf + 1 + mLen, cap - 1 - mLen, (int64_t)e)) { + return false; + } + (*len) += eLen; + return true; +} + +bool +KEKSAtomFloatEncode(size_t *len, unsigned char *buf, const size_t cap, const double v) +{ + if (cap < 1) { + return false; + } + assert(len != NULL); + assert(buf != NULL); + int64_t m = 0; + int e = 0; + switch (fpclassify(v)) { + case FP_INFINITE: + (*len) = 1; + buf[0] = (v > 0) ? 
KEKSAtomFloatPinf : KEKSAtomFloatNinf; + return true; + case FP_NAN: + (*len) = 1; + buf[0] = KEKSAtomFloatNaN; + return true; + case FP_NORMAL: + if (!keksDoubleToME(&m, &e, v)) { + return false; + } + break; + case FP_SUBNORMAL: + return false; + case FP_ZERO: + break; + default: + assert(false); + } + return KEKSAtomFloatMEEncode(len, buf, cap, m, e); +} + bool KEKSAtomListEncode(size_t *len, unsigned char *buf, const size_t cap) { diff --git a/c/lib/enc.h b/c/lib/enc.h index 908eb25..1360114 100644 --- a/c/lib/enc.h +++ b/c/lib/enc.h @@ -88,6 +88,32 @@ KEKSAtomUintEncode(size_t *len, unsigned char *buf, const size_t cap, const uint bool KEKSAtomSintEncode(size_t *len, unsigned char *buf, const size_t cap, const int64_t v); +// TEXINFO: KEKSAtomFloatEncode +// @deftypefun bool KEKSAtomFloatEncode @ +// (size_t *len, unsigned char *buf, const size_t cap, const double v) +// Encode double float in provided @var{buf} with capacity of @var{cap}. +// In case of success, true is returned and @var{len} will hold how many +// bytes were written to buffer. +// @end deftypefun +bool +KEKSAtomFloatEncode(size_t *len, unsigned char *buf, const size_t cap, const double); + +// TEXINFO: KEKSAtomFloatMEEncode +// @deftypefun bool KEKSAtomFloatMEEncode @ +// (size_t *len, unsigned char *buf, const size_t cap, @ +// const int64_t m, const int e) +// Encode float's mantissa and exponent in provided @var{buf} with +// capacity of @var{cap}. In case of success, true is returned and +// @var{len} will hold how many bytes were written to buffer. 
+// @end deftypefun +bool +KEKSAtomFloatMEEncode( + size_t *len, + unsigned char *buf, + const size_t cap, + const int64_t m, + const int e); + // TEXINFO: KEKSAtomListEncode // @deftypefun bool KEKSAtomListEncode @ // (size_t *len, unsigned char *buf, const size_t cap) diff --git a/c/lib/err.c b/c/lib/err.c index c4b1b75..09bad28 100644 --- a/c/lib/err.c +++ b/c/lib/err.c @@ -50,6 +50,8 @@ KEKSErr2Str(const enum KEKSErr err) return "UnexpectedEOC"; case KEKSErrBadMagic: return "BadMagic"; + case KEKSErrFloatNonInt: + return "FloatNonInt"; default: return "unknown"; } diff --git a/c/lib/err.h b/c/lib/err.h index f3b60f6..946f3fe 100644 --- a/c/lib/err.h +++ b/c/lib/err.h @@ -69,6 +69,7 @@ enum KEKSErr { KEKSErrDeepRecursion, KEKSErrUnexpectedEOC, KEKSErrBadMagic, + KEKSErrFloatNonInt, }; // TEXINFO: KEKSErr2Str diff --git a/c/lib/fp.c b/c/lib/fp.c new file mode 100644 index 0000000..6dfd8ac --- /dev/null +++ b/c/lib/fp.c @@ -0,0 +1,119 @@ +// This code is based entirely on sqlite-src-3450100/ext/misc/ieee754.c. +// Excerpt from its source code: +// +// 2013-04-17 +// +// The author disclaims copyright to this source code. In place of +// a legal notice, here is a blessing: +// +// May you do good and not evil. +// May you find forgiveness for yourself and forgive others. +// May you share freely, never taking more than you give. 
+ +#include +#include +#include +#include +#include + +#include "fp.h" + +bool +keksMEToDouble(double *d, int64_t m, int64_t e) +{ + if ((m == 0) && (e == 0)) { + (*d) = 0.0; + return true; + } + if ((e > 1023) || (e < -1022)) { + return false; + } + bool neg = false; + uint64_t um = 0; + if (m < 0) { + neg = true; + um = (uint64_t)0 - (uint64_t)m; // unsigned negation: -m overflows for INT64_MIN + } else { + um = (uint64_t)m; + } + if (um == 0) { + // m == 0 with e != 0 would be packed below as a power of two or as infinity. + return false; + } + if (um >= 9007199254740992) { + return false; + } + while ((um >> 32) & 0xFFE00000) { + um >>= 1; + e++; + } + while (um != 0 && ((um >> 32) & 0xFFF00000) == 0) { + um <<= 1; + e--; + } + e += 1075; + if (e <= 0) { + if (1 - e >= 64) { // subnormal + um = 0; + } else { + um >>= (uint64_t)(1 - e); + } + e = 0; + } else if (e >= 0x7FF) { + return false; // exponent field 0x7FF means infinity/NaN, not a finite value + } + uint64_t a = um & ((((uint64_t)1) << 52) - 1); + a |= ((uint64_t)e) << 52; + if (neg) { + a |= ((uint64_t)1) << 63; + } + memcpy(d, &a, sizeof(double)); + return true; +} + +bool +keksDoubleToME(int64_t *m, int *e, double d) +{ + switch (fpclassify(d)) { + case FP_NORMAL: + break; + case FP_ZERO: + (*m) = 0; + (*e) = 0; + return true; + case FP_INFINITE: + case FP_NAN: + case FP_SUBNORMAL: + return false; + default: + assert(false); + } + bool neg = false; + if (d < 0.0) { + neg = true; + d = -d; + } + uint64_t a = 0; + memcpy(&a, &d, sizeof(a)); + uint64_t _m = 0; + int _e = 0; + if (a == 0) { + _e = 0; + _m = 0; + } else { + _e = a >> 52; + _m = a & ((((uint64_t)1) << 52) - 1); + if (_e == 0) { + _m <<= 1; + } else { + _m |= ((uint64_t)1) << 52; + } + while (_e < 1075 && _m > 0 && (_m & 1) == 0) { + _m >>= 1; + _e++; + } + } + (*m) = neg ? 
-((int64_t)_m) : (int64_t)_m; + (*e) = _e - 1075; + return true; +} diff --git a/c/lib/fp.h b/c/lib/fp.h new file mode 100644 index 0000000..b5e3c75 --- /dev/null +++ b/c/lib/fp.h @@ -0,0 +1,13 @@ +#ifndef KEKS_FP_H +#define KEKS_FP_H + +#include +#include + +bool +keksMEToDouble(double *, int64_t m, int64_t e); + +bool +keksDoubleToME(int64_t *m, int *e, double); + +#endif // KEKS_FP_H diff --git a/c/lib/items.c b/c/lib/items.c index 64ca405..37cebae 100644 --- a/c/lib/items.c +++ b/c/lib/items.c @@ -355,6 +355,9 @@ KEKSItemsEncode( // NOLINT(misc-no-recursion) case KEKSItemNint: ok = KEKSAtomSintEncode(&got, buf + *off, cap - (*off), item->atom.v.nint); break; + case KEKSItemFloat: + ok = KEKSAtomFloatEncode(&got, buf + *off, cap - (*off), item->atom.v.fp); + break; case KEKSItemList: ok = KEKSAtomListEncode(&got, buf + *off, cap - (*off)); if (!ok) { @@ -431,8 +434,6 @@ KEKSItemsEncode( // NOLINT(misc-no-recursion) &got, buf + *off, cap - (*off), item->atom.v.str.ptr, item->atom.v.str.len); break; } - case KEKSItemFloat: - return false; case KEKSItemTAI64: ok = KEKSAtomTAI64Encode( &got, buf + *off, cap - (*off), item->atom.v.str.ptr, item->atom.v.str.len); diff --git a/c/lib/o.list b/c/lib/o.list index 2d87f0c..fd9b2f7 100644 --- a/c/lib/o.list +++ b/c/lib/o.list @@ -3,6 +3,7 @@ dectai.o enc.o enctai.o err.o +fp.o frombe.o items.o leapsecs.o diff --git a/go/README b/go/README index 4ea83ff..7d3ba5d 100644 --- a/go/README +++ b/go/README @@ -1,5 +1,5 @@ Go implementation of the KEKS codec, KEKS/Schema validator and KEKS/CM. -No FLOAT* support. They are stored/decoded just as a raw value. +Partial FLOAT support. It is free software: see the file COPYING.LESSER for copying conditions. 
diff --git a/go/atom-decode.go b/go/atom-decode.go index e5eff1b..0138409 100644 --- a/go/atom-decode.go +++ b/go/atom-decode.go @@ -166,28 +166,18 @@ func (ctx *Decoder) DecodeAtom() (t types.Type, err error) { ctx.ints = append(ctx.ints, -1-int64(i)) } } - case AtomFloat16, AtomFloat32, AtomFloat64, AtomFloat128, AtomFloat256: - var l int - switch AtomType(tag) { - case AtomFloat16: - l = 2 - case AtomFloat32: - l = 4 - case AtomFloat64: - l = 8 - case AtomFloat128: - l = 16 - case AtomFloat256: - l = 32 - } - var s string - s, err = ctx.getBytes(l) - if err != nil { - return - } - t = types.Raw - ctx.rawTypes = append(ctx.rawTypes, AtomType(tag)) - ctx.strs = append(ctx.strs, s) + case AtomFloatNaN: + t = types.Float + ctx.floats = append(ctx.floats, &Float{NaN: true}) + case AtomFloatPinf: + t = types.Float + ctx.floats = append(ctx.floats, &Float{Pinf: true}) + case AtomFloatNinf: + t = types.Float + ctx.floats = append(ctx.floats, &Float{Ninf: true}) + case AtomFloat: + t = types.Float + ctx.floats = append(ctx.floats, &Float{}) case AtomTAI64, AtomTAI64N, AtomTAI64NA: var l int switch AtomType(tag) { diff --git a/go/atom-encode.go b/go/atom-encode.go index 7468f5f..7987c4f 100644 --- a/go/atom-encode.go +++ b/go/atom-encode.go @@ -126,6 +126,33 @@ func BigIntEncode(w io.Writer, v *big.Int) (written int64, err error) { return } +// Write an encoded FLOAT atom. +func FloatEncode(w io.Writer, v *Float) (written int64, err error) { + if v.NaN { + return ByteEncode(w, byte(AtomFloatNaN)) + } + if v.Pinf { + return ByteEncode(w, byte(AtomFloatPinf)) + } + if v.Ninf { + return ByteEncode(w, byte(AtomFloatNinf)) + } + _, err = w.Write([]byte{byte(AtomFloat)}) + if err != nil { + return + } + written++ + var wr int64 + wr, err = BigIntEncode(w, v.M) + written += wr + if err != nil { + return + } + wr, err = BigIntEncode(w, v.E) + written += wr + return +} + // Write an encoded BLOB atom. 
func BlobAtomEncode(w io.Writer, chunkLen int64) (written int64, err error) { l := make([]byte, 9) diff --git a/go/atomtype_string.go b/go/atomtype_string.go index 303c569..5eba9cf 100644 --- a/go/atomtype_string.go +++ b/go/atomtype_string.go @@ -18,11 +18,10 @@ func _() { _ = x[AtomBLOB-11] _ = x[AtomPInt-12] _ = x[AtomNInt-13] - _ = x[AtomFloat16-16] - _ = x[AtomFloat32-17] - _ = x[AtomFloat64-18] - _ = x[AtomFloat128-19] - _ = x[AtomFloat256-20] + _ = x[AtomFloatNaN-16] + _ = x[AtomFloatPinf-17] + _ = x[AtomFloatNinf-18] + _ = x[AtomFloat-19] _ = x[AtomTAI64-24] _ = x[AtomTAI64N-25] _ = x[AtomTAI64NA-26] @@ -33,7 +32,7 @@ const ( _AtomType_name_0 = "AtomEOCAtomNILAtomFalseAtomTrueAtomHexlet" _AtomType_name_1 = "AtomListAtomMap" _AtomType_name_2 = "AtomBLOBAtomPIntAtomNInt" - _AtomType_name_3 = "AtomFloat16AtomFloat32AtomFloat64AtomFloat128AtomFloat256" + _AtomType_name_3 = "AtomFloatNaNAtomFloatPinfAtomFloatNinfAtomFloat" _AtomType_name_4 = "AtomTAI64AtomTAI64NAtomTAI64NA" _AtomType_name_5 = "AtomMagic" ) @@ -42,7 +41,7 @@ var ( _AtomType_index_0 = [...]uint8{0, 7, 14, 23, 31, 41} _AtomType_index_1 = [...]uint8{0, 8, 15} _AtomType_index_2 = [...]uint8{0, 8, 16, 24} - _AtomType_index_3 = [...]uint8{0, 11, 22, 33, 45, 57} + _AtomType_index_3 = [...]uint8{0, 12, 25, 38, 47} _AtomType_index_4 = [...]uint8{0, 9, 19, 30} ) @@ -56,7 +55,7 @@ func (i AtomType) String() string { case 11 <= i && i <= 13: i -= 11 return _AtomType_name_2[_AtomType_index_2[i]:_AtomType_index_2[i+1]] - case 16 <= i && i <= 20: + case 16 <= i && i <= 19: i -= 16 return _AtomType_name_3[_AtomType_index_3[i]:_AtomType_index_3[i+1]] case 24 <= i && i <= 26: diff --git a/go/cmd/pp/printer.go b/go/cmd/pp/printer.go index 6d91d5c..56de1f9 100644 --- a/go/cmd/pp/printer.go +++ b/go/cmd/pp/printer.go @@ -140,6 +140,17 @@ func printer(iter *keks.Iterator, where []string, count int, inList, inMap bool) fmt.Println(iter.Int()) case types.BigInt: fmt.Println(iter.BigInt()) + case types.Float: + f := 
iter.Float() + if f.NaN { + fmt.Println("NaN") + } else if f.Pinf { + fmt.Println("+inf") + } else if f.Ninf { + fmt.Println("-inf") + } else { + fmt.Printf("%s (m=%d e=%d)\n", f.BigFloat().String(), f.M, f.E) + } case types.Blob: blob := iter.Blob() if *onlyV { diff --git a/go/cmd/test-vector-anys/main.go b/go/cmd/test-vector-anys/main.go index 80ca7b3..21e0d62 100644 --- a/go/cmd/test-vector-anys/main.go +++ b/go/cmd/test-vector-anys/main.go @@ -110,7 +110,14 @@ func main() { tai64n.TAI64NA(mustHexDec("40000000499602F40006F855075BCD15")), }, "floats": []any{ - keks.Raw(append([]byte{byte(keks.AtomFloat32)}, mustHexDec("01020304")...)), + keks.Float{NaN: true}, + keks.Float{Pinf: true}, + keks.Float{Ninf: true}, + keks.Float{M: big.NewInt(0), E: big.NewInt(0)}, + keks.Float{M: big.NewInt(-181), E: big.NewInt(-2)}, + keks.Float{M: big.NewInt(5), E: big.NewInt(-5)}, + keks.Float{M: big.NewInt(-8687443681197687), E: big.NewInt(-46)}, + keks.Float{M: big.NewInt(0).SetBytes([]byte{0x27, 0xE4, 0x1B, 0x32, 0x46, 0xBE, 0xC9, 0xB1, 0x6E, 0x39, 0x81, 0x15}), E: big.NewInt(-12345)}, }, "uuid": uuid.MustParse("0e875e3f-d385-49eb-87b4-be42d641c367"), "ip": net.ParseIP("2001:db8:85a3:8d3:1319:8a2e:370:7348"), diff --git a/go/cmd/test-vector-manual/main.go b/go/cmd/test-vector-manual/main.go index 6e71f39..d9b81fe 100644 --- a/go/cmd/test-vector-manual/main.go +++ b/go/cmd/test-vector-manual/main.go @@ -184,10 +184,14 @@ func main() { { mustEncode(keks.StrEncode(&buf, "floats")) mustEncode(keks.ByteEncode(&buf, byte(keks.AtomList))) - mustEncode(io.Copy(&buf, bytes.NewReader(append( - []byte{byte(keks.AtomFloat32)}, - []byte("\x01\x02\x03\x04")..., - )))) + mustEncode(keks.FloatEncode(&buf, &keks.Float{NaN: true})) + mustEncode(keks.FloatEncode(&buf, &keks.Float{Pinf: true})) + mustEncode(keks.FloatEncode(&buf, &keks.Float{Ninf: true})) + mustEncode(keks.FloatEncode(&buf, &keks.Float{M: big.NewInt(0), E: big.NewInt(0)})) + mustEncode(keks.FloatEncode(&buf, &keks.Float{M: 
big.NewInt(-181), E: big.NewInt(-2)})) + mustEncode(keks.FloatEncode(&buf, &keks.Float{M: big.NewInt(5), E: big.NewInt(-5)})) + mustEncode(keks.FloatEncode(&buf, &keks.Float{M: big.NewInt(-8687443681197687), E: big.NewInt(-46)})) + mustEncode(keks.FloatEncode(&buf, &keks.Float{M: big.NewInt(0).SetBytes([]byte{0x27, 0xE4, 0x1B, 0x32, 0x46, 0xBE, 0xC9, 0xB1, 0x6E, 0x39, 0x81, 0x15}), E: big.NewInt(-12345)})) mustEncode(keks.ByteEncode(&buf, byte(keks.AtomEOC))) } { diff --git a/go/ctx.go b/go/ctx.go index 7bc1811..a452bbc 100644 --- a/go/ctx.go +++ b/go/ctx.go @@ -66,6 +66,7 @@ type Decoder struct { tai64s []tai64n.TAI64 uints []uint64 hexlets []*Hexlet + floats []*Float blobChunkLens []int64 blobChunkses [][]string diff --git a/go/encode.go b/go/encode.go index eb8edf8..167ed28 100644 --- a/go/encode.go +++ b/go/encode.go @@ -69,6 +69,10 @@ func Encode(w io.Writer, v any, opts *EncodeOpts) (written int64, err error) { return io.Copy(w, bytes.NewReader(v)) case *big.Int: return BigIntEncode(w, v) + case Float: + return FloatEncode(w, &v) + case *Float: + return FloatEncode(w, v) case bool: return BoolEncode(w, v) case uuid.UUID: diff --git a/go/float.go b/go/float.go new file mode 100644 index 0000000..be6b963 --- /dev/null +++ b/go/float.go @@ -0,0 +1,62 @@ +// KEKS -- Go KEKS codec implementation +// Copyright (C) 2024-2025 Sergey Matveev +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation, version 3 of the License. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this program. If not, see . 
+ +package keks + +import ( + "fmt" + "math/big" +) + +type Float struct { + M *big.Int + E *big.Int + NaN bool + Pinf bool + Ninf bool +} + +func (f *Float) MarshalJSON() ([]byte, error) { + if f.NaN { + return fmt.Appendf(nil, "FLOAT[NaN]"), nil + } + if f.Pinf { + return fmt.Appendf(nil, "FLOAT[+inf]"), nil + } + if f.Ninf { + return fmt.Appendf(nil, "FLOAT[-inf]"), nil + } + return fmt.Appendf(nil, "FLOAT[%d, %d]", f.M, f.E), nil +} + +func (f *Float) BigFloat() *big.Float { + exp := big.NewFloat(0).SetInt(big.NewInt(0).Exp( + big.NewInt(2), + big.NewInt(0).Abs(f.E), + nil, + )) + if f.E.Sign() == -1 { + exp = exp.Quo(big.NewFloat(1.0), exp) + } + return exp.Mul(exp, big.NewFloat(0).SetInt(f.M)) +} + +func (f *Float) IsZero() bool { + if f.NaN || f.Pinf || f.Ninf { + return false + } + z := big.NewInt(0) + return (f.M.Cmp(z) == 0) && (f.E.Cmp(z) == 0) +} diff --git a/go/float_test.go b/go/float_test.go index 1846ad2..86d82a9 100644 --- a/go/float_test.go +++ b/go/float_test.go @@ -1,6 +1,5 @@ // KEKS -- Go KEKS codec implementation -// Copyright (C) 2024-2025 Anton Rudenko -// Sergey Matveev +// Copyright (C) 2024-2025 Sergey Matveev // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License as @@ -19,7 +18,9 @@ package keks import ( "bytes" "io" + "math/big" "testing" + "testing/quick" ) func TestEncodingFloat(t *testing.T) { @@ -29,137 +30,223 @@ func TestEncodingFloat(t *testing.T) { } } -func TestFloat16Loads(t *testing.T) { - bin := append([]byte{0x10}, bytes.Repeat([]byte{0x11}, 2)...) 
+func TestFloatNaN(t *testing.T) { + bin := []byte{0x10} decoder := NewDecoderFromBytes(append(bin, Junk...), nil) decoded, err := decoder.Decode() if err != nil { t.Fatal(err) } - casted, ok := decoded.(Raw) + casted, ok := decoded.(*Float) if !ok { t.Fatal("failed to cast") } - if !bytes.Equal(casted, bin) { - t.Fatal("casted differs") + if !casted.NaN { + t.Fatal("got non NaN") + } + if casted.Pinf || casted.Ninf || casted.M != nil || casted.E != nil { + t.Fatal("unexpectedly filled fields") } if !bytes.Equal(decoder.B, Junk) { t.Fatal("tail differs") } } -func TestFloat32Loads(t *testing.T) { - bin := append([]byte{0x11}, bytes.Repeat([]byte{0x11}, 4)...) +func TestFloatPinf(t *testing.T) { + bin := []byte{0x11} decoder := NewDecoderFromBytes(append(bin, Junk...), nil) decoded, err := decoder.Decode() if err != nil { t.Fatal(err) } - casted, ok := decoded.(Raw) + casted, ok := decoded.(*Float) if !ok { t.Fatal("failed to cast") } - if !bytes.Equal(casted, bin) { - t.Fatal("casted differs") + if !casted.Pinf { + t.Fatal("got non +inf") + } + if casted.NaN || casted.Ninf || casted.M != nil || casted.E != nil { + t.Fatal("unexpectedly filled fields") } if !bytes.Equal(decoder.B, Junk) { t.Fatal("tail differs") } } -func TestFloat64Loads(t *testing.T) { - bin := append([]byte{0x12}, bytes.Repeat([]byte{0x12}, 8)...) 
+func TestFloatNinf(t *testing.T) { + bin := []byte{0x12} decoder := NewDecoderFromBytes(append(bin, Junk...), nil) decoded, err := decoder.Decode() if err != nil { t.Fatal(err) } - casted, ok := decoded.(Raw) + casted, ok := decoded.(*Float) if !ok { t.Fatal("failed to cast") } - if !bytes.Equal(casted, bin) { - t.Fatal("casted differs") + if !casted.Ninf { + t.Fatal("got non -inf") + } + if casted.NaN || casted.Pinf || casted.M != nil || casted.E != nil { + t.Fatal("unexpectedly filled fields") } if !bytes.Equal(decoder.B, Junk) { t.Fatal("tail differs") } } -func TestFloat128Loads(t *testing.T) { - bin := append([]byte{0x13}, bytes.Repeat([]byte{0x13}, 16)...) - decoder := NewDecoderFromBytes(append(bin, Junk...), nil) +func TestFloat0(t *testing.T) { + var b bytes.Buffer + b.Write([]byte{0x13}) + UIntEncode(&b, 0) + UIntEncode(&b, 0) + b.Write(Junk) + decoder := NewDecoderFromBytes(b.Bytes(), nil) decoded, err := decoder.Decode() if err != nil { t.Fatal(err) } - casted, ok := decoded.(Raw) + casted, ok := decoded.(*Float) if !ok { t.Fatal("failed to cast") } - if !bytes.Equal(casted, bin) { - t.Fatal("casted differs") + if casted.NaN || casted.Pinf || casted.Ninf { + t.Fatal("unexpectedly filled fields") } if !bytes.Equal(decoder.B, Junk) { t.Fatal("tail differs") } -} - -func TestFloat256Loads(t *testing.T) { - bin := append([]byte{0x14}, bytes.Repeat([]byte{0x14}, 32)...) 
- decoder := NewDecoderFromBytes(append(bin, Junk...), nil) - decoded, err := decoder.Decode() - if err != nil { - t.Fatal(err) - } - casted, ok := decoded.(Raw) - if !ok { - t.Fatal("failed to cast") + if casted.M.Cmp(big.NewInt(0)) != 0 { + t.Fatal("m differs") } - if !bytes.Equal(casted, bin) { - t.Fatal("casted differs") + if casted.E.Cmp(big.NewInt(0)) != 0 { + t.Fatal("m differs") } - if !bytes.Equal(decoder.B, Junk) { - t.Fatal("tail differs") + f, _ := casted.BigFloat().Float64() + if f != 0.0 { + t.Fatal("non zero") } } -func TestFloat16NotEnoughData(t *testing.T) { - bin := append([]byte{0x10}, bytes.Repeat([]byte{0x11}, 2-1)...) - _, err := NewDecoderFromBytes(bin, nil).Decode() +func TestFloatNotEnough(t *testing.T) { + decoder := NewDecoderFromBytes([]byte{0x13}, nil) + _, err := decoder.Decode() if err != io.ErrUnexpectedEOF { t.Fatal(err) } } -func TestFloat32NotEnoughData(t *testing.T) { - bin := append([]byte{0x11}, bytes.Repeat([]byte{0x11}, 4-1)...) - _, err := NewDecoderFromBytes(bin, nil).Decode() +func TestFloatSingleInt(t *testing.T) { + var b bytes.Buffer + b.Write([]byte{0x13}) + UIntEncode(&b, 0) + decoder := NewDecoderFromBytes(b.Bytes(), nil) + _, err := decoder.Decode() if err != io.ErrUnexpectedEOF { t.Fatal(err) } } -func TestFloat64NotEnoughData(t *testing.T) { - bin := append([]byte{0x12}, bytes.Repeat([]byte{0x11}, 8-1)...) - _, err := NewDecoderFromBytes(bin, nil).Decode() - if err != io.ErrUnexpectedEOF { +func TestFloatNonInt0(t *testing.T) { + var b bytes.Buffer + b.Write([]byte{0x13}) + BoolEncode(&b, true) + decoder := NewDecoderFromBytes(b.Bytes(), nil) + _, err := decoder.Decode() + if err != ErrFloatBadInt { t.Fatal(err) } } -func TestFloat128NotEnoughData(t *testing.T) { - bin := append([]byte{0x13}, bytes.Repeat([]byte{0x11}, 16-1)...) 
- _, err := NewDecoderFromBytes(bin, nil).Decode() - if err != io.ErrUnexpectedEOF { +func TestFloatNonInt1(t *testing.T) { + var b bytes.Buffer + b.Write([]byte{0x13}) + UIntEncode(&b, 0) + BoolEncode(&b, true) + decoder := NewDecoderFromBytes(b.Bytes(), nil) + _, err := decoder.Decode() + if err != ErrFloatBadInt { t.Fatal(err) } } -func TestFloat256NotEnoughData(t *testing.T) { - bin := append([]byte{0x14}, bytes.Repeat([]byte{0x11}, 32-1)...) - _, err := NewDecoderFromBytes(bin, nil).Decode() - if err != io.ErrUnexpectedEOF { +func TestFloatSymmetric(t *testing.T) { + f := func(m, e int64) bool { + fp := Float{M: big.NewInt(m), E: big.NewInt(e)} // round-trip the generated pair, not a constant + var b bytes.Buffer + if _, err := FloatEncode(&b, &fp); err != nil { + t.Fatal(err) + } + b.Write(Junk) + decoder := NewDecoderFromBytes(b.Bytes(), nil) + decoded, err := decoder.Decode() + if err != nil { + t.Fatal(err) + } + casted, ok := decoded.(*Float) + if !ok { + t.Fatal("failed to cast") + } + if casted.NaN || casted.Pinf || casted.Ninf { + t.Fatal("unexpectedly filled fields") + } + if !bytes.Equal(decoder.B, Junk) { + t.Fatal("tail differs") + } + return (casted.M.Cmp(fp.M) == 0) && (casted.E.Cmp(fp.E) == 0) + } + if err := quick.Check(f, nil); err != nil { + t.Fatal(err) + } +} + +func TestFloatExamples(t *testing.T) { + var b bytes.Buffer + if _, err := FloatEncode(&b, &Float{ + M: big.NewInt(-181), + E: big.NewInt(-2), + }); err != nil { + t.Fatal(err) + } + decoded, err := NewDecoderFromBytes(b.Bytes(), nil).Decode() + if err != nil { + t.Fatal(err) + } + fp, _ := decoded.(*Float).BigFloat().Float64() + if fp != -45.25 { + t.Fatal("-45.25") + } + + b.Reset() + if _, err = FloatEncode(&b, &Float{ + M: big.NewInt(5), + E: big.NewInt(-5), + }); err != nil { + t.Fatal(err) + } + decoded, err = NewDecoderFromBytes(b.Bytes(), nil).Decode() + if err != nil { + t.Fatal(err) + } + fp, _ = decoded.(*Float).BigFloat().Float64() + if fp != 0.15625 { + t.Fatal("0.15625") + } + + b.Reset() + if _, err = 
FloatEncode(&b, &Float{ + M: big.NewInt(-8687443681197687), + E: big.NewInt(-46), + }); err != nil { t.Fatal(err) } + decoded, err = NewDecoderFromBytes(b.Bytes(), nil).Decode() + if err != nil { + t.Fatal(err) + } + fp, _ = decoded.(*Float).BigFloat().Float64() + if fp != -123.456 { + t.Fatal("-123.456") + } } diff --git a/go/iter.go b/go/iter.go index 59a9f25..700f0ef 100644 --- a/go/iter.go +++ b/go/iter.go @@ -47,6 +47,7 @@ type Iterator struct { tai64s int uints int hexlets int + floats int } func (ctx *Decoder) Iter() *Iterator { @@ -76,6 +77,8 @@ func (iter *Iterator) Next() bool { iter.tai64ns++ case types.TAI64NA: iter.tai64nas++ + case types.Float: + iter.floats++ case types.Bin, types.Str, types.Magic: iter.strs++ case types.Raw: @@ -118,6 +121,10 @@ func (iter *Iterator) BigInt() *big.Int { return iter.ctx.bigints[iter.bigints] } +func (iter *Iterator) Float() *Float { + return iter.ctx.floats[iter.floats] +} + func (iter *Iterator) Blob() BlobChunked { return BlobChunked{ ChunkLen: iter.ctx.blobChunkLens[iter.blobs], diff --git a/go/parse.go b/go/parse.go index 9d433ac..7f966e8 100644 --- a/go/parse.go +++ b/go/parse.go @@ -17,6 +17,7 @@ package keks import ( "errors" + "math/big" "go.cypherpunks.su/keks/types" ) @@ -28,6 +29,7 @@ var ( ErrBlobBadChunkLen = errors.New("blob bad chunk len") ErrEOCUnexpected = errors.New("unexpected EOC") ErrTooDeep = errors.New("too deep structure") + ErrFloatBadInt = errors.New("non-int in float") ) func (ctx *Decoder) deTail() { @@ -38,12 +40,49 @@ func (ctx *Decoder) deTail() { } } +func (ctx *Decoder) popIntForFloat() (b *big.Int, err error) { + var sub types.Type + sub, err = ctx.DecodeAtom() + if err != nil { + return + } + switch sub { + case types.Int: + b = big.NewInt(ctx.ints[len(ctx.ints)-1]) + ctx.ints = ctx.ints[:len(ctx.ints)-1] + case types.UInt: + b = big.NewInt(0).SetUint64(ctx.uints[len(ctx.uints)-1]) + ctx.uints = ctx.uints[:len(ctx.uints)-1] + case types.BigInt: + b = ctx.bigints[len(ctx.bigints)-1] 
+ ctx.bigints = ctx.bigints[:len(ctx.bigints)-1] + default: + err = ErrFloatBadInt + return + } + ctx.deTail() + return +} + func (ctx *Decoder) parse() (t types.Type, err error) { t, err = ctx.DecodeAtom() if err != nil { return } switch t { + case types.Float: + f := ctx.floats[len(ctx.floats)-1] + if f.NaN || f.Pinf || f.Ninf { + return + } + f.M, err = ctx.popIntForFloat() + if err != nil { + return + } + f.E, err = ctx.popIntForFloat() + if err != nil { + return + } case types.List: ctx.depth++ if ctx.depth < 0 { diff --git a/go/type.go b/go/type.go index ecc8d3a..9467f0d 100644 --- a/go/type.go +++ b/go/type.go @@ -4,25 +4,24 @@ type AtomType byte //go:generate stringer -type=AtomType const ( - AtomEOC AtomType = 0x00 - AtomNIL AtomType = 0x01 - AtomFalse AtomType = 0x02 - AtomTrue AtomType = 0x03 - AtomHexlet AtomType = 0x04 - AtomList AtomType = 0x08 - AtomMap AtomType = 0x09 - AtomBLOB AtomType = 0x0B - AtomPInt AtomType = 0x0C - AtomNInt AtomType = 0x0D - AtomFloat16 AtomType = 0x10 - AtomFloat32 AtomType = 0x11 - AtomFloat64 AtomType = 0x12 - AtomFloat128 AtomType = 0x13 - AtomFloat256 AtomType = 0x14 - AtomTAI64 AtomType = 0x18 - AtomTAI64N AtomType = 0x19 - AtomTAI64NA AtomType = 0x1A - AtomMagic AtomType = 0x4B + AtomEOC AtomType = 0x00 + AtomNIL AtomType = 0x01 + AtomFalse AtomType = 0x02 + AtomTrue AtomType = 0x03 + AtomHexlet AtomType = 0x04 + AtomList AtomType = 0x08 + AtomMap AtomType = 0x09 + AtomBLOB AtomType = 0x0B + AtomPInt AtomType = 0x0C + AtomNInt AtomType = 0x0D + AtomFloatNaN AtomType = 0x10 + AtomFloatPinf AtomType = 0x11 + AtomFloatNinf AtomType = 0x12 + AtomFloat AtomType = 0x13 + AtomTAI64 AtomType = 0x18 + AtomTAI64N AtomType = 0x19 + AtomTAI64NA AtomType = 0x1A + AtomMagic AtomType = 0x4B AtomStrings = 0x80 AtomIsUTF8 = 0x40 diff --git a/go/unmarshal.go b/go/unmarshal.go index 3d4eabe..f4acd74 100644 --- a/go/unmarshal.go +++ b/go/unmarshal.go @@ -94,7 +94,7 @@ func (ctx *Decoder) unmarshal(iter *Iterator) (v any, err error) { 
case types.BigInt: return iter.BigInt(), nil case types.Float: - panic("float is unsupported") + return iter.Float(), nil case types.TAI64: t := iter.TAI64() if ctx.opts != nil { diff --git a/py3/README b/py3/README index 60d9047..f6f689b 100644 --- a/py3/README +++ b/py3/README @@ -1,6 +1,7 @@ Python3 implementation of KEKS codec. -* No FLOAT*, TAI64NA, or nanoseconds support. +* Partial FLOAT support. +* No TAI64NA, or nanoseconds support. They are stored/decoded just as a raw value It is free software: see the file COPYING.LESSER for copying conditions. diff --git a/py3/keks.py b/py3/keks.py index a62ae4c..d0e2ee6 100755 --- a/py3/keks.py +++ b/py3/keks.py @@ -23,7 +23,7 @@ transparently replace JSON. It has :py:func:`loads` and :py:func:`dumps` functions, similar to native :py:module:`json` library's. KEKS supports dictionaries, lists, -None, booleans, UUID, IPv6 addresses, floats (currently not +None, booleans, UUID, IPv6 addresses, floats (currently not fully implemented!), integers (including big ones), datetime, UTF-8 and binary strings. 
@@ -39,6 +39,7 @@ from datetime import timedelta from datetime import timezone from ipaddress import IPv6Address from math import ceil as _ceil +from math import isnan as _isnan from uuid import UUID @@ -52,17 +53,17 @@ TagMap = 0x09 TagBlob = 0x0B TagPInt = 0x0C TagNInt = 0x0D -TagFloat16 = 0x10 -TagFloat32 = 0x11 -TagFloat64 = 0x12 -TagFloat128 = 0x13 -TagFloat256 = 0x14 +TagFloatNaN = 0x10 +TagFloatPinf = 0x11 +TagFloatNinf = 0x12 +TagFloat = 0x13 TagTAI64 = 0x18 TagTAI64N = 0x19 TagTAI64NA = 0x1A TagMagic = 0x4B TagStr = 0x80 TagUTF8 = 0x40 +NaN = float("nan") def _byte(v): @@ -82,6 +83,10 @@ TagNIntb = _byte(TagNInt) TagTAI64b = _byte(TagTAI64) TagTAI64Nb = _byte(TagTAI64N) TagMagicb = _byte(TagMagic) +TagFloatNaNb = _byte(TagFloatNaN) +TagFloatPinfb = _byte(TagFloatPinf) +TagFloatNinfb = _byte(TagFloatNinf) +TagFloatb = _byte(TagFloat) class DecodeError(ValueError): @@ -169,6 +174,25 @@ class Hexlet: return IPv6Address(self.v) +class Float: + __slots__ = ("m", "e") + + def __init__(self, m: int, e: int): + self.m = m + self.e = e + + def __eq__(self, other) -> bool: + if not isinstance(other, self.__class__): + return False + return (self.m == other.m) and (self.e == other.e) + + def __float__(self) -> float: + return self.m * pow(2, self.e) + + def __repr__(self) -> str: + return "Float(m=%d, e=%d)" % (self.m, self.e) + + Blob = namedtuple("Blob", ("l", "v")) @@ -260,7 +284,17 @@ def dumps(v): if isinstance(v, Hexlet): return TagHexletb + v.v if isinstance(v, float): - raise NotImplementedError("no FLOAT* support") + if v == 0.0: + return dumps(Float(0, 0)) + if _isnan(v): + return TagFloatNaNb + if v == float("+inf"): + return TagFloatPinfb + if v == float("-inf"): + return TagFloatNinfb + raise NotImplementedError("no full FLOAT support") + if isinstance(v, Float): + return TagFloatb + dumps(v.m) + dumps(v.e) if isinstance(v, datetime): ms = v.microsecond v = v.replace(microsecond=0) @@ -335,7 +369,6 @@ def _int(v): _EOC = object() -_floats = {TagFloat16: 
2, TagFloat32: 4, TagFloat64: 8, TagFloat128: 16, TagFloat256: 32} _tais = {TagTAI64: 8, TagTAI64N: 12, TagTAI64NA: 16} @@ -372,11 +405,22 @@ def _loads(v, sets=False, leapsecUTCAllow=False, _allowContainers=True): if len(v) < 1+16: raise NotEnoughData(1+16-len(v)) return Hexlet(v[1:1+16]), v[1+16:] - l = _floats.get(b) - if l is not None: - if len(v) < 1+l: - raise NotEnoughData(1+l-len(v)) - return Raw(v[:1+l]), v[1+l:] + if b == TagFloatNaN: + return NaN, v[1:] + if b == TagFloatPinf: + return float("+inf"), v[1:] + if b == TagFloatNinf: + return float("-inf"), v[1:] + if b == TagFloat: + m, v = _loads(v[1:]) + if type(m) != int: + raise DecodeError("non-int m float") + e, v = _loads(v) + if type(e) != int: + raise DecodeError("non-int e float") + if (m == 0) and (e == 0): + return 0.0, v + return Float(m, e), v l = _tais.get(b) if l is not None: if len(v) < 1+l: diff --git a/py3/test-vector.py b/py3/test-vector.py index c5e4b78..687997b 100644 --- a/py3/test-vector.py +++ b/py3/test-vector.py @@ -17,7 +17,16 @@ data = { 
-123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789123456789, ], }, - "floats": [keks.Raw(keks._byte(keks.TagFloat32) + bytes.fromhex("01020304"))], + "floats": [ + keks.NaN, + float("+inf"), + float("-inf"), + 0.0, + keks.Float(-181, -2), # -45.25 + keks.Float(5, -5), # 0.15625 + keks.Float(-8687443681197687, -46), # -123.456 + keks.Float(12345678901234567890123456789, -12345), + ], "nil": None, "bool": [True, False], "str": { diff --git a/py3/tests/test_float.py b/py3/tests/test_float.py index 0df252f..60de06e 100644 --- a/py3/tests/test_float.py +++ b/py3/tests/test_float.py @@ -17,12 +17,14 @@ from unittest import TestCase from hypothesis import given +from hypothesis.strategies import integers -from keks import _byte +from keks import DecodeError from keks import dumps +from keks import Float from keks import loads +from keks import NaN from keks import NotEnoughData -from keks import Raw 
from tests.strategies import junk_st @@ -31,59 +33,66 @@ class TestFloat(TestCase): def test_throws_when_dumps_float(self) -> None: with self.assertRaises(NotImplementedError) as err: dumps(1.5) - self.assertEqual(str(err.exception), "no FLOAT* support") + self.assertEqual(str(err.exception), "no full FLOAT support") @given(junk_st) - def test_loads_16(self, junk: bytes) -> None: - decoded, tail = loads((b"\x10" + b"\x11" * 2) + junk) - self.assertEqual(decoded, Raw(_byte(0x10) + b"\x11" * 2)) + def test_nan(self, junk: bytes) -> None: + b = dumps(float("nan")) + self.assertSequenceEqual(b, b"\x10") + f, tail = loads(b + junk) self.assertSequenceEqual(tail, junk) + self.assertIs(f, NaN) @given(junk_st) - def test_loads_32(self, junk: bytes) -> None: - decoded, tail = loads((b"\x11" + b"\x11" * 4) + junk) - self.assertEqual(decoded, Raw(_byte(0x11) + b"\x11" * 4)) + def test_pinf(self, junk: bytes) -> None: + b = dumps(float("+inf")) + self.assertSequenceEqual(b, b"\x11") + f, tail = loads(b + junk) self.assertSequenceEqual(tail, junk) + self.assertEqual(f, float("+inf")) @given(junk_st) - def test_loads_64(self, junk: bytes) -> None: - decoded, tail = loads((b"\x12" + b"\x11" * 8) + junk) - self.assertEqual(decoded, Raw(_byte(0x12) + b"\x11" * 8)) + def test_ninf(self, junk: bytes) -> None: + b = dumps(float("-inf")) + self.assertSequenceEqual(b, b"\x12") + f, tail = loads(b + junk) self.assertSequenceEqual(tail, junk) + self.assertEqual(f, float("-inf")) @given(junk_st) - def test_loads_128(self, junk: bytes) -> None: - decoded, tail = loads((b"\x13" + b"\x11" * 16) + junk) - self.assertEqual(decoded, Raw(_byte(0x13) + b"\x11" * 16)) + def test_0(self, junk: bytes) -> None: + b = dumps(0.0) + self.assertSequenceEqual(b, b"\x13" + dumps(0) + dumps(0)) + f, tail = loads(b + junk) self.assertSequenceEqual(tail, junk) + self.assertEqual(f, 0.0) - @given(junk_st) - def test_loads_256(self, junk: bytes) -> None: - decoded, tail = loads((b"\x14" + b"\x11" * 32) + junk) - 
self.assertEqual(decoded, Raw(_byte(0x14) + b"\x11" * 32)) - self.assertSequenceEqual(tail, junk) - - def test_not_enough_data_16(self) -> None: + def test_not_enough(self) -> None: with self.assertRaises(NotEnoughData) as err: - loads(b"\x10" + b"\x11" * (2-1)) + loads(b"\x13") self.assertEqual(err.exception.n, 1) - def test_not_enough_data_32(self) -> None: + def test_not_enough_int(self) -> None: with self.assertRaises(NotEnoughData) as err: - loads(b"\x11" + b"\x11" * (4-1)) + loads(b"\x13" + dumps(123)) self.assertEqual(err.exception.n, 1) - def test_not_enough_data_64(self) -> None: - with self.assertRaises(NotEnoughData) as err: - loads(b"\x12" + b"\x11" * (8-1)) - self.assertEqual(err.exception.n, 1) + def test_non_int0(self) -> None: + with self.assertRaises(DecodeError) as err: + loads(b"\x13" + dumps(False) + dumps(123)) + self.assertEqual(str(err.exception), "non-int m float") - def test_not_enough_data_128(self) -> None: - with self.assertRaises(NotEnoughData) as err: - loads(b"\x13" + b"\x11" * (16-1)) - self.assertEqual(err.exception.n, 1) + def test_non_int1(self) -> None: + with self.assertRaises(DecodeError) as err: + loads(b"\x13" + dumps(123) + dumps(False)) + self.assertEqual(str(err.exception), "non-int e float") - def test_not_enough_data_256(self) -> None: - with self.assertRaises(NotEnoughData) as err: - loads(b"\x14" + b"\x11" * (32-1)) - self.assertEqual(err.exception.n, 1) + @given(integers(), integers(), junk_st) + def test_symmetric(self, m: int, e: int, junk: bytes) -> None: + f, tail = loads(dumps(Float(m, e)) + junk) + self.assertSequenceEqual(tail, junk) + if (m == 0) and (e == 0): + self.assertEqual(f, 0.0) + else: + self.assertEqual(f.m, m) + self.assertEqual(f.e, e) diff --git a/spec/encoding/FLOAT b/spec/encoding/FLOAT index 2e52413..d978f9f 100644 --- a/spec/encoding/FLOAT +++ b/spec/encoding/FLOAT @@ -1,14 +1,27 @@ - ==================================================== - WARNING - 
==================================================== - Currently not implemented and format is not fixed. - ==================================================== +Floats consist of [encoding/INT]-encoded mantissa "m" and +base-2 exponent "e": m * pow(2,e). -Floats are encoded in IEEE 754 binary formats: half, single, double, -quadruple, octuple precision ones. +Normalised values *must* be used. +Separate tag values are used for representing NaN and infinite +numbers. Zero is represented as zero mantissa and exponent. -Negative zero *must not* be used. Shortest possible form *must* be used. + NaN = 0x10 + +inf = 0x11 + -inf = 0x12 + float(m, e) = 0x13 || INT(m) || INT(e) + 0.0 = float(0, 0) -Hint: look at CBOR's RFC for example code of float16 conversion. +That representation is far from being compact. Sending IEEE754 +binaries may be preferable. -Maybe there appear additional restrictions and rules. +Example representations: + +NaN | 10 ++inf | 11 +-inf | 12 +0.0 | 13 0C80 0C80 +45.25 m=181 e=-2 | 13 0C81B5 0D8101 +-45.25 m=-181 e=-2 | 13 0D81B4 0D8101 +0.125 m=1 e=-3 | 13 0C8101 0D8102 +0.15625 m=5 e=-5 | 13 0C8105 0D8104 +123.456 m=8687443681197687 e=-46 | 13 0C871EDD2F1A9FBE77 0D812D diff --git a/spec/encoding/FullTable b/spec/encoding/FullTable index 6bd0d64..2999c36 100644 --- a/spec/encoding/FullTable +++ b/spec/encoding/FullTable @@ -14,11 +14,11 @@ dec | hex | bin | vlen | 012 | 0C | 00001100 | 1+~ | + [encoding/INT] 013 | 0D | 00001101 | 1+~ | - [encoding/INT] 014 | 0E | 00001110 | 0 | -015 | 0F | 00001111 | ? 
| [encoding/FLOAT] -016 | 10 | 00010000 | 0 | -017 | 11 | 00010001 | 0 | -018 | 12 | 00010010 | 0 | -019 | 13 | 00010011 | 0 | +015 | 0F | 00001111 | 0 | +016 | 10 | 00010000 | 0 | [encoding/FLOAT] NaN +017 | 11 | 00010001 | 0 | [encoding/FLOAT] +inf +018 | 12 | 00010010 | 0 | [encoding/FLOAT] -inf +019 | 13 | 00010011 | 4+~ | [encoding/FLOAT] 020 | 14 | 00010100 | 0 | 021 | 15 | 00010101 | 0 | 022 | 16 | 00010110 | 0 | diff --git a/spec/encoding/index b/spec/encoding/index index d61de13..b7f37ba 100644 --- a/spec/encoding/index +++ b/spec/encoding/index @@ -17,7 +17,10 @@ dec | hex | bin | vlen | 012 | 0C | 00001100 | 1+~ | + [encoding/INT] 013 | 0D | 00001101 | 1+~ | - [encoding/INT] ... | ... | ... | ... | ... -015 | 0F | 00001111 | ? | [encoding/FLOAT] +016 | 10 | 00010000 | 0 | [encoding/FLOAT] NaN +017 | 11 | 00010001 | 0 | [encoding/FLOAT] +inf +018 | 12 | 00010010 | 0 | [encoding/FLOAT] -inf +019 | 13 | 00010011 | 4+~ | [encoding/FLOAT] ... | ... | ... | ... | ... 024 | 18 | 00011000 | 8 | [encoding/TAI]64 025 | 19 | 00011001 | 12 | [encoding/TAI]64N diff --git a/tcl/README b/tcl/README index 1098345..ba00560 100644 --- a/tcl/README +++ b/tcl/README @@ -1,5 +1,3 @@ Tcl implementation of the KEKS encoder. -* No FLOAT* support. They can be stored just as a raw value. - It is free software: see the file COPYING.LESSER for copying conditions. diff --git a/tcl/keks.tcl b/tcl/keks.tcl index 5089f06..8da7348 100755 --- a/tcl/keks.tcl +++ b/tcl/keks.tcl @@ -28,6 +28,8 @@ proc char {v} { add [binary format c $v] } +######################################################################## + # v is a complete raw value of the atom. proc RAW {v} { upvar buf buf @@ -54,6 +56,8 @@ proc TRUE {} { char [expr 0x03] } +######################################################################## + # v is either 16-bytes string, or UUID or uncompressed IP address. 
proc HEXLET {v} { set v [binary decode hex [string map {- "" : ""} $v]] @@ -76,6 +80,8 @@ proc MAGIC {v} { add [string repeat [binary format c 0] [expr {12 - $l}]] } +######################################################################## + proc toBEbin {l v} { set a {} for {set i 0} {$i < $l} {incr i} { @@ -94,7 +100,7 @@ proc INT {v} { upvar buf buf if {$v >= 0} { char [expr 0x0C] - } { + } else { char [expr 0x0D] set v [expr {- ($v + 1)}] } @@ -112,6 +118,29 @@ proc INT {v} { BIN [toBEbin [expr {$l + 1}] $v] } +# Either "nan", "+inf", "-inf", or integers {mantissa exponent}. +proc FLOAT {v} { + upvar buf buf + switch $v { + nan { + char [expr 0x10] + } + +inf { + char [expr 0x11] + } + -inf { + char [expr 0x12] + } + default { + char [expr 0x13] + INT [lindex $v 0] + INT [lindex $v 1] + } + } +} + +######################################################################## + proc _str {atom v} { set ll 0 set vl [string length $v] @@ -147,6 +176,27 @@ proc STR {v} { _str [expr {0x80 | 0x40}] [encoding convertto utf-8 $v] } +proc BLOB {chunkLen v} { + upvar buf buf + char [expr 0x0B] + toBE 8 [expr {$chunkLen - 1}] + set vl [string length $v] + set chunks [expr {$vl / $chunkLen}] + for {set i 0} {$i < $chunks} {incr i} { + BIN [string range $v \ + [expr {$i * $chunkLen}] \ + [expr {(($i + 1) * $chunkLen) - 1}]] + } + set left [expr {$vl - ($chunks * $chunkLen)}] + if {$left == 0} { + BIN "" + } else { + BIN [string range $v [expr {$vl - $left}] end] + } +} + +######################################################################## + # v is a list of values that will be eval-ed. 
proc LIST {v} { upvar buf buf @@ -211,24 +261,7 @@ proc SET {v} { MAP $args } -proc BLOB {chunkLen v} { - upvar buf buf - char [expr 0x0B] - toBE 8 [expr {$chunkLen - 1}] - set vl [string length $v] - set chunks [expr {$vl / $chunkLen}] - for {set i 0} {$i < $chunks} {incr i} { - BIN [string range $v \ - [expr {$i * $chunkLen}] \ - [expr {(($i + 1) * $chunkLen) - 1}]] - } - set left [expr {$vl - ($chunks * $chunkLen)}] - if {$left == 0} { - BIN "" - } else { - BIN [string range $v [expr {$vl - $left}] end] - } -} +######################################################################## # v is a "2006-01-02 15:04:05"-formatted string that is converted to seconds. proc ISOToSec {v} { @@ -308,7 +341,9 @@ proc TAI64 {v {n 0} {a 0}} { } } -namespace export EOC NIL FALSE TRUE HEXLET MAGIC INT STR BIN RAW +######################################################################## + +namespace export EOC NIL FALSE TRUE HEXLET MAGIC INT STR BIN FLOAT RAW namespace export TAI64 ToTAI ISOToSec namespace export LIST MAP SET LenFirstSort BLOB diff --git a/tcl/mk-fuzz-inputs b/tcl/mk-fuzz-inputs index 3771b88..614e29b 100755 --- a/tcl/mk-fuzz-inputs +++ b/tcl/mk-fuzz-inputs @@ -32,3 +32,9 @@ dump 'RAW [binary decode hex "1840000000586846A4"]' >tai-leap dump 'TAI64 1234 1234' >tai-ns dump 'TAI64 1234 1234 1234' >tai-as dump "MAGIC fuzz" >magic +dump "FLOAT nan" >float=nan +dump "FLOAT +inf" >float=+inf +dump "FLOAT -inf" >float=-inf +dump "FLOAT {0 0}" >float=0 +dump "FLOAT {123 45}" >float=123,45 +dump "FLOAT {-123 -45}" >float=-123,-45 diff --git a/tcl/test-vector.tcl b/tcl/test-vector.tcl index 8ee7fe0..680f46d 100644 --- a/tcl/test-vector.tcl +++ b/tcl/test-vector.tcl @@ -30,7 +30,14 @@ MAP { }} }} floats {LIST { - {RAW [binary decode hex "1101020304"]} + {FLOAT nan} + {FLOAT +inf} + {FLOAT -inf} + {FLOAT {0 0}} + {FLOAT {-181 -2}} + {FLOAT {5 -5}} + {FLOAT {-8687443681197687 -46}} + {FLOAT {12345678901234567890123456789 -12345}} }} nil NIL bool {LIST {TRUE FALSE}} -- 2.50.0