From: Sergey Matveev Date: Fri, 18 Oct 2024 09:17:55 +0000 (+0300) Subject: Deterministic decoding of string lengths X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=e85f972d9a48ecad4df31ea099f8b9b1280e662f708ce9cb1cb5c548e9514cfe;p=keks.git Deterministic decoding of string lengths --- diff --git a/cyac/cmd/test-vector/test-vector.c b/cyac/cmd/test-vector/test-vector.c index 3e54054..ddf9ea5 100644 --- a/cyac/cmd/test-vector/test-vector.c +++ b/cyac/cmd/test-vector/test-vector.c @@ -25,7 +25,7 @@ main(int argc, char **argv) const size_t len = 68 * 1024; unsigned char *buf = malloc(len); assert(buf != NULL); - unsigned char *bin = malloc(1 << 16); + unsigned char *bin = malloc(1 << 17); assert(bin != NULL); adder(YACAtomMapEncode(&Got, buf + Off, len - Off)); // . @@ -48,10 +48,14 @@ main(int argc, char **argv) adder(YACAtomBinEncode(&Got, buf + Off, len - Off, bin, 61)); memset(bin, '2', 255); adder(YACAtomBinEncode(&Got, buf + Off, len - Off, bin, 255)); + memset(bin, 'A', 61 + 255); + adder(YACAtomBinEncode(&Got, buf + Off, len - Off, bin, 61 + 255)); + memset(bin, 'B', 62 + 255); + adder(YACAtomBinEncode(&Got, buf + Off, len - Off, bin, 62 + 255)); memset(bin, '3', 1024); adder(YACAtomBinEncode(&Got, buf + Off, len - Off, bin, 1024)); - memset(bin, '4', 1 << 16); - adder(YACAtomBinEncode(&Got, buf + Off, len - Off, bin, 1 << 16)); + memset(bin, '4', 63 + 255 + 65535 + 1); + adder(YACAtomBinEncode(&Got, buf + Off, len - Off, bin, 63 + 255 + 65535 + 1)); adder(YACAtomEOCEncode(&Got, buf + Off, len - Off)); // .str.bin adder( diff --git a/cyac/lib/dec.c b/cyac/lib/dec.c index 588744a..b8cfd1d 100644 --- a/cyac/lib/dec.c +++ b/cyac/lib/dec.c @@ -47,9 +47,11 @@ YACAtomDecode( break; case 62: ll = 2; + l += 255; break; case 63: ll = 8; + l += 255 + 65535; break; } if (ll != 0) { @@ -57,14 +59,11 @@ YACAtomDecode( if (len < (*got)) { return YACErrNotEnough; } - l = yacFromBE(buf + 1, ll); - if ((l < 61) || ((l < (1 << 8)) && (ll > 1)) || - ((l < (1 << 16)) && (ll > 2))) { - return YACErrLenNonMinimal; + uint64_t ul = yacFromBE(buf + 1, ll); + if (ul > (SIZE_MAX - (63 + 255 + 65535))) { + return YACErrLenTooBig; } - } - if (l > SIZE_MAX) { - return YACErrLenTooBig; + l += ul; } const size_t ls = (size_t)l; (*got) += ls; diff --git a/cyac/lib/enc.c b/cyac/lib/enc.c index b737e25..22509bf 100644 --- a/cyac/lib/enc.c +++ b/cyac/lib/enc.c @@ -175,18 +175,18 @@ yacAtomStrEncode( unsigned char lVal = 0; size_t lLen = 0; unsigned char lBuf[8] = {0}; - if (srcLen >= ((uint64_t)1 << 16)) { + if (srcLen >= (63 + 255 + 65535)) { lVal = 63; lLen = 8; - yacToBE(lBuf, 8, (uint64_t)srcLen); - } else if (srcLen >= ((uint64_t)1 << 8)) { + yacToBE(lBuf, 8, (uint64_t)srcLen - (63 + 255 + 65535)); + } else if (srcLen >= (62 + 255)) { lVal = 62; lLen = 2; - yacToBE(lBuf, 2, (uint64_t)srcLen); - } else if (srcLen > 60) { + yacToBE(lBuf, 2, (uint64_t)srcLen - (62 + 255)); + } else if (srcLen >= 61) { lVal = 61; lLen = 1; - lBuf[0] = (unsigned char)(srcLen & 0xFF); + lBuf[0] = (unsigned char)((srcLen - 61) & 0xFF); } else { lVal = (unsigned char)srcLen; } diff --git a/cyac/lib/err.c b/cyac/lib/err.c index 2040665..5908e3f 100644 --- a/cyac/lib/err.c +++ b/cyac/lib/err.c @@ -12,8 +12,6 @@ YACErr2Str(const enum YACErr err) return "NotEnough"; case YACErrUnknownType: return "UnknownType"; - case YACErrLenNonMinimal: - return "LenNonMinimal"; case YACErrLenTooBig: return "LenTooBig"; case YACErrBadUTF8: diff --git a/cyac/lib/err.h b/cyac/lib/err.h index 1f35725..711d81b 100644 --- a/cyac/lib/err.h +++ b/cyac/lib/err.h @@ -11,8 +11,6 @@ // Not enough data. Atom's @code{.off} must contain how much. // @item YACErrUnknownType, // unknown atom's type // Unknown atom's type. -// @item YACErrLenNonMinimal -// Non minimal string's length encoding. // @item YACErrLenTooBig, // Too long string (>1<<60), can not be decoded. // @item YACErrBadUTF8 @@ -48,7 +46,6 @@ enum YACErr { YACErrNo = 1, YACErrNotEnough, YACErrUnknownType, - YACErrLenNonMinimal, YACErrLenTooBig, YACErrBadUTF8, YACErrIntNonMinimal, diff --git a/cyac/lib/pki/cer.c b/cyac/lib/pki/cer.c index 3448f1a..9929bf0 100644 --- a/cyac/lib/pki/cer.c +++ b/cyac/lib/pki/cer.c @@ -262,7 +262,7 @@ YACCerVerify( const size_t items = 5; struct YACItem tbsItems[5]; memset(&tbsItems, 0, sizeof tbsItems); - struct YACItems tbs = {.list = tbsItems, items, items}; + struct YACItems tbs = {.list = tbsItems, .offsets=NULL, .len=items, .cap=-1}; tbsItems[0].atom.typ = YACItemMap; tbsItems[0].atom.v.list.head = 1; diff --git a/gyac/cmd/test-vector-anys/main.go b/gyac/cmd/test-vector-anys/main.go index 0f63d6b..0dadf2b 100644 --- a/gyac/cmd/test-vector-anys/main.go +++ b/gyac/cmd/test-vector-anys/main.go @@ -42,8 +42,10 @@ func main() { bytes.Repeat([]byte{'0'}, 60), bytes.Repeat([]byte{'1'}, 61), bytes.Repeat([]byte{'2'}, 255), + bytes.Repeat([]byte{'A'}, 61+255), + bytes.Repeat([]byte{'B'}, 62+255), bytes.Repeat([]byte{'3'}, 1024), - bytes.Repeat([]byte{'4'}, 1<<16), + bytes.Repeat([]byte{'4'}, 63+255+65535+1), }, "utf8": "привет мир", }, diff --git a/gyac/cmd/test-vector-manual/main.go b/gyac/cmd/test-vector-manual/main.go index aa0048b..5f7b0f4 100644 --- a/gyac/cmd/test-vector-manual/main.go +++ b/gyac/cmd/test-vector-manual/main.go @@ -30,8 +30,10 @@ func main() { buf = gyac.AtomBinEncode(buf, bytes.Repeat([]byte{'0'}, 60)) buf = gyac.AtomBinEncode(buf, bytes.Repeat([]byte{'1'}, 61)) buf = gyac.AtomBinEncode(buf, bytes.Repeat([]byte{'2'}, 255)) + buf = gyac.AtomBinEncode(buf, bytes.Repeat([]byte{'A'}, 61+255)) + buf = gyac.AtomBinEncode(buf, bytes.Repeat([]byte{'B'}, 62+255)) buf = gyac.AtomBinEncode(buf, bytes.Repeat([]byte{'3'}, 1024)) - buf = gyac.AtomBinEncode(buf, bytes.Repeat([]byte{'4'}, 1<<16)) + buf = gyac.AtomBinEncode(buf, bytes.Repeat([]byte{'4'}, 63+255+65535+1)) } buf = gyac.AtomEOCEncode(buf) { diff --git a/gyac/dec.go b/gyac/dec.go index 459b7c4..3d6c7e2 100644 --- a/gyac/dec.go +++ b/gyac/dec.go @@ -66,7 +66,6 @@ func (raw *Raw) String() string { var ( ErrNotEnough = errors.New("not enough data") - ErrLenNonMinimal = errors.New("non-minimal len") ErrLenTooBig = errors.New("string len >1<<60") ErrIntNonMinimal = errors.New("int non minimal") ErrUnknownType = errors.New("unknown type") @@ -99,8 +98,10 @@ func AtomDecode(buf []byte) (item *Item, off int, err error) { ll = 1 case 62: ll = 2 + l += ((1 << 8) - 1) case 63: ll = 8 + l += ((1 << 8) - 1) + ((1 << 16) - 1) } if ll != 0 { off += ll @@ -109,15 +110,11 @@ func AtomDecode(buf []byte) (item *Item, off int, err error) { return } ul := FromBE(buf[1 : 1+ll]) - if ul > (1 << 63) { + if ul > (1<<63)-(63+((1<<8)-1)+((1<<16)-1)) { err = ErrLenTooBig return } - l = int(ul) - if (l < 61) || ((l < (1 << 8)) && (ll > 1)) || ((l < (1 << 16)) && (ll > 2)) { - err = ErrLenNonMinimal - return - } + l += int(ul) } off += l if off <= 0 { diff --git a/gyac/enc.go b/gyac/enc.go index b59d8de..419653e 100644 --- a/gyac/enc.go +++ b/gyac/enc.go @@ -220,19 +220,19 @@ func AtomBlobEncode(buf []byte, chunkLen int) []byte { } func atomStrEncode(buf, data []byte, utf8 bool) []byte { - lv := 0 + var lv int var l []byte - if len(data) >= (1 << 16) { + if len(data) >= 63+((1<<8)-1)+((1<<16)-1) { lv = 63 l = make([]byte, 8) - ToBE(l, uint64(len(data))) - } else if len(data) >= (1 << 8) { + ToBE(l, uint64(len(data)-(lv+((1<<8)-1)+((1<<16)-1)))) + } else if len(data) >= 62+255 { lv = 62 l = make([]byte, 2) - ToBE(l, uint64(len(data))) - } else if len(data) > 60 { + ToBE(l, uint64(len(data)-(lv+((1<<8)-1)))) + } else if len(data) >= 61 { lv = 61 - l = []byte{byte(len(data))} + l = []byte{byte(len(data) - lv)} } else { lv = len(data) } diff --git a/pyac/pyac.py b/pyac/pyac.py index c419ee5..0495bf7 100644 --- a/pyac/pyac.py +++ b/pyac/pyac.py @@ -19,7 +19,6 @@ class DecodeError(ValueError): WrongTag = DecodeError("wrong tag") -NonMinimal = DecodeError("non-miminal encoding") class NotEnoughData(DecodeError): @@ -183,7 +182,7 @@ class Int: raise NotEnoughData(l) v = int.from_bytes(data[:l], "big") if v < 32: - raise NonMinimal + raise DecodeError("non-miminal encoding") if neg: v = -1 - v return klass(v), data[l:] @@ -515,14 +514,17 @@ class BaseString: def encode(self): l = len(self.v) - if l >= (1<<16): + if l >= (63 + ((1<<8)-1) + ((1<<16)-1)): lv = 63 + l -= (lv + ((1<<8)-1) + ((1<<16)-1)) lb = l.to_bytes(8, "big") - elif l >= (1<<8): + elif l >= (62 + ((1<<8)-1)): lv = 62 + l -= (lv + ((1<<8)-1)) lb = l.to_bytes(2, "big") elif l >= 61: lv = 61 + l -= lv lb = l.to_bytes(1, "big") else: lv = l @@ -545,18 +547,15 @@ class BaseString: llen = 1 elif l == 62: llen = 2 + l += ((1<<8)-1) elif l == 63: llen = 8 + l += ((1<<8)-1) + ((1<<16)-1) data = data[1:] if llen > 0: if len(data) < llen: raise NotEnoughData(llen) - l = int.from_bytes(data[:llen], "big") - if ( - (l < 61) or ((l < (1<<8)) and (llen > 1)) or - ((l < (1<<16)) and (llen > 2)) - ): - raise NonMinimal + l += int.from_bytes(data[:llen], "big") data = data[llen:] if len(data) < l: raise NotEnoughData(l) diff --git a/pyac/test-vector.py b/pyac/test-vector.py index 9a775d4..beb8710 100644 --- a/pyac/test-vector.py +++ b/pyac/test-vector.py @@ -15,8 +15,10 @@ data = { 60 * b"0", 61 * b"1", 255 * b"2", + (61+255) * b"A", + (62+255) * b"B", 1024 * b"3", - (1<<16) * b"4", + (63+255+65535+1) * b"4", ], "utf8": "привет мир", }, diff --git a/spec/encoding/str.texi b/spec/encoding/str.texi index be0e6e9..8f6607d 100644 --- a/spec/encoding/str.texi +++ b/spec/encoding/str.texi @@ -23,11 +23,11 @@ If length value equals to: @item 0-60 Use as is. @item 61 - Then next 8-bits are the actual length. + 61 plus next 8-bits value. @item 62 - Then next 16-bits (big-endian) are the actual length. + 62 plus 255 plus next big-endian 16-bits value. @item 63 - Then next 64-bits (big-endian) are the actual length. + 63 plus 255 plus 65535 plus next big-endian 64-bits value. @end table String's length @strong{must} be encoded in shortest possible form. @@ -41,7 +41,7 @@ Example representations: @item 0-byte binary string @tab @code{80} @item 4-byte binary string @code{0x01 0x02 0x03 0x04} @tab @code{84 01 02 03 04} -@item 64-byte binary string with 0x41 @tab @code{BD 40 41 41 .. 41} +@item 64-byte binary string with 0x41 @tab @code{BD 03 41 41 .. 41} @item UTF-8 string "привет мир" ("hello world" on russian) @tab @code{D3 D0 BF D1 80 D0 B8 D0 B2 D0 B5 D1 82 20 D0 BC D0 B8 D1 80} diff --git a/tyac/test-vector.tcl b/tyac/test-vector.tcl index bd508dd..abecf2c 100644 --- a/tyac/test-vector.tcl +++ b/tyac/test-vector.tcl @@ -36,8 +36,10 @@ MAP { {BIN [string repeat "0" 60]} {BIN [string repeat "1" 61]} {BIN [string repeat "2" 255]} + {BIN [string repeat "A" [expr {61+255}]]} + {BIN [string repeat "B" [expr {62+255}]]} {BIN [string repeat "3" 1024]} - {BIN [string repeat "4" [expr {1 << 16}]]} + {BIN [string repeat "4" [expr {63+255+65535+1}]]} }} }} blob {LIST { diff --git a/tyac/tyac.tcl b/tyac/tyac.tcl index 1a12867..837351c 100644 --- a/tyac/tyac.tcl +++ b/tyac/tyac.tcl @@ -73,15 +73,18 @@ proc _str {atom v} { set ll 0 set vl [string length $v] set lv $vl - if {$vl >= [expr {1 << 16}]} { + if {$vl >= [expr {63 + 255 + 65535}]} { set lv 63 set ll 8 - } elseif {$vl >= [expr {1 << 8}]} { + set vl [expr {$vl - 63 - 255 - 65535}] + } elseif {$vl >= [expr {62 + 255}]} { set lv 62 set ll 2 + set vl [expr {$vl - 62 - 255}] } elseif {$vl > 60} { set lv 61 set ll 1 + set vl [expr {$vl - 61}] } char [expr {$atom | $lv}] if {$ll > 0} { toBE $ll $vl }