From: Sergey Matveev Date: Mon, 7 Oct 2024 12:11:57 +0000 (+0300) Subject: Explicitly disallow null byte in UTF-8 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=dd4897e18d8544056ce1f0a7d7d2f67d4e5e1a18d780a72b6aa22b1208fbca10;p=keks.git Explicitly disallow null byte in UTF-8 --- diff --git a/cyac/utf8.c b/cyac/utf8.c index a593370..9d6c4a8 100644 --- a/cyac/utf8.c +++ b/cyac/utf8.c @@ -61,10 +61,14 @@ YACUTF8CpDecode(uint32_t *cp, const unsigned char *str, const size_t len) (*cp) = YACUTF8InvalidCp; return 0; } + if (str[0] == 0) { + (*cp) = YACUTF8InvalidCp; + return 1; + } size_t off = 0; for (off = 0; off < 4; off++) { - if (BETWEEN(((const unsigned char *)str)[0], lut[off].lower, lut[off].upper)) { - (*cp) = ((const unsigned char *)str)[0] - lut[off].lower; + if (BETWEEN(str[0], lut[off].lower, lut[off].upper)) { + (*cp) = str[0] - lut[off].lower; break; } } @@ -76,18 +80,18 @@ YACUTF8CpDecode(uint32_t *cp, const unsigned char *str, const size_t len) if ((1 + off) > len) { (*cp) = YACUTF8InvalidCp; for (i = 0; 1 + i < len; i++) { - if (!BETWEEN(((const unsigned char *)str)[1 + i], 0x80, 0xBF)) { + if (!BETWEEN(str[1 + i], 0x80, 0xBF)) { break; } } return ((1 + i) < len) ? (1 + i) : (1 + off); } for (i = 1; i <= off; i++) { - if (!BETWEEN(((const unsigned char *)str)[i], 0x80, 0xBF)) { + if (!BETWEEN(str[i], 0x80, 0xBF)) { (*cp) = YACUTF8InvalidCp; return 1 + (i - 1); } - (*cp) = (*cp << 6) | (((const unsigned char *)str)[i] & 0x3F); + (*cp) = (*cp << 6) | (str[i] & 0x3F); } if ((*cp < lut[off].mincp) || BETWEEN(*cp, 0xD800, 0xDFFF) || (*cp > 0x10FFFF)) { (*cp) = YACUTF8InvalidCp; diff --git a/gyac/dec.go b/gyac/dec.go index c805f52..78ed7c6 100644 --- a/gyac/dec.go +++ b/gyac/dec.go @@ -19,6 +19,7 @@ import ( "encoding/hex" "errors" "fmt" + "strings" "unicode/utf8" "unsafe" @@ -128,6 +129,9 @@ func AtomDecode(buf []byte) (item *Item, off int, err error) { if !utf8.ValidString(s) { err = ErrBadUTF8 } + if strings.Contains(s, "\x00") { + err = ErrBadUTF8 + } } return } diff --git a/pyac/pyac.py b/pyac/pyac.py index 6c34d72..6ac9668 100644 --- a/pyac/pyac.py +++ b/pyac/pyac.py @@ -571,9 +571,12 @@ class Str(BaseString): obj, tail = BaseString.decode(data) assert obj.utf8 is True try: - return klass(obj.v.decode("utf-8")), tail + v = obj.v.decode("utf-8") except UnicodeDecodeError as err: raise DecodeError("invalid UTF-8") from err + if "\x00" in v: + raise DecodeError("null byte in UTF-8") + return klass(v), tail def __repr__(self): return "STR(" + self.v.decode("utf-8") + ")"