Cypherpunks repositories - keks.git/commitdiff
Explicitly disallow null byte in UTF-8
author Sergey Matveev <stargrave@stargrave.org>
Mon, 7 Oct 2024 12:11:57 +0000 (15:11 +0300)
committer Sergey Matveev <stargrave@stargrave.org>
Mon, 7 Oct 2024 12:53:46 +0000 (15:53 +0300)
cyac/utf8.c
gyac/dec.go
pyac/pyac.py

index a593370f8221163fd2f6f24e586412102d3960f1aaf7fefe23e342fc97750bac..9d6c4a88c4cd143febda248b4f6be5b50027cb9ccd893d2b55216a1ca34171db 100644 (file)
@@ -61,10 +61,14 @@ YACUTF8CpDecode(uint32_t *cp, const unsigned char *str, const size_t len)
         (*cp) = YACUTF8InvalidCp;
         return 0;
     }
+    if (str[0] == 0) {
+        (*cp) = YACUTF8InvalidCp;
+        return 1;
+    }
     size_t off = 0;
     for (off = 0; off < 4; off++) {
-        if (BETWEEN(((const unsigned char *)str)[0], lut[off].lower, lut[off].upper)) {
-            (*cp) = ((const unsigned char *)str)[0] - lut[off].lower;
+        if (BETWEEN(str[0], lut[off].lower, lut[off].upper)) {
+            (*cp) = str[0] - lut[off].lower;
             break;
         }
     }
@@ -76,18 +80,18 @@ YACUTF8CpDecode(uint32_t *cp, const unsigned char *str, const size_t len)
     if ((1 + off) > len) {
         (*cp) = YACUTF8InvalidCp;
         for (i = 0; 1 + i < len; i++) {
-            if (!BETWEEN(((const unsigned char *)str)[1 + i], 0x80, 0xBF)) {
+            if (!BETWEEN(str[1 + i], 0x80, 0xBF)) {
                 break;
             }
         }
         return ((1 + i) < len) ? (1 + i) : (1 + off);
     }
     for (i = 1; i <= off; i++) {
-        if (!BETWEEN(((const unsigned char *)str)[i], 0x80, 0xBF)) {
+        if (!BETWEEN(str[i], 0x80, 0xBF)) {
             (*cp) = YACUTF8InvalidCp;
             return 1 + (i - 1);
         }
-        (*cp) = (*cp << 6) | (((const unsigned char *)str)[i] & 0x3F);
+        (*cp) = (*cp << 6) | (str[i] & 0x3F);
     }
     if ((*cp < lut[off].mincp) || BETWEEN(*cp, 0xD800, 0xDFFF) || (*cp > 0x10FFFF)) {
         (*cp) = YACUTF8InvalidCp;
index c805f522cc8d39ad9f5f0f79da61f27745b71b62cc91b9e1bac23154a884bace..78ed7c6cb00ce6a91ad9b1e95a5686b7e6e9e1eb024462cce1670bf82536425d 100644 (file)
@@ -19,6 +19,7 @@ import (
        "encoding/hex"
        "errors"
        "fmt"
+       "strings"
        "unicode/utf8"
        "unsafe"
 
@@ -128,6 +129,9 @@ func AtomDecode(buf []byte) (item *Item, off int, err error) {
                        if !utf8.ValidString(s) {
                                err = ErrBadUTF8
                        }
+                       if strings.Contains(s, "\x00") {
+                               err = ErrBadUTF8
+                       }
                }
                return
        }
index 6c34d7221a81748d44e60d7edf6c2252adfdd737890a39404fe158979a9cc0cb..6ac96680e740915e09fab4bd81f1727ff1eec442e165508abe428db23e0f0d41 100644 (file)
@@ -571,9 +571,12 @@ class Str(BaseString):
         obj, tail = BaseString.decode(data)
         assert obj.utf8 is True
         try:
-            return klass(obj.v.decode("utf-8")), tail
+            v = obj.v.decode("utf-8")
         except UnicodeDecodeError as err:
             raise DecodeError("invalid UTF-8") from err
+        if "\x00" in v:
+            raise DecodeError("null byte in UTF-8")
+        return klass(v), tail
 
     def __repr__(self):
         return "STR(" + self.v.decode("utf-8") + ")"