(*cp) = YACUTF8InvalidCp;
return 0;
}
+ if (str[0] == 0) {
+ (*cp) = YACUTF8InvalidCp;
+ return 1;
+ }
size_t off = 0;
for (off = 0; off < 4; off++) {
- if (BETWEEN(((const unsigned char *)str)[0], lut[off].lower, lut[off].upper)) {
- (*cp) = ((const unsigned char *)str)[0] - lut[off].lower;
+ if (BETWEEN(str[0], lut[off].lower, lut[off].upper)) {
+ (*cp) = str[0] - lut[off].lower;
break;
}
}
if ((1 + off) > len) {
(*cp) = YACUTF8InvalidCp;
for (i = 0; 1 + i < len; i++) {
- if (!BETWEEN(((const unsigned char *)str)[1 + i], 0x80, 0xBF)) {
+ if (!BETWEEN(str[1 + i], 0x80, 0xBF)) {
break;
}
}
return ((1 + i) < len) ? (1 + i) : (1 + off);
}
for (i = 1; i <= off; i++) {
- if (!BETWEEN(((const unsigned char *)str)[i], 0x80, 0xBF)) {
+ if (!BETWEEN(str[i], 0x80, 0xBF)) {
(*cp) = YACUTF8InvalidCp;
return 1 + (i - 1);
}
- (*cp) = (*cp << 6) | (((const unsigned char *)str)[i] & 0x3F);
+ (*cp) = (*cp << 6) | (str[i] & 0x3F);
}
if ((*cp < lut[off].mincp) || BETWEEN(*cp, 0xD800, 0xDFFF) || (*cp > 0x10FFFF)) {
(*cp) = YACUTF8InvalidCp;
obj, tail = BaseString.decode(data)
assert obj.utf8 is True
try:
- return klass(obj.v.decode("utf-8")), tail
+ v = obj.v.decode("utf-8")
except UnicodeDecodeError as err:
raise DecodeError("invalid UTF-8") from err
+ if "\x00" in v:
+ raise DecodeError("null byte in UTF-8")
+ return klass(v), tail
def __repr__(self):
    """Return a debug representation of the form ``STR(<text>)``.

    ``<text>`` is ``self.v`` decoded as UTF-8; any decoding error raised by
    that call propagates to the caller unchanged.
    """
    text = self.v.decode("utf-8")
    return "STR({})".format(text)