Missing invalid UTF-8 strategies

author Sergey Matveev <stargrave@stargrave.org>

Sun, 1 Dec 2024 09:17:50 +0000 (12:17 +0300)

committer Sergey Matveev <stargrave@stargrave.org>

Sun, 1 Dec 2024 09:17:50 +0000 (12:17 +0300)
author Sergey Matveev <stargrave@stargrave.org>
Sun, 1 Dec 2024 09:17:50 +0000 (12:17 +0300)
committer Sergey Matveev <stargrave@stargrave.org>
Sun, 1 Dec 2024 09:17:50 +0000 (12:17 +0300)
diff --git a/pyac/tests/test_str.py b/pyac/tests/test_str.py

index c8f21c7bda0d813f326497bfe9ff9784a6be97e9f1f9e20ba04055622c5b8db9..3b1ed127ced477ce00f7f993acc28c92580526d599cd994ed242b7e9d4a4f601 100644 (file)
--- a/pyac/tests/test_str.py
+++ b/pyac/tests/test_str.py
@@ -31,10 +31,20 @@ from tests.strategies import unicode_allowed
  TagStr: int = 0x80
  TagUTF8: int = 0x40
  
+# Here go the invalid UTF-8 byte(s) strategies. Decoding can fail in
+# the following ways:
+# * invalid start byte -- hypothesis generates integers that will
+#   trigger that failure
+# * invalid continuation byte -- we strip the last byte of multibyte
+#   sequences
+# * unexpected end of data -- we strip the last byte of our data
+
+# 110xxxxx 10xxxxxx(STRIPPED)
  invalid_utf8_2byte_s = integers(min_value=0, max_value=(1 << 5) - 1).map(
      lambda v: bytes(bytearray([1 << 7 | 1 << 6 | v]))
  )
  
+# 1110xxxx 10xxxxxx 10xxxxxx(STRIPPED)
  invalid_utf8_3byte_s = integers(min_value=0, max_value=(1 << 10) - 1).map(
      lambda v: bytes(bytearray([
          (1 << 7 | 1 << 6 | 1 << 5 | (v >> 6)),
author	Sergey Matveev <stargrave@stargrave.org>
	Sun, 1 Dec 2024 09:17:50 +0000 (12:17 +0300)
committer	Sergey Matveev <stargrave@stargrave.org>
	Sun, 1 Dec 2024 09:17:50 +0000 (12:17 +0300)