From 90e485702bf32ca9b69b8e7f8461a7a3db74274d0f0121931a4fc051b536234c Mon Sep 17 00:00:00 2001 From: Sergey Matveev Date: Sun, 1 Dec 2024 12:17:50 +0300 Subject: [PATCH] Missing invalid UTF-8 strategies --- pyac/tests/test_str.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pyac/tests/test_str.py b/pyac/tests/test_str.py index c8f21c7..3b1ed12 100644 --- a/pyac/tests/test_str.py +++ b/pyac/tests/test_str.py @@ -31,10 +31,20 @@ from tests.strategies import unicode_allowed TagStr: int = 0x80 TagUTF8: int = 0x40 +# Here go invalid UTF-8 byte(s) strategies. We can fail in the following +# ways during decoding: +# * invalid start byte -- hypothesis generates integers that will +# trigger that failure +# * invalid continuation byte -- we strip the last byte of multibyte +# sequences +# * unexpected end of data -- we strip the last byte of our data + +# 110xxxxx 10xxxxxx(STRIPPED) invalid_utf8_2byte_s = integers(min_value=0, max_value=(1 << 5) - 1).map( lambda v: bytes(bytearray([1 << 7 | 1 << 6 | v])) ) +# 1110xxxx 10xxxxxx 10xxxxxx(STRIPPED) invalid_utf8_3byte_s = integers(min_value=0, max_value=(1 << 10) - 1).map( lambda v: bytes(bytearray([ (1 << 7 | 1 << 6 | 1 << 5 | (v >> 6)), -- 2.50.0