Blob = namedtuple("Blob", ("l", "v"))
-def LenFirstSort(v):
+def LenFirstUTF8Sort(v):
+ # Sort key: UTF-8 encoded length first, then the encoded bytes themselves,
+ # so ordering depends on byte length rather than on code point count.
+ v = v.encode("utf-8")
return (len(v), v)
keys = v.keys()
if not all(isinstance(k, str) for k in keys):
raise ValueError("map keys can be only strings")
- keys = sorted(keys, key=LenFirstSort)
+ keys = sorted(keys, key=LenFirstUTF8Sort)
if (len(keys) > 0) and len(keys[0]) == 0:
raise ValueError("map keys can not be empty")
for k in keys:
if b == TagMap:
ret = {}
v = v[1:]
- kPrev = ""
+ kPrev = b""
allNILs = True
while True:
k, v = _loads(v, _allowContainers=False)
break
if not isinstance(k, str):
raise DecodeError("non-string key")
- if (len(k) < len(kPrev)) or ((len(k) == len(kPrev)) and (k <= kPrev)):
+ kUTF8 = k.encode("utf-8")
+ if (
+ (len(kUTF8) < len(kPrev)) or
+ ((len(kUTF8) == len(kPrev)) and (kUTF8 <= kPrev))
+ ):
if len(k) == 0:
raise DecodeError("empty key")
raise DecodeError("unsorted keys")
if i == _EOC:
raise DecodeError("unexpected EOC")
ret[k] = i
- kPrev = k
+ kPrev = kUTF8
if i is not None:
allNILs = False
if sets and allNILs:
b"".join(
[
b"".join([dumps(key), dumps(test_map[key])])
- for key in sorted(test_map.keys(), key=lambda x: [len(x), x])
+ for key in sorted(test_map.keys(), key=lambda x: (
+ len(x.encode("utf-8")),
+ x.encode("utf-8"),
+ ))
]
) +
b"\x00"
encoded = dumps(test)
decoded, tail = loads(encoded, sets=True)
self.assertEqual(decoded, test)
+
+ def test_equal_len_codepoints(self):
+ # s1 and s2 both decode to 8 code points, but their UTF-8 forms differ
+ # in length (15 vs 17 bytes), and s2's first code point is lower than
+ # s1's. A code-point based (len, value) sort would therefore put s2
+ # first; the expected encoding below requires s1 to come first.
+ s1 = bytes.fromhex("f1ad9bb3c2997c6dc391c2a0c2845a").decode("utf-8")
+ s2 = bytes.fromhex("f1aaab9ec3adc2bcc3b4c3bec38a0cc3ac").decode("utf-8")
+ encoded = dumps(set((s1, s2)))
+ # Compare hex strings so a mismatch is readable in the failure output.
+ self.assertSequenceEqual(
+ encoded.hex(),
+ (b"".join((b"\x09", dumps(s1), b"\x01", dumps(s2), b"\x01", b"\x00"))).hex(),
+ )