summaryrefslogtreecommitdiff
path: root/Lib/test/test_unicode.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/test_unicode.py')
-rw-r--r--Lib/test/test_unicode.py34
1 files changed, 27 insertions, 7 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 86ebd45e7b..b1f7c8966a 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1800,9 +1800,6 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual(seq.decode('utf-8', 'ignore'),
res.replace('\uFFFD', ''))
- def to_bytestring(self, seq):
- return bytes(int(c, 16) for c in seq.split())
-
def assertCorrectUTF8Decoding(self, seq, res, err):
"""
Check that an invalid UTF-8 sequence raises a UnicodeDecodeError when
@@ -1858,7 +1855,7 @@ class UnicodeTest(string_tests.CommonTest,
]
FFFD = '\ufffd'
for seq in sequences:
- self.assertCorrectUTF8Decoding(self.to_bytestring(seq), '\ufffd',
+ self.assertCorrectUTF8Decoding(bytes.fromhex(seq), '\ufffd',
'unexpected end of data')
def test_invalid_cb_for_2bytes_seq(self):
@@ -1880,7 +1877,7 @@ class UnicodeTest(string_tests.CommonTest,
('DF C0', FFFDx2), ('DF FF', FFFDx2),
]
for seq, res in sequences:
- self.assertCorrectUTF8Decoding(self.to_bytestring(seq), res,
+ self.assertCorrectUTF8Decoding(bytes.fromhex(seq), res,
'invalid continuation byte')
def test_invalid_cb_for_3bytes_seq(self):
@@ -1938,7 +1935,7 @@ class UnicodeTest(string_tests.CommonTest,
('EF BF C0', FFFDx2), ('EF BF FF', FFFDx2),
]
for seq, res in sequences:
- self.assertCorrectUTF8Decoding(self.to_bytestring(seq), res,
+ self.assertCorrectUTF8Decoding(bytes.fromhex(seq), res,
'invalid continuation byte')
def test_invalid_cb_for_4bytes_seq(self):
@@ -2017,7 +2014,7 @@ class UnicodeTest(string_tests.CommonTest,
('F4 8F BF C0', FFFDx2), ('F4 8F BF FF', FFFDx2)
]
for seq, res in sequences:
- self.assertCorrectUTF8Decoding(self.to_bytestring(seq), res,
+ self.assertCorrectUTF8Decoding(bytes.fromhex(seq), res,
'invalid continuation byte')
def test_codecs_idna(self):
@@ -2735,6 +2732,29 @@ class CAPITest(unittest.TestCase):
self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0')
self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff')
+ # Test PyUnicode_FindChar()
+ @support.cpython_only
+ def test_findchar(self):
+ from _testcapi import unicode_findchar
+
+ for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
+ for i, ch in enumerate(str):
+ self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), 1), i)
+ self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), -1), i)
+
+ str = "!>_<!"
+ self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), 1), -1)
+ self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), -1), -1)
+ # start < end
+ self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, 1), 4)
+ self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, -1), 4)
+ # start >= end
+ self.assertEqual(unicode_findchar(str, ord('!'), 0, 0, 1), -1)
+ self.assertEqual(unicode_findchar(str, ord('!'), len(str), 0, 1), -1)
+ # negative
+ self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, 1), 0)
+ self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, -1), 0)
+
# Test PyUnicode_CopyCharacters()
@support.cpython_only
def test_copycharacters(self):