From bed50e1fd3e82021bd51be24920880518268372e Mon Sep 17 00:00:00 2001 From: Xiang Zhang Date: Tue, 20 Dec 2016 22:52:33 +0800 Subject: Issue #28822: Adjust indices handling of PyUnicode_FindChar(). --- Lib/test/test_unicode.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'Lib/test/test_unicode.py') diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 883c362a03..fb77ffb470 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2728,6 +2728,29 @@ class CAPITest(unittest.TestCase): self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0') self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff') + # Test PyUnicode_FindChar() + @support.cpython_only + def test_findchar(self): + from _testcapi import unicode_findchar + + for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1": + for i, ch in enumerate(str): + self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), 1), i) + self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), -1), i) + + str = "!>_<!" + self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), 1), -1) + self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), -1), -1) + # start < end + self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, 1), 4) + self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, -1), 4) + # start >= end + self.assertEqual(unicode_findchar(str, ord('!'), 0, 0, 1), -1) + self.assertEqual(unicode_findchar(str, ord('!'), len(str), 0, 1), -1) + # negative + self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, 1), 0) + self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, -1), 0) + # Test PyUnicode_CopyCharacters() @support.cpython_only def test_copycharacters(self): -- cgit v1.2.1 From 0600f750ef142dc1594bc489d17a43453a74ea80 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 21 Dec 2016 12:59:28 +0200 Subject: Issue #28992: Use bytes.fromhex(). 
--- Lib/test/test_unicode.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'Lib/test/test_unicode.py') diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index fb77ffb470..003ff18348 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1793,9 +1793,6 @@ class UnicodeTest(string_tests.CommonTest, self.assertEqual(seq.decode('utf-8', 'ignore'), res.replace('\uFFFD', '')) - def to_bytestring(self, seq): - return bytes(int(c, 16) for c in seq.split()) - def assertCorrectUTF8Decoding(self, seq, res, err): """ Check that an invalid UTF-8 sequence raises a UnicodeDecodeError when @@ -1851,7 +1848,7 @@ class UnicodeTest(string_tests.CommonTest, ] FFFD = '\ufffd' for seq in sequences: - self.assertCorrectUTF8Decoding(self.to_bytestring(seq), '\ufffd', + self.assertCorrectUTF8Decoding(bytes.fromhex(seq), '\ufffd', 'unexpected end of data') def test_invalid_cb_for_2bytes_seq(self): @@ -1873,7 +1870,7 @@ class UnicodeTest(string_tests.CommonTest, ('DF C0', FFFDx2), ('DF FF', FFFDx2), ] for seq, res in sequences: - self.assertCorrectUTF8Decoding(self.to_bytestring(seq), res, + self.assertCorrectUTF8Decoding(bytes.fromhex(seq), res, 'invalid continuation byte') def test_invalid_cb_for_3bytes_seq(self): @@ -1931,7 +1928,7 @@ class UnicodeTest(string_tests.CommonTest, ('EF BF C0', FFFDx2), ('EF BF FF', FFFDx2), ] for seq, res in sequences: - self.assertCorrectUTF8Decoding(self.to_bytestring(seq), res, + self.assertCorrectUTF8Decoding(bytes.fromhex(seq), res, 'invalid continuation byte') def test_invalid_cb_for_4bytes_seq(self): @@ -2010,7 +2007,7 @@ class UnicodeTest(string_tests.CommonTest, ('F4 8F BF C0', FFFDx2), ('F4 8F BF FF', FFFDx2) ] for seq, res in sequences: - self.assertCorrectUTF8Decoding(self.to_bytestring(seq), res, + self.assertCorrectUTF8Decoding(bytes.fromhex(seq), res, 'invalid continuation byte') def test_codecs_idna(self): -- cgit v1.2.1