diff options
Diffstat (limited to 'Lib/test/test_codeccallbacks.py')
-rw-r--r-- | Lib/test/test_codeccallbacks.py | 121 |
1 files changed, 103 insertions, 18 deletions
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index cacdfae7d2..4cfb88e99b 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -150,6 +150,22 @@ class CodecCallbackTest(unittest.TestCase): sout = b"a\xac\\u1234\xa4\\u8000\\U0010ffff" self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout) + def test_nameescape(self): + # Does the same as backslashescape, but prefers ``\N{...}`` escape + # sequences. + sin = "a\xac\u1234\u20ac\u8000\U0010ffff" + sout = (b'a\\N{NOT SIGN}\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}' + b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff') + self.assertEqual(sin.encode("ascii", "namereplace"), sout) + + sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}' + b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff') + self.assertEqual(sin.encode("latin-1", "namereplace"), sout) + + sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\xa4' + b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff') + self.assertEqual(sin.encode("iso-8859-15", "namereplace"), sout) + def test_decoding_callbacks(self): # This is a test for a decoding callback handler # that allows the decoding of the invalid sequence @@ -220,6 +236,11 @@ class CodecCallbackTest(unittest.TestCase): "\u0000\ufffd" ) + self.assertEqual( + b"\x00\x00\x00\x00\x00".decode("unicode-internal", "backslashreplace"), + "\u0000\\x00" + ) + codecs.register_error("test.hui", handler_unicodeinternal) self.assertEqual( @@ -287,7 +308,7 @@ class CodecCallbackTest(unittest.TestCase): def test_longstrings(self): # test long strings to check for memory overflow problems errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", - "backslashreplace"] + "backslashreplace", "namereplace"] # register the handlers under different names, # to prevent the codec from recognizing the name for err in errors: @@ -550,17 +571,6 @@ class CodecCallbackTest(unittest.TestCase): codecs.backslashreplace_errors, UnicodeError("ouch") ) - # "backslashreplace" can only be used for encoding - self.assertRaises( - TypeError, - codecs.backslashreplace_errors, - UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch") - ) - self.assertRaises( - TypeError, - codecs.backslashreplace_errors, - UnicodeTranslateError("\u3042", 0, 1, "ouch") - ) # Use the correct exception tests = [ ("\u3042", "\\u3042"), @@ -585,6 +595,72 @@ class CodecCallbackTest(unittest.TestCase): 1, 1 + len(s), "ouch")), (r, 1 + len(s)) ) + self.assertEqual( + codecs.backslashreplace_errors( + UnicodeTranslateError("a" + s + "b", + 1, 1 + len(s), "ouch")), + (r, 1 + len(s)) + ) + tests = [ + (b"a", "\\x61"), + (b"\n", "\\x0a"), + (b"\x00", "\\x00"), + (b"\xff", "\\xff"), + ] + for b, r in tests: + with self.subTest(bytes=b): + self.assertEqual( + codecs.backslashreplace_errors( + UnicodeDecodeError("ascii", bytearray(b"a" + b + b"b"), + 1, 2, "ouch")), + (r, 2) + ) + + def test_badandgoodnamereplaceexceptions(self): + # "namereplace" complains about a non-exception passed in + self.assertRaises( + TypeError, + codecs.namereplace_errors, + 42 + ) + # "namereplace" complains about the wrong exception types + self.assertRaises( + TypeError, + codecs.namereplace_errors, + UnicodeError("ouch") + ) + # "namereplace" can only be used for encoding + self.assertRaises( + TypeError, + codecs.namereplace_errors, + UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch") + ) + self.assertRaises( + TypeError, + codecs.namereplace_errors, + UnicodeTranslateError("\u3042", 0, 1, "ouch") + ) + # Use the correct exception + tests = [ + ("\u3042", "\\N{HIRAGANA LETTER A}"), + ("\x00", "\\x00"), + ("\ufbf9", "\\N{ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH " + "HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM}"), + ("\U000e007f", "\\N{CANCEL TAG}"), + ("\U0010ffff", "\\U0010ffff"), + # Lone surrogates + ("\ud800", "\\ud800"), + ("\udfff", "\\udfff"), + ("\ud800\udfff", "\\ud800\\udfff"), + ] + for s, r in tests: + with self.subTest(str=s): + self.assertEqual( + codecs.namereplace_errors( + UnicodeEncodeError("ascii", "a" + s + "b", + 1, 1 + len(s), "ouch")), + (r, 1 + len(s)) + ) def test_badandgoodsurrogateescapeexceptions(self): surrogateescape_errors = codecs.lookup_error('surrogateescape') @@ -663,20 +739,24 @@ class CodecCallbackTest(unittest.TestCase): surrogatepass_errors, UnicodeDecodeError(enc, "a".encode(enc), 0, 1, "ouch") ) + for s in ("\ud800", "\udfff", "\ud800\udfff"): + with self.subTest(str=s): + self.assertRaises( + UnicodeEncodeError, + surrogatepass_errors, + UnicodeEncodeError("ascii", s, 0, len(s), "ouch") + ) tests = [ - ("ascii", "\ud800", b'\xed\xa0\x80', 3), ("utf-8", "\ud800", b'\xed\xa0\x80', 3), ("utf-16le", "\ud800", b'\x00\xd8', 2), ("utf-16be", "\ud800", b'\xd8\x00', 2), ("utf-32le", "\ud800", b'\x00\xd8\x00\x00', 4), ("utf-32be", "\ud800", b'\x00\x00\xd8\x00', 4), - ("ascii", "\udfff", b'\xed\xbf\xbf', 3), ("utf-8", "\udfff", b'\xed\xbf\xbf', 3), ("utf-16le", "\udfff", b'\xff\xdf', 2), ("utf-16be", "\udfff", b'\xdf\xff', 2), ("utf-32le", "\udfff", b'\xff\xdf\x00\x00', 4), ("utf-32be", "\udfff", b'\x00\x00\xdf\xff', 4), - ("ascii", "\ud800\udfff", b'\xed\xa0\x80\xed\xbf\xbf', 3), ("utf-8", "\ud800\udfff", b'\xed\xa0\x80\xed\xbf\xbf', 3), ("utf-16le", "\ud800\udfff", b'\x00\xd8\xff\xdf', 2), ("utf-16be", "\ud800\udfff", b'\xd8\x00\xdf\xff', 2), @@ -694,7 +774,7 @@ class CodecCallbackTest(unittest.TestCase): self.assertEqual( surrogatepass_errors( UnicodeDecodeError(enc, bytearray(b"a" + b[:n] + b"b"), - 1, n, "ouch")), + 1, 1 + n, "ouch")), (s[:1], 1 + n) ) @@ -738,6 +818,10 @@ class CodecCallbackTest(unittest.TestCase): codecs.backslashreplace_errors, codecs.lookup_error("backslashreplace") ) + self.assertEqual( + codecs.namereplace_errors, + codecs.lookup_error("namereplace") + ) def test_unencodablereplacement(self): def unencrepl(exc): @@ -890,7 +974,8 @@ class CodecCallbackTest(unittest.TestCase): class D(dict): def __getitem__(self, key): raise ValueError - for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"): + for err in ("strict", "replace", "xmlcharrefreplace", + "backslashreplace", "namereplace", "test.posreturn"): self.assertRaises(UnicodeError, codecs.charmap_encode, "\xff", err, {0xff: None}) self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err, D()) self.assertRaises(TypeError, codecs.charmap_encode, "\xff", err, {0xff: 300}) @@ -905,7 +990,7 @@ class CodecCallbackTest(unittest.TestCase): def __getitem__(self, key): raise ValueError #self.assertRaises(ValueError, "\xff".translate, D()) - self.assertRaises(TypeError, "\xff".translate, {0xff: sys.maxunicode+1}) + self.assertRaises(ValueError, "\xff".translate, {0xff: sys.maxunicode+1}) self.assertRaises(TypeError, "\xff".translate, {0xff: ()}) def test_bug828737(self): |