diff options
Diffstat (limited to 'Lib/test/test_codeccallbacks.py')
-rw-r--r-- | Lib/test/test_codeccallbacks.py | 102 |
1 files changed, 99 insertions, 3 deletions
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index 84804bb0da..9743791c9b 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -158,6 +158,22 @@ class CodecCallbackTest(unittest.TestCase): sout = b"a\xac\\u1234\xa4\\u8000\\U0010ffff" self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout) + def test_nameescape(self): + # Does the same as backslashescape, but prefers ``\N{...}`` escape + # sequences. + sin = "a\xac\u1234\u20ac\u8000\U0010ffff" + sout = (b'a\\N{NOT SIGN}\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}' + b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff') + self.assertEqual(sin.encode("ascii", "namereplace"), sout) + + sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}' + b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff') + self.assertEqual(sin.encode("latin-1", "namereplace"), sout) + + sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\xa4' + b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff') + self.assertEqual(sin.encode("iso-8859-15", "namereplace"), sout) + def test_decoding_callbacks(self): # This is a test for a decoding callback handler # that allows the decoding of the invalid sequence @@ -297,7 +313,7 @@ class CodecCallbackTest(unittest.TestCase): def test_longstrings(self): # test long strings to check for memory overflow problems errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", - "backslashreplace"] + "backslashreplace", "namereplace"] # register the handlers under different names, # to prevent the codec from recognizing the name for err in errors: @@ -611,6 +627,81 @@ class CodecCallbackTest(unittest.TestCase): ("\\udfff", 1) ) + def test_badandgoodnamereplaceexceptions(self): + # "namereplace" complains about a non-exception passed in + self.assertRaises( + TypeError, + codecs.namereplace_errors, + 42 + ) + # "namereplace" complains about the wrong exception types + self.assertRaises( + TypeError, + codecs.namereplace_errors, + UnicodeError("ouch") + ) + # "namereplace" can only be used for encoding + self.assertRaises( + TypeError, + codecs.namereplace_errors, + UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch") + ) + self.assertRaises( + TypeError, + codecs.namereplace_errors, + UnicodeTranslateError("\u3042", 0, 1, "ouch") + ) + # Use the correct exception + self.assertEqual( + codecs.namereplace_errors( + UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")), + ("\\N{HIRAGANA LETTER A}", 1) + ) + self.assertEqual( + codecs.namereplace_errors( + UnicodeEncodeError("ascii", "\x00", 0, 1, "ouch")), + ("\\x00", 1) + ) + self.assertEqual( + codecs.namereplace_errors( + UnicodeEncodeError("ascii", "\xff", 0, 1, "ouch")), + ("\\N{LATIN SMALL LETTER Y WITH DIAERESIS}", 1) + ) + self.assertEqual( + codecs.namereplace_errors( + UnicodeEncodeError("ascii", "\u0100", 0, 1, "ouch")), + ("\\N{LATIN CAPITAL LETTER A WITH MACRON}", 1) + ) + self.assertEqual( + codecs.namereplace_errors( + UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")), + ("\\uffff", 1) + ) + if SIZEOF_WCHAR_T > 0: + self.assertEqual( + codecs.namereplace_errors( + UnicodeEncodeError("ascii", "\U00010000", + 0, 1, "ouch")), + ("\\N{LINEAR B SYLLABLE B008 A}", 1) + ) + self.assertEqual( + codecs.namereplace_errors( + UnicodeEncodeError("ascii", "\U0010ffff", + 0, 1, "ouch")), + ("\\U0010ffff", 1) + ) + # Lone surrogates (regardless of unicode width) + self.assertEqual( + codecs.namereplace_errors( + UnicodeEncodeError("ascii", "\ud800", 0, 1, "ouch")), + ("\\ud800", 1) + ) + self.assertEqual( + codecs.namereplace_errors( + UnicodeEncodeError("ascii", "\udfff", 0, 1, "ouch")), + ("\\udfff", 1) + ) + def test_badhandlerresults(self): results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) ) encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15") @@ -651,6 +742,10 @@ class CodecCallbackTest(unittest.TestCase): codecs.backslashreplace_errors, codecs.lookup_error("backslashreplace") ) + self.assertEqual( + codecs.namereplace_errors, + codecs.lookup_error("namereplace") + ) def test_unencodablereplacement(self): def unencrepl(exc): @@ -804,7 +899,8 @@ class CodecCallbackTest(unittest.TestCase): class D(dict): def __getitem__(self, key): raise ValueError - for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"): + for err in ("strict", "replace", "xmlcharrefreplace", + "backslashreplace", "namereplace", "test.posreturn"): self.assertRaises(UnicodeError, codecs.charmap_encode, "\xff", err, {0xff: None}) self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err, D()) self.assertRaises(TypeError, codecs.charmap_encode, "\xff", err, {0xff: 300}) @@ -819,7 +915,7 @@ class CodecCallbackTest(unittest.TestCase): def __getitem__(self, key): raise ValueError #self.assertRaises(ValueError, "\xff".translate, D()) - self.assertRaises(TypeError, "\xff".translate, {0xff: sys.maxunicode+1}) + self.assertRaises(ValueError, "\xff".translate, {0xff: sys.maxunicode+1}) self.assertRaises(TypeError, "\xff".translate, {0xff: ()}) def test_bug828737(self): |