diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2015-03-15 23:43:34 +0200 |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2015-03-15 23:43:34 +0200 |
commit | 7232abb77efba67ee171891dd9568881c97c7027 (patch) | |
tree | e720ca4be3165c218016a60a94adc65184e86d02 /Lib/test/test_codeccallbacks.py | |
parent | b5d2b7f14b9e7def8f6e70e9b955f5554a714c8e (diff) | |
parent | 38d6e0f84a4c1f23b6fba04b71b5f5dc3a6f305a (diff) | |
download | cpython-7232abb77efba67ee171891dd9568881c97c7027.tar.gz |
Increased coverage of standard codec error handlers.
Diffstat (limited to 'Lib/test/test_codeccallbacks.py')
-rw-r--r-- | Lib/test/test_codeccallbacks.py | 116 |
1 files changed, 99 insertions, 17 deletions
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index 54277de003..b52e1f6d1c 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -150,6 +150,22 @@ class CodecCallbackTest(unittest.TestCase): sout = b"a\xac\\u1234\xa4\\u8000\\U0010ffff" self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout) + def test_nameescape(self): + # Does the same as backslashescape, but prefers ``\N{...}`` escape + # sequences. + sin = "a\xac\u1234\u20ac\u8000\U0010ffff" + sout = (b'a\\N{NOT SIGN}\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}' + b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff') + self.assertEqual(sin.encode("ascii", "namereplace"), sout) + + sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}' + b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff') + self.assertEqual(sin.encode("latin-1", "namereplace"), sout) + + sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\xa4' + b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff') + self.assertEqual(sin.encode("iso-8859-15", "namereplace"), sout) + def test_decoding_callbacks(self): # This is a test for a decoding callback handler # that allows the decoding of the invalid sequence @@ -220,6 +236,11 @@ class CodecCallbackTest(unittest.TestCase): "\u0000\ufffd" ) + self.assertEqual( + b"\x00\x00\x00\x00\x00".decode("unicode-internal", "backslashreplace"), + "\u0000\\x00" + ) + codecs.register_error("test.hui", handler_unicodeinternal) self.assertEqual( @@ -287,7 +308,7 @@ class CodecCallbackTest(unittest.TestCase): def test_longstrings(self): # test long strings to check for memory overflow problems errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", - "backslashreplace"] + "backslashreplace", "namereplace"] # register the handlers under different names, # to prevent the codec from recognizing the name for err in errors: @@ -549,17 +570,6 @@ class CodecCallbackTest(unittest.TestCase): codecs.backslashreplace_errors, UnicodeError("ouch") ) - # "backslashreplace" can only be used for encoding - self.assertRaises( - TypeError, - codecs.backslashreplace_errors, - UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch") - ) - self.assertRaises( - TypeError, - codecs.backslashreplace_errors, - UnicodeTranslateError("\u3042", 0, 1, "ouch") - ) # Use the correct exception tests = [ ("\u3042", "\\u3042"), @@ -583,6 +593,69 @@ class CodecCallbackTest(unittest.TestCase): UnicodeEncodeError("ascii", s, 0, len(s), "ouch")), (r, len(s)) ) + self.assertEqual( + codecs.backslashreplace_errors( + UnicodeTranslateError(s, 0, len(s), "ouch")), + (r, len(s)) + ) + tests = [ + (b"a", "\\x61"), + (b"\n", "\\x0a"), + (b"\x00", "\\x00"), + (b"\xff", "\\xff"), + ] + for b, r in tests: + with self.subTest(bytes=b): + self.assertEqual( + codecs.backslashreplace_errors( + UnicodeDecodeError("ascii", bytearray(b), 0, 1, "ouch")), + (r, 1) + ) + + def test_badandgoodnamereplaceexceptions(self): + # "namereplace" complains about a non-exception passed in + self.assertRaises( + TypeError, + codecs.namereplace_errors, + 42 + ) + # "namereplace" complains about the wrong exception types + self.assertRaises( + TypeError, + codecs.namereplace_errors, + UnicodeError("ouch") + ) + # "namereplace" can only be used for encoding + self.assertRaises( + TypeError, + codecs.namereplace_errors, + UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch") + ) + self.assertRaises( + TypeError, + codecs.namereplace_errors, + UnicodeTranslateError("\u3042", 0, 1, "ouch") + ) + # Use the correct exception + tests = [ + ("\u3042", "\\N{HIRAGANA LETTER A}"), + ("\x00", "\\x00"), + ("\ufbf9", "\\N{ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH " + "HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM}"), + ("\U000e007f", "\\N{CANCEL TAG}"), + ("\U0010ffff", "\\U0010ffff"), + # Lone surrogates + ("\ud800", "\\ud800"), + ("\udfff", "\\udfff"), + ("\ud800\udfff", "\\ud800\\udfff"), + ] + for s, r in tests: + with self.subTest(str=s): + self.assertEqual( + codecs.namereplace_errors( + UnicodeEncodeError("ascii", s, 0, len(s), "ouch")), + (r, len(s)) + ) def test_badandgoodsurrogateescapeexceptions(self): surrogateescape_errors = codecs.lookup_error('surrogateescape') @@ -661,20 +734,24 @@ class CodecCallbackTest(unittest.TestCase): surrogatepass_errors, UnicodeDecodeError(enc, "a".encode(enc), 0, 1, "ouch") ) + for s in ("\ud800", "\udfff", "\ud800\udfff"): + with self.subTest(str=s): + self.assertRaises( + UnicodeEncodeError, + surrogatepass_errors, + UnicodeEncodeError("ascii", s, 0, len(s), "ouch") + ) tests = [ - ("ascii", "\ud800", b'\xed\xa0\x80', 3), ("utf-8", "\ud800", b'\xed\xa0\x80', 3), ("utf-16le", "\ud800", b'\x00\xd8', 2), ("utf-16be", "\ud800", b'\xd8\x00', 2), ("utf-32le", "\ud800", b'\x00\xd8\x00\x00', 4), ("utf-32be", "\ud800", b'\x00\x00\xd8\x00', 4), - ("ascii", "\udfff", b'\xed\xbf\xbf', 3), ("utf-8", "\udfff", b'\xed\xbf\xbf', 3), ("utf-16le", "\udfff", b'\xff\xdf', 2), ("utf-16be", "\udfff", b'\xdf\xff', 2), ("utf-32le", "\udfff", b'\xff\xdf\x00\x00', 4), ("utf-32be", "\udfff", b'\x00\x00\xdf\xff', 4), - ("ascii", "\ud800\udfff", b'\xed\xa0\x80\xed\xbf\xbf', 3), ("utf-8", "\ud800\udfff", b'\xed\xa0\x80\xed\xbf\xbf', 3), ("utf-16le", "\ud800\udfff", b'\x00\xd8\xff\xdf', 2), ("utf-16be", "\ud800\udfff", b'\xd8\x00\xdf\xff', 2), @@ -734,6 +811,10 @@ class CodecCallbackTest(unittest.TestCase): codecs.backslashreplace_errors, codecs.lookup_error("backslashreplace") ) + self.assertEqual( + codecs.namereplace_errors, + codecs.lookup_error("namereplace") + ) def test_unencodablereplacement(self): def unencrepl(exc): @@ -886,7 +967,8 @@ class CodecCallbackTest(unittest.TestCase): class D(dict): def __getitem__(self, key): raise ValueError - for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"): + for err in ("strict", "replace", "xmlcharrefreplace", + "backslashreplace", "namereplace", "test.posreturn"): self.assertRaises(UnicodeError, codecs.charmap_encode, "\xff", err, {0xff: None}) self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err, D()) self.assertRaises(TypeError, codecs.charmap_encode, "\xff", err, {0xff: 300}) @@ -901,7 +983,7 @@ class CodecCallbackTest(unittest.TestCase): def __getitem__(self, key): raise ValueError #self.assertRaises(ValueError, "\xff".translate, D()) - self.assertRaises(TypeError, "\xff".translate, {0xff: sys.maxunicode+1}) + self.assertRaises(ValueError, "\xff".translate, {0xff: sys.maxunicode+1}) self.assertRaises(TypeError, "\xff".translate, {0xff: ()}) def test_bug828737(self): |