summary | refs | log | tree | commit | diff
path: root/Lib/test/test_codeccallbacks.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/test_codeccallbacks.py')
-rw-r--r-- Lib/test/test_codeccallbacks.py 128
1 files changed, 114 insertions, 14 deletions
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
index 84804bb0da..e29ac53039 100644
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -158,6 +158,22 @@ class CodecCallbackTest(unittest.TestCase):
sout = b"a\xac\\u1234\xa4\\u8000\\U0010ffff"
self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
+ def test_nameescape(self):
+ # Does the same as backslashreplace, but prefers ``\N{...}`` escape
+ # sequences.
+ sin = "a\xac\u1234\u20ac\u8000\U0010ffff"
+ sout = (b'a\\N{NOT SIGN}\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
+ b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
+ self.assertEqual(sin.encode("ascii", "namereplace"), sout)
+
+ sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
+ b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
+ self.assertEqual(sin.encode("latin-1", "namereplace"), sout)
+
+ sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\xa4'
+ b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
+ self.assertEqual(sin.encode("iso-8859-15", "namereplace"), sout)
+
def test_decoding_callbacks(self):
# This is a test for a decoding callback handler
# that allows the decoding of the invalid sequence
@@ -230,6 +246,11 @@ class CodecCallbackTest(unittest.TestCase):
"\u0000\ufffd"
)
+ self.assertEqual(
+ b"\x00\x00\x00\x00\x00".decode("unicode-internal", "backslashreplace"),
+ "\u0000\\x00"
+ )
+
codecs.register_error("test.hui", handler_unicodeinternal)
self.assertEqual(
@@ -297,7 +318,7 @@ class CodecCallbackTest(unittest.TestCase):
def test_longstrings(self):
# test long strings to check for memory overflow problems
errors = [ "strict", "ignore", "replace", "xmlcharrefreplace",
- "backslashreplace"]
+ "backslashreplace", "namereplace"]
# register the handlers under different names,
# to prevent the codec from recognizing the name
for err in errors:
@@ -549,17 +570,6 @@ class CodecCallbackTest(unittest.TestCase):
codecs.backslashreplace_errors,
UnicodeError("ouch")
)
- # "backslashreplace" can only be used for encoding
- self.assertRaises(
- TypeError,
- codecs.backslashreplace_errors,
- UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")
- )
- self.assertRaises(
- TypeError,
- codecs.backslashreplace_errors,
- UnicodeTranslateError("\u3042", 0, 1, "ouch")
- )
# Use the correct exception
self.assertEqual(
codecs.backslashreplace_errors(
@@ -611,6 +621,91 @@ class CodecCallbackTest(unittest.TestCase):
("\\udfff", 1)
)
+ def test_badandgoodnamereplaceexceptions(self):
+ # "namereplace" complains about a non-exception passed in
+ self.assertRaises(
+ TypeError,
+ codecs.namereplace_errors,
+ 42
+ )
+ # "namereplace" complains about the wrong exception types
+ self.assertRaises(
+ TypeError,
+ codecs.namereplace_errors,
+ UnicodeError("ouch")
+ )
+ # "namereplace" can only be used for encoding
+ self.assertRaises(
+ TypeError,
+ codecs.namereplace_errors,
+ UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")
+ )
+ self.assertRaises(
+ TypeError,
+ codecs.namereplace_errors,
+ UnicodeTranslateError("\u3042", 0, 1, "ouch")
+ )
+ # Use the correct exception
+ self.assertEqual(
+ codecs.namereplace_errors(
+ UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
+ ("\\N{HIRAGANA LETTER A}", 1)
+ )
+ self.assertEqual(
+ codecs.namereplace_errors(
+ UnicodeEncodeError("ascii", "\x00", 0, 1, "ouch")),
+ ("\\x00", 1)
+ )
+ self.assertEqual(
+ codecs.namereplace_errors(
+ UnicodeEncodeError("ascii", "\xff", 0, 1, "ouch")),
+ ("\\N{LATIN SMALL LETTER Y WITH DIAERESIS}", 1)
+ )
+ self.assertEqual(
+ codecs.namereplace_errors(
+ UnicodeEncodeError("ascii", "\u0100", 0, 1, "ouch")),
+ ("\\N{LATIN CAPITAL LETTER A WITH MACRON}", 1)
+ )
+ self.assertEqual(
+ codecs.namereplace_errors(
+ UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")),
+ ("\\uffff", 1)
+ )
+ if SIZEOF_WCHAR_T > 0:
+ self.assertEqual(
+ codecs.namereplace_errors(
+ UnicodeEncodeError("ascii", "\U00010000",
+ 0, 1, "ouch")),
+ ("\\N{LINEAR B SYLLABLE B008 A}", 1)
+ )
+ self.assertEqual(
+ codecs.namereplace_errors(
+ UnicodeEncodeError("ascii", "\U0010ffff",
+ 0, 1, "ouch")),
+ ("\\U0010ffff", 1)
+ )
+ # Lone surrogates (regardless of unicode width)
+ self.assertEqual(
+ codecs.namereplace_errors(
+ UnicodeEncodeError("ascii", "\ud800", 0, 1, "ouch")),
+ ("\\ud800", 1)
+ )
+ self.assertEqual(
+ codecs.namereplace_errors(
+ UnicodeEncodeError("ascii", "\udfff", 0, 1, "ouch")),
+ ("\\udfff", 1)
+ )
+ self.assertEqual(
+ codecs.backslashreplace_errors(
+ UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")),
+ ("\\xff", 1)
+ )
+ self.assertEqual(
+ codecs.backslashreplace_errors(
+ UnicodeTranslateError("\u3042", 0, 1, "ouch")),
+ ("\\u3042", 1)
+ )
+
def test_badhandlerresults(self):
results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
@@ -651,6 +746,10 @@ class CodecCallbackTest(unittest.TestCase):
codecs.backslashreplace_errors,
codecs.lookup_error("backslashreplace")
)
+ self.assertEqual(
+ codecs.namereplace_errors,
+ codecs.lookup_error("namereplace")
+ )
def test_unencodablereplacement(self):
def unencrepl(exc):
@@ -804,7 +903,8 @@ class CodecCallbackTest(unittest.TestCase):
class D(dict):
def __getitem__(self, key):
raise ValueError
- for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"):
+ for err in ("strict", "replace", "xmlcharrefreplace",
+ "backslashreplace", "namereplace", "test.posreturn"):
self.assertRaises(UnicodeError, codecs.charmap_encode, "\xff", err, {0xff: None})
self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err, D())
self.assertRaises(TypeError, codecs.charmap_encode, "\xff", err, {0xff: 300})
@@ -819,7 +919,7 @@ class CodecCallbackTest(unittest.TestCase):
def __getitem__(self, key):
raise ValueError
#self.assertRaises(ValueError, "\xff".translate, D())
- self.assertRaises(TypeError, "\xff".translate, {0xff: sys.maxunicode+1})
+ self.assertRaises(ValueError, "\xff".translate, {0xff: sys.maxunicode+1})
self.assertRaises(TypeError, "\xff".translate, {0xff: ()})
def test_bug828737(self):