Increased coverage of standard codec error handlers.

author: Serhiy Storchaka <storchaka@gmail.com> 2015-03-15 23:43:34 +0200
committer: Serhiy Storchaka <storchaka@gmail.com> 2015-03-15 23:43:34 +0200
commit: 7232abb77efba67ee171891dd9568881c97c7027 (patch)
tree: e720ca4be3165c218016a60a94adc65184e86d02 /Lib/test/test_codeccallbacks.py
parent: b5d2b7f14b9e7def8f6e70e9b955f5554a714c8e (diff)
parent: 38d6e0f84a4c1f23b6fba04b71b5f5dc3a6f305a (diff)
download: cpython-7232abb77efba67ee171891dd9568881c97c7027.tar.gz
1 file changed, 99 insertions, 17 deletions
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
index 54277de003..b52e1f6d1c 100644
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -150,6 +150,22 @@ class CodecCallbackTest(unittest.TestCase):
         sout = b"a\xac\\u1234\xa4\\u8000\\U0010ffff"
         self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
 
+    def test_nameescape(self):
+        # Does the same as backslashreplace, but prefers ``\N{...}`` escape
+        # sequences.
+        sin = "a\xac\u1234\u20ac\u8000\U0010ffff"
+        sout = (b'a\\N{NOT SIGN}\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
+                b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
+        self.assertEqual(sin.encode("ascii", "namereplace"), sout)
+
+        sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
+                b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
+        self.assertEqual(sin.encode("latin-1", "namereplace"), sout)
+
+        sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\xa4'
+                b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
+        self.assertEqual(sin.encode("iso-8859-15", "namereplace"), sout)
+
     def test_decoding_callbacks(self):
         # This is a test for a decoding callback handler
         # that allows the decoding of the invalid sequence
@@ -220,6 +236,11 @@ class CodecCallbackTest(unittest.TestCase):
                     "\u0000\ufffd"
                 )
 
+                self.assertEqual(
+                    b"\x00\x00\x00\x00\x00".decode("unicode-internal", "backslashreplace"),
+                    "\u0000\\x00"
+                )
+
                 codecs.register_error("test.hui", handler_unicodeinternal)
 
                 self.assertEqual(
@@ -287,7 +308,7 @@ class CodecCallbackTest(unittest.TestCase):
     def test_longstrings(self):
         # test long strings to check for memory overflow problems
         errors = [ "strict", "ignore", "replace", "xmlcharrefreplace",
-                   "backslashreplace"]
+                   "backslashreplace", "namereplace"]
         # register the handlers under different names,
         # to prevent the codec from recognizing the name
         for err in errors:
@@ -549,17 +570,6 @@ class CodecCallbackTest(unittest.TestCase):
            codecs.backslashreplace_errors,
            UnicodeError("ouch")
         )
-        # "backslashreplace" can only be used for encoding
-        self.assertRaises(
-            TypeError,
-            codecs.backslashreplace_errors,
-            UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")
-        )
-        self.assertRaises(
-            TypeError,
-            codecs.backslashreplace_errors,
-            UnicodeTranslateError("\u3042", 0, 1, "ouch")
-        )
         # Use the correct exception
         tests = [
             ("\u3042", "\\u3042"),
@@ -583,6 +593,69 @@ class CodecCallbackTest(unittest.TestCase):
                         UnicodeEncodeError("ascii", s, 0, len(s), "ouch")),
                     (r, len(s))
                 )
+                self.assertEqual(
+                    codecs.backslashreplace_errors(
+                        UnicodeTranslateError(s, 0, len(s), "ouch")),
+                    (r, len(s))
+                )
+        tests = [
+            (b"a", "\\x61"),
+            (b"\n", "\\x0a"),
+            (b"\x00", "\\x00"),
+            (b"\xff", "\\xff"),
+        ]
+        for b, r in tests:
+            with self.subTest(bytes=b):
+                self.assertEqual(
+                    codecs.backslashreplace_errors(
+                        UnicodeDecodeError("ascii", bytearray(b), 0, 1, "ouch")),
+                    (r, 1)
+                )
+
+    def test_badandgoodnamereplaceexceptions(self):
+        # "namereplace" complains about a non-exception passed in
+        self.assertRaises(
+           TypeError,
+           codecs.namereplace_errors,
+           42
+        )
+        # "namereplace" complains about the wrong exception types
+        self.assertRaises(
+           TypeError,
+           codecs.namereplace_errors,
+           UnicodeError("ouch")
+        )
+        # "namereplace" can only be used for encoding
+        self.assertRaises(
+            TypeError,
+            codecs.namereplace_errors,
+            UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")
+        )
+        self.assertRaises(
+            TypeError,
+            codecs.namereplace_errors,
+            UnicodeTranslateError("\u3042", 0, 1, "ouch")
+        )
+        # Use the correct exception
+        tests = [
+            ("\u3042", "\\N{HIRAGANA LETTER A}"),
+            ("\x00", "\\x00"),
+            ("\ufbf9", "\\N{ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH "
+                       "HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM}"),
+            ("\U000e007f", "\\N{CANCEL TAG}"),
+            ("\U0010ffff", "\\U0010ffff"),
+            # Lone surrogates
+            ("\ud800", "\\ud800"),
+            ("\udfff", "\\udfff"),
+            ("\ud800\udfff", "\\ud800\\udfff"),
+        ]
+        for s, r in tests:
+            with self.subTest(str=s):
+                self.assertEqual(
+                    codecs.namereplace_errors(
+                        UnicodeEncodeError("ascii", s, 0, len(s), "ouch")),
+                    (r, len(s))
+                )
 
     def test_badandgoodsurrogateescapeexceptions(self):
         surrogateescape_errors = codecs.lookup_error('surrogateescape')
@@ -661,20 +734,24 @@ class CodecCallbackTest(unittest.TestCase):
                     surrogatepass_errors,
                     UnicodeDecodeError(enc, "a".encode(enc), 0, 1, "ouch")
                 )
+        for s in ("\ud800", "\udfff", "\ud800\udfff"):
+            with self.subTest(str=s):
+                self.assertRaises(
+                    UnicodeEncodeError,
+                    surrogatepass_errors,
+                    UnicodeEncodeError("ascii", s, 0, len(s), "ouch")
+                )
         tests = [
-            ("ascii", "\ud800", b'\xed\xa0\x80', 3),
             ("utf-8", "\ud800", b'\xed\xa0\x80', 3),
             ("utf-16le", "\ud800", b'\x00\xd8', 2),
             ("utf-16be", "\ud800", b'\xd8\x00', 2),
             ("utf-32le", "\ud800", b'\x00\xd8\x00\x00', 4),
             ("utf-32be", "\ud800", b'\x00\x00\xd8\x00', 4),
-            ("ascii", "\udfff", b'\xed\xbf\xbf', 3),
             ("utf-8", "\udfff", b'\xed\xbf\xbf', 3),
             ("utf-16le", "\udfff", b'\xff\xdf', 2),
             ("utf-16be", "\udfff", b'\xdf\xff', 2),
             ("utf-32le", "\udfff", b'\xff\xdf\x00\x00', 4),
             ("utf-32be", "\udfff", b'\x00\x00\xdf\xff', 4),
-            ("ascii", "\ud800\udfff", b'\xed\xa0\x80\xed\xbf\xbf', 3),
             ("utf-8", "\ud800\udfff", b'\xed\xa0\x80\xed\xbf\xbf', 3),
             ("utf-16le", "\ud800\udfff", b'\x00\xd8\xff\xdf', 2),
             ("utf-16be", "\ud800\udfff", b'\xd8\x00\xdf\xff', 2),
@@ -734,6 +811,10 @@ class CodecCallbackTest(unittest.TestCase):
             codecs.backslashreplace_errors,
             codecs.lookup_error("backslashreplace")
         )
+        self.assertEqual(
+            codecs.namereplace_errors,
+            codecs.lookup_error("namereplace")
+        )
 
     def test_unencodablereplacement(self):
         def unencrepl(exc):
@@ -886,7 +967,8 @@ class CodecCallbackTest(unittest.TestCase):
         class D(dict):
             def __getitem__(self, key):
                 raise ValueError
-        for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"):
+        for err in ("strict", "replace", "xmlcharrefreplace",
+                    "backslashreplace", "namereplace", "test.posreturn"):
             self.assertRaises(UnicodeError, codecs.charmap_encode, "\xff", err, {0xff: None})
             self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err, D())
             self.assertRaises(TypeError, codecs.charmap_encode, "\xff", err, {0xff: 300})
@@ -901,7 +983,7 @@ class CodecCallbackTest(unittest.TestCase):
             def __getitem__(self, key):
                 raise ValueError
         #self.assertRaises(ValueError, "\xff".translate, D())
-        self.assertRaises(TypeError, "\xff".translate, {0xff: sys.maxunicode+1})
+        self.assertRaises(ValueError, "\xff".translate, {0xff: sys.maxunicode+1})
         self.assertRaises(TypeError, "\xff".translate, {0xff: ()})
 
     def test_bug828737(self):
author	Serhiy Storchaka <storchaka@gmail.com>	2015-03-15 23:43:34 +0200
committer	Serhiy Storchaka <storchaka@gmail.com>	2015-03-15 23:43:34 +0200
commit	7232abb77efba67ee171891dd9568881c97c7027 (patch)
tree	e720ca4be3165c218016a60a94adc65184e86d02 /Lib/test/test_codeccallbacks.py
parent	b5d2b7f14b9e7def8f6e70e9b955f5554a714c8e (diff)
parent	38d6e0f84a4c1f23b6fba04b71b5f5dc3a6f305a (diff)
download	cpython-7232abb77efba67ee171891dd9568881c97c7027.tar.gz