diff options
Diffstat (limited to 'Lib/test/test_codecs.py')
-rw-r--r-- | Lib/test/test_codecs.py | 86 |
1 files changed, 65 insertions, 21 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 93cdf915b8..d560d7aa46 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -29,7 +29,7 @@ class MixInCheckStateHandling: d = codecs.getincrementaldecoder(encoding)() part1 = d.decode(s[:i]) state = d.getstate() - self.assertTrue(isinstance(state[1], int)) + self.assertIsInstance(state[1], int) # Check that the condition stated in the documentation for # IncrementalDecoder.getstate() holds if not state[1]: @@ -72,7 +72,6 @@ class ReadTest(unittest.TestCase, MixInCheckStateHandling): # check that there's nothing left in the buffers self.assertEqual(r.read(), "") self.assertEqual(r.bytebuffer, b"") - self.assertEqual(r.charbuffer, "") # do the check again, this time using a incremental decoder d = codecs.getincrementaldecoder(self.encoding)() @@ -545,6 +544,12 @@ class UTF16LETest(ReadTest): self.assertRaises(UnicodeDecodeError, codecs.utf_16_le_decode, b"\xff", "strict", True) + def test_nonbmp(self): + self.assertEqual("\U00010203".encode(self.encoding), + b'\x00\xd8\x03\xde') + self.assertEqual(b'\x00\xd8\x03\xde'.decode(self.encoding), + "\U00010203") + class UTF16BETest(ReadTest): encoding = "utf-16-be" @@ -567,6 +572,12 @@ class UTF16BETest(ReadTest): self.assertRaises(UnicodeDecodeError, codecs.utf_16_be_decode, b"\xff", "strict", True) + def test_nonbmp(self): + self.assertEqual("\U00010203".encode(self.encoding), + b'\xd8\x00\xde\x03') + self.assertEqual(b'\xd8\x00\xde\x03'.decode(self.encoding), + "\U00010203") + class UTF8Test(ReadTest): encoding = "utf-8" @@ -653,18 +664,6 @@ class ReadBufferTest(unittest.TestCase): self.assertRaises(TypeError, codecs.readbuffer_encode) self.assertRaises(TypeError, codecs.readbuffer_encode, 42) -class CharBufferTest(unittest.TestCase): - - def test_string(self): - self.assertEqual(codecs.charbuffer_encode(b"spam"), (b"spam", 4)) - - def test_empty(self): - self.assertEqual(codecs.charbuffer_encode(b""), (b"", 0)) - - def test_bad_args(self): - self.assertRaises(TypeError, codecs.charbuffer_encode) - self.assertRaises(TypeError, codecs.charbuffer_encode, 42) - class UTF8SigTest(ReadTest): encoding = "utf-8-sig" @@ -1276,6 +1275,7 @@ all_unicode_encodings = [ "cp424", "cp437", "cp500", + "cp720", "cp737", "cp775", "cp850", @@ -1283,6 +1283,7 @@ all_unicode_encodings = [ "cp855", "cp856", "cp857", + "cp858", "cp860", "cp861", "cp862", @@ -1371,11 +1372,6 @@ broken_incremental_coders = broken_unicode_with_streams + [ "idna", ] -# The following encodings only support "strict" mode -only_strict_mode = [ - "idna", -] - class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling): def test_basics(self): s = "abc123" # all codecs should be able to encode these @@ -1450,7 +1446,7 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling): result = "".join(codecs.iterdecode(codecs.iterencode("", encoding), encoding)) self.assertEqual(result, "") - if encoding not in only_strict_mode: + if encoding not in ("idna", "mbcs"): # check incremental decoder/encoder with errors argument try: encoder = codecs.getincrementalencoder(encoding)("ignore") @@ -1675,6 +1671,54 @@ class BomTest(unittest.TestCase): self.assertEqual(f.read(), data * 2) +bytes_transform_encodings = [ + "base64_codec", + "uu_codec", + "quopri_codec", + "hex_codec", +] +try: + import zlib +except ImportError: + pass +else: + bytes_transform_encodings.append("zlib_codec") +try: + import bz2 +except ImportError: + pass +else: + bytes_transform_encodings.append("bz2_codec") + +class TransformCodecTest(unittest.TestCase): + + def test_basics(self): + binput = bytes(range(256)) + for encoding in bytes_transform_encodings: + # generic codecs interface + (o, size) = codecs.getencoder(encoding)(binput) + self.assertEqual(size, len(binput)) + (i, size) = codecs.getdecoder(encoding)(o) + self.assertEqual(size, len(o)) + self.assertEqual(i, binput) + + def test_read(self): + for encoding in bytes_transform_encodings: + sin = codecs.encode(b"\x80", encoding) + reader = codecs.getreader(encoding)(io.BytesIO(sin)) + sout = reader.read() + self.assertEqual(sout, b"\x80") + + def test_readline(self): + for encoding in bytes_transform_encodings: + if encoding in ['uu_codec', 'zlib_codec']: + continue + sin = codecs.encode(b"\x80", encoding) + reader = codecs.getreader(encoding)(io.BytesIO(sin)) + sout = reader.readline() + self.assertEqual(sout, b"\x80") + + def test_main(): support.run_unittest( UTF32Test, @@ -1688,7 +1732,6 @@ def test_main(): UTF7Test, UTF16ExTest, ReadBufferTest, - CharBufferTest, RecodingTest, PunycodeTest, UnicodeInternalTest, @@ -1703,6 +1746,7 @@ def test_main(): TypesTest, SurrogateEscapeTest, BomTest, + TransformCodecTest, ) |