summary refs log tree commit diff
path: root/Lib/test/test_codecencodings_jp.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/test_codecencodings_jp.py')
-rw-r--r-- Lib/test/test_codecencodings_jp.py 119
1 files changed, 72 insertions, 47 deletions
diff --git a/Lib/test/test_codecencodings_jp.py b/Lib/test/test_codecencodings_jp.py
index f56a373896..4091948b9b 100644
--- a/Lib/test/test_codecencodings_jp.py
+++ b/Lib/test/test_codecencodings_jp.py
@@ -1,64 +1,70 @@
-#!/usr/bin/env python3
#
# test_codecencodings_jp.py
# Codec encoding tests for Japanese encodings.
#
from test import support
-from test import test_multibytecodec_support
+from test import multibytecodec_support
import unittest
-class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase):
+class Test_CP932(multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'cp932'
- tstring = test_multibytecodec_support.load_teststring('shift_jis')
+ tstring = multibytecodec_support.load_teststring('shift_jis')
codectests = (
# invalid bytes
(b"abc\x81\x00\x81\x00\x82\x84", "strict", None),
(b"abc\xf8", "strict", None),
- (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\uff44"),
- (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
- (b"abc\x81\x00\x82\x84", "ignore", "abc\uff44"),
+ (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\x00\uff44"),
+ (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\x00\uff44\ufffd"),
+ (b"abc\x81\x00\x82\x84", "ignore", "abc\x00\uff44"),
+ (b"ab\xEBxy", "replace", "ab\uFFFDxy"),
+ (b"ab\xF0\x39xy", "replace", "ab\uFFFD9xy"),
+ (b"ab\xEA\xF0xy", "replace", 'ab\ufffd\ue038y'),
# sjis vs cp932
(b"\\\x7e", "replace", "\\\x7e"),
(b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
)
-class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
+euc_commontests = (
+ # invalid bytes
+ (b"abc\x80\x80\xc1\xc4", "strict", None),
+ (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u7956"),
+ (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u7956\ufffd"),
+ (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
+ (b"abc\xc8", "strict", None),
+ (b"abc\x8f\x83\x83", "replace", "abc\ufffd\ufffd\ufffd"),
+ (b"\x82\xFCxy", "replace", "\ufffd\ufffdxy"),
+ (b"\xc1\x64", "strict", None),
+ (b"\xa1\xc0", "strict", "\uff3c"),
+ (b"\xa1\xc0\\", "strict", "\uff3c\\"),
+ (b"\x8eXY", "replace", "\ufffdXY"),
+)
+
+class Test_EUC_JIS_2004(multibytecodec_support.TestBase,
unittest.TestCase):
- encoding = 'euc_jisx0213'
- tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')
- codectests = (
- # invalid bytes
- (b"abc\x80\x80\xc1\xc4", "strict", None),
- (b"abc\xc8", "strict", None),
- (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
- (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
- (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
- (b"abc\x8f\x83\x83", "replace", "abc\ufffd"),
- (b"\xc1\x64", "strict", None),
- (b"\xa1\xc0", "strict", "\uff3c"),
- )
+ encoding = 'euc_jis_2004'
+ tstring = multibytecodec_support.load_teststring('euc_jisx0213')
+ codectests = euc_commontests
xmlcharnametest = (
"\xab\u211c\xbb = \u2329\u1234\u232a",
b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
)
-eucjp_commontests = (
- (b"abc\x80\x80\xc1\xc4", "strict", None),
- (b"abc\xc8", "strict", None),
- (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
- (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
- (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
- (b"abc\x8f\x83\x83", "replace", "abc\ufffd"),
- (b"\xc1\x64", "strict", None),
-)
+class Test_EUC_JISX0213(multibytecodec_support.TestBase,
+ unittest.TestCase):
+ encoding = 'euc_jisx0213'
+ tstring = multibytecodec_support.load_teststring('euc_jisx0213')
+ codectests = euc_commontests
+ xmlcharnametest = (
+ "\xab\u211c\xbb = \u2329\u1234\u232a",
+ b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
+ )
-class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
+class Test_EUC_JP_COMPAT(multibytecodec_support.TestBase,
unittest.TestCase):
encoding = 'euc_jp'
- tstring = test_multibytecodec_support.load_teststring('euc_jp')
- codectests = eucjp_commontests + (
- (b"\xa1\xc0\\", "strict", "\uff3c\\"),
+ tstring = multibytecodec_support.load_teststring('euc_jp')
+ codectests = euc_commontests + (
("\xa5", "strict", b"\x5c"),
("\u203e", "strict", b"\x7e"),
)
@@ -66,29 +72,48 @@ class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
shiftjis_commonenctests = (
(b"abc\x80\x80\x82\x84", "strict", None),
(b"abc\xf8", "strict", None),
- (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
- (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
(b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
)
-class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
+class Test_SJIS_COMPAT(multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'shift_jis'
- tstring = test_multibytecodec_support.load_teststring('shift_jis')
+ tstring = multibytecodec_support.load_teststring('shift_jis')
codectests = shiftjis_commonenctests + (
+ (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
+ (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),
+
(b"\\\x7e", "strict", "\\\x7e"),
(b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
+ (b"abc\x81\x39", "replace", "abc\ufffd9"),
+ (b"abc\xEA\xFC", "replace", "abc\ufffd\ufffd"),
+ (b"abc\xFF\x58", "replace", "abc\ufffdX"),
)
-class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
+class Test_SJIS_2004(multibytecodec_support.TestBase, unittest.TestCase):
+ encoding = 'shift_jis_2004'
+ tstring = multibytecodec_support.load_teststring('shift_jis')
+ codectests = shiftjis_commonenctests + (
+ (b"\\\x7e", "strict", "\xa5\u203e"),
+ (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\\\u2016\u2212"),
+ (b"abc\xEA\xFC", "strict", "abc\u64bf"),
+ (b"\x81\x39xy", "replace", "\ufffd9xy"),
+ (b"\xFF\x58xy", "replace", "\ufffdXxy"),
+ (b"\x80\x80\x82\x84xy", "replace", "\ufffd\ufffd\uff44xy"),
+ (b"\x80\x80\x82\x84\x88xy", "replace", "\ufffd\ufffd\uff44\u5864y"),
+ (b"\xFC\xFBxy", "replace", '\ufffd\u95b4y'),
+ )
+ xmlcharnametest = (
+ "\xab\u211c\xbb = \u2329\u1234\u232a",
+ b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
+ )
+
+class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'shift_jisx0213'
- tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')
- codectests = (
- # invalid bytes
- (b"abc\x80\x80\x82\x84", "strict", None),
- (b"abc\xf8", "strict", None),
- (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
- (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
- (b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
+ tstring = multibytecodec_support.load_teststring('shift_jisx0213')
+ codectests = shiftjis_commonenctests + (
+ (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
+ (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),
+
# sjis vs cp932
(b"\\\x7e", "replace", "\xa5\u203e"),
(b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),