summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Lib/test/test_multibytecodec_support.py36
-rw-r--r--Modules/cjkcodecs/_codecs_hk.c93
-rw-r--r--Modules/cjkcodecs/_codecs_jp.c10
3 files changed, 85 insertions, 54 deletions
diff --git a/Lib/test/test_multibytecodec_support.py b/Lib/test/test_multibytecodec_support.py
index 7735976615..ef63b6934d 100644
--- a/Lib/test/test_multibytecodec_support.py
+++ b/Lib/test/test_multibytecodec_support.py
@@ -58,11 +58,16 @@ class TestBase:
result = func(source, scheme)[0]
if func is self.decode:
self.assertTrue(type(result) is str, type(result))
+ self.assertEqual(result, expected,
+ '%a.decode(%r, %r)=%a != %a'
+ % (source, self.encoding, scheme, result,
+ expected))
else:
self.assertTrue(type(result) is bytes, type(result))
- self.assertEqual(result, expected,
- '%a.decode(%r)=%a != %a'
- % (source, self.encoding, result, expected))
+ self.assertEqual(result, expected,
+ '%a.encode(%r, %r)=%a != %a'
+ % (source, self.encoding, scheme, result,
+ expected))
else:
self.assertRaises(UnicodeError, func, source, scheme)
@@ -279,6 +284,7 @@ class TestBase_Mapping(unittest.TestCase):
pass_enctest = []
pass_dectest = []
supmaps = []
+ codectests = []
def __init__(self, *args, **kw):
unittest.TestCase.__init__(self, *args, **kw)
@@ -348,6 +354,30 @@ class TestBase_Mapping(unittest.TestCase):
if (csetch, unich) not in self.pass_dectest:
self.assertEqual(str(csetch, self.encoding), unich)
+ def test_errorhandle(self):
+ for source, scheme, expected in self.codectests:
+ if isinstance(source, bytes):
+ func = source.decode
+ else:
+ func = source.encode
+ if expected:
+ if isinstance(source, bytes):
+ result = func(self.encoding, scheme)
+ self.assertTrue(type(result) is str, type(result))
+ self.assertEqual(result, expected,
+ '%a.decode(%r, %r)=%a != %a'
+ % (source, self.encoding, scheme, result,
+ expected))
+ else:
+ result = func(self.encoding, scheme)
+ self.assertTrue(type(result) is bytes, type(result))
+ self.assertEqual(result, expected,
+ '%a.encode(%r, %r)=%a != %a'
+ % (source, self.encoding, scheme, result,
+ expected))
+ else:
+ self.assertRaises(UnicodeError, func, self.encoding, scheme)
+
def load_teststring(name):
dir = os.path.join(os.path.dirname(__file__), 'cjkencodings')
with open(os.path.join(dir, name + '.txt'), 'rb') as f:
diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c
index aaf103db88..558a42f89c 100644
--- a/Modules/cjkcodecs/_codecs_hk.c
+++ b/Modules/cjkcodecs/_codecs_hk.c
@@ -115,55 +115,56 @@ DECODER(big5hkscs)
REQUIRE_INBUF(2)
- if (0xc6 <= c && c <= 0xc8 && (c >= 0xc7 || IN2 >= 0xa1))
- goto hkscsdec;
+ if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) {
+ TRYMAP_DEC(big5, **outbuf, c, IN2) {
+ NEXT(2, 1)
+ continue;
+ }
+ }
+
+ TRYMAP_DEC(big5hkscs, decoded, c, IN2)
+ {
+ int s = BH2S(c, IN2);
+ const unsigned char *hintbase;
+
+ assert(0x87 <= c && c <= 0xfe);
+ assert(0x40 <= IN2 && IN2 <= 0xfe);
+
+ if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
+ hintbase = big5hkscs_phint_0;
+ s -= BH2S(0x87, 0x40);
+ }
+ else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
+ hintbase = big5hkscs_phint_12130;
+ s -= BH2S(0xc6, 0xa1);
+ }
+ else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
+ hintbase = big5hkscs_phint_21924;
+ s -= BH2S(0xf9, 0xd6);
+ }
+ else
+ return MBERR_INTERNAL;
- TRYMAP_DEC(big5, **outbuf, c, IN2) {
- NEXT(2, 1)
+ if (hintbase[s >> 3] & (1 << (s & 7))) {
+ WRITEUCS4(decoded | 0x20000)
+ NEXT_IN(2)
+ }
+ else {
+ OUT1(decoded)
+ NEXT(2, 1)
+ }
+ continue;
}
- else
-hkscsdec: TRYMAP_DEC(big5hkscs, decoded, c, IN2) {
- int s = BH2S(c, IN2);
- const unsigned char *hintbase;
-
- assert(0x87 <= c && c <= 0xfe);
- assert(0x40 <= IN2 && IN2 <= 0xfe);
-
- if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
- hintbase = big5hkscs_phint_0;
- s -= BH2S(0x87, 0x40);
- }
- else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
- hintbase = big5hkscs_phint_12130;
- s -= BH2S(0xc6, 0xa1);
- }
- else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
- hintbase = big5hkscs_phint_21924;
- s -= BH2S(0xf9, 0xd6);
- }
- else
- return MBERR_INTERNAL;
-
- if (hintbase[s >> 3] & (1 << (s & 7))) {
- WRITEUCS4(decoded | 0x20000)
- NEXT_IN(2)
- }
- else {
- OUT1(decoded)
- NEXT(2, 1)
- }
- }
- else {
- switch ((c << 8) | IN2) {
- case 0x8862: WRITE2(0x00ca, 0x0304); break;
- case 0x8864: WRITE2(0x00ca, 0x030c); break;
- case 0x88a3: WRITE2(0x00ea, 0x0304); break;
- case 0x88a5: WRITE2(0x00ea, 0x030c); break;
- default: return 2;
- }
-
- NEXT(2, 2) /* all decoded codepoints are pairs, above. */
+
+ switch ((c << 8) | IN2) {
+ case 0x8862: WRITE2(0x00ca, 0x0304); break;
+ case 0x8864: WRITE2(0x00ca, 0x030c); break;
+ case 0x88a3: WRITE2(0x00ea, 0x0304); break;
+ case 0x88a5: WRITE2(0x00ea, 0x030c); break;
+ default: return 2;
}
+
+ NEXT(2, 2) /* all decoded codepoints are pairs, above. */
}
return 0;
diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c
index 901d3bee47..a05e01b32e 100644
--- a/Modules/cjkcodecs/_codecs_jp.c
+++ b/Modules/cjkcodecs/_codecs_jp.c
@@ -371,11 +371,11 @@ DECODER(euc_jp)
REQUIRE_OUTBUF(1)
- if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
- continue;
- }
+ if (c < 0x80) {
+ OUT1(c)
+ NEXT(1, 1)
+ continue;
+ }
if (c == 0x8e) {
/* JIS X 0201 half-width katakana */