summary | refs | log | tree | commit | diff
path: root/iconvdata/iso-2022-jp.c
diff options
context:
space:
mode:
Diffstat (limited to 'iconvdata/iso-2022-jp.c')
-rw-r--r-- iconvdata/iso-2022-jp.c 194
1 files changed, 121 insertions, 73 deletions
diff --git a/iconvdata/iso-2022-jp.c b/iconvdata/iso-2022-jp.c
index 9aeaad6c3e..e888d310b7 100644
--- a/iconvdata/iso-2022-jp.c
+++ b/iconvdata/iso-2022-jp.c
@@ -91,7 +91,8 @@ enum
ASCII_set = 0,
JISX0208_1978_set,
JISX0208_1983_set,
- JISX0201_set,
+ JISX0201_Roman_set,
+ JISX0201_Kana_set,
GB2312_set,
KSC5601_set,
JISX0212_set,
@@ -224,13 +225,6 @@ gconv_end (struct gconv_step *data)
{ \
uint32_t ch = *inptr; \
\
- /* This is a 7bit character set, disallow all 8bit characters. */ \
- if (ch > 0x7f) \
- { \
- result = GCONV_ILLEGAL_INPUT; \
- break; \
- } \
- \
/* Recognize escape sequences. */ \
if (ch == ESC) \
{ \
@@ -260,7 +254,14 @@ gconv_end (struct gconv_step *data)
else if (inptr[2] == 'J') \
{ \
/* JIS X 0201 selected. */ \
- set = JISX0201_set; \
+ set = JISX0201_Roman_set; \
+ inptr += 3; \
+ continue; \
+ } \
+ else if (var == iso2022jp2 && inptr[2] == 'I') \
+ { \
+ /* JIS X 0201 selected. */ \
+ set = JISX0201_Kana_set; \
inptr += 3; \
continue; \
} \
@@ -333,7 +334,18 @@ gconv_end (struct gconv_step *data)
|| (var >= ISO88591_set && ch < 0x20)) \
/* Almost done, just advance the input pointer. */ \
++inptr; \
- else if (set == JISX0201_set) \
+ else if (set == JISX0201_Roman_set) \
+ { \
+ /* Use the JIS X 0201 table. */ \
+ ch = jisx0201_to_ucs4 (ch); \
+ if (ch == UNKNOWN_10646_CHAR) \
+ { \
+ result = GCONV_ILLEGAL_INPUT; \
+ break; \
+ } \
+ ++inptr; \
+ } \
+ else if (set == JISX0201_Kana_set) \
{ \
/* Use the JIS X 0201 table. */ \
ch = jisx0201_to_ucs4 (ch + 0x80); \
@@ -348,13 +360,13 @@ gconv_end (struct gconv_step *data)
{ \
/* This is quite easy. All characters are defined and the \
ISO 10646 value is computed by adding 0x80. */ \
- ch += 0x80; \
+ ch |= 0x80; \
++inptr; \
} \
else if (set == ISO88597_set) \
{ \
/* We use the table from the ISO 8859-7 module. */ \
- ch = iso88597_to_ucs4[ch - 0x20]; \
+ ch = iso88597_to_ucs4[(ch & 0x7f) - 0x20]; \
if (ch == 0) \
{ \
result = GCONV_ILLEGAL_INPUT; \
@@ -415,16 +427,14 @@ gconv_end (struct gconv_step *data)
#define LOOPFCT TO_LOOP
#define BODY \
{ \
- unsigned char ch; \
+ uint32_t ch; \
size_t written = 0; \
\
ch = *((uint32_t *) inptr); \
\
/* First see whether we can write the character using the currently \
selected character set. */ \
- if (set == ASCII_set \
- || (ch >= 0x01 && ((set < ISO88591_set && (ch < 0x21 || ch == 0x7f)) \
- || (set >= ISO88591_set && ch < 0x20)))) \
+ if (set == ASCII_set) \
{ \
/* Please note that the NUL byte is *not* matched if we are not \
currently using the ASCII charset. This is because we must \
@@ -435,13 +445,35 @@ gconv_end (struct gconv_step *data)
written = 1; \
} \
} \
- else if (set == JISX0201_set) \
- written = ucs4_to_jisx0201 (ch, outptr); \
+ else if (set == JISX0201_Roman_set) \
+ { \
+ unsigned char buf[2]; \
+ written = ucs4_to_jisx0201 (ch, buf); \
+ if (written != UNKNOWN_10646_CHAR && buf[0] > 0x20 && buf[0] < 0x80) \
+ { \
+ *outptr++ = buf[0]; \
+ written = 1; \
+ } \
+ else \
+ written = UNKNOWN_10646_CHAR; \
+ } \
+ else if (set == JISX0201_Kana_set) \
+ { \
+ unsigned char buf[2]; \
+ written = ucs4_to_jisx0201 (ch, buf); \
+ if (written != UNKNOWN_10646_CHAR && buf[0] > 0xa0 && buf[0] < 0xe0) \
+ { \
+ *outptr++ = buf[0] - 0x80; \
+ written = 1; \
+ } \
+ else \
+ written = UNKNOWN_10646_CHAR; \
+ } \
else if (set == ISO88591_set) \
{ \
- if (ch >= 0xa0 && ch <= 0xff) \
+ if (ch >= 0x80 && ch <= 0xff) \
{ \
- *outptr++ = ch - 0x80; \
+ *outptr++ = ch; \
written = 1; \
} \
} \
@@ -456,7 +488,7 @@ gconv_end (struct gconv_step *data)
unsigned char res = iso88597_from_ucs4[ch + rp->idx]; \
if (res != '\0') \
{ \
- *outptr++ = res; \
+ *outptr++ = res | 0x80; \
written = 1; \
} \
} \
@@ -488,9 +520,11 @@ gconv_end (struct gconv_step *data)
result = GCONV_FULL_OUTPUT; \
break; \
} \
+ else if (written != UNKNOWN_10646_CHAR) \
+ outptr += written; \
} \
\
- if (written == UNKNOWN_10646_CHAR) \
+ if (written == UNKNOWN_10646_CHAR || written == 0) \
{ \
/* Either this is an unknown character or we have to switch \
the currently selected character set. The character sets \
@@ -529,29 +563,6 @@ gconv_end (struct gconv_step *data)
\
*outptr++ = ch; \
} \
- else if (ch >= 0xa0 && ch <= 0xff) \
- { \
- /* This character set is not available in ISO-2022-JP. */ \
- if (var == iso2022jp) \
- { \
- result = GCONV_ILLEGAL_INPUT; \
- break; \
- } \
- \
- /* We must use the ISO 8859-1 upper half. */ \
- *outptr++ = ESC; \
- *outptr++ = '.'; \
- *outptr++ = 'A'; \
- set = ISO88591_set; \
- \
- if (NEED_LENGTH_TEST && outptr == outend) \
- { \
- result = GCONV_FULL_OUTPUT; \
- break; \
- } \
- \
- *outptr++ = ch - 0x80; \
- } \
else \
{ \
/* Now it becomes difficult. We must search the other \
@@ -562,13 +573,13 @@ gconv_end (struct gconv_step *data)
unsigned char buf[2]; \
\
written = ucs4_to_jisx0201 (ch, buf); \
- if (written != UNKNOWN_10646_CHAR) \
+ if (written != UNKNOWN_10646_CHAR && buf[0] < 0x80) \
{ \
/* We use JIS X 0201. */ \
*outptr++ = ESC; \
- *outptr++ = '$'; \
- *outptr++ = '@'; \
- set = JISX0201_set; \
+ *outptr++ = '('; \
+ *outptr++ = 'J'; \
+ set = JISX0201_Roman_set; \
\
if (NEED_LENGTH_TEST && outptr == outend) \
{ \
@@ -606,11 +617,11 @@ gconv_end (struct gconv_step *data)
} \
else \
{ \
- written = ucs4_to_jisx0208 (ch, buf, 2); \
+ written = ucs4_to_jisx0212 (ch, buf, 2); \
if (written != UNKNOWN_10646_CHAR) \
{ \
/* We use JIS X 0212. */ \
- if (outptr + 4 > outend) \
+ if (NEED_LENGTH_TEST && outptr + 4 > outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
@@ -632,43 +643,51 @@ gconv_end (struct gconv_step *data)
} \
else \
{ \
- written = ucs4_to_gb2312 (ch, buf, 2); \
- if (written != UNKNOWN_10646_CHAR) \
+ written = ucs4_to_jisx0201 (ch, buf); \
+ if (written != UNKNOWN_10646_CHAR && buf[0] >= 0x80) \
{ \
- /* We use GB 2312. */ \
+ /* We use JIS X 0201. */ \
*outptr++ = ESC; \
- *outptr++ = '$'; \
+ *outptr++ = '('; \
+ *outptr++ = 'I'; \
+ set = JISX0201_Kana_set; \
+ \
+ if (NEED_LENGTH_TEST && outptr == outend) \
+ { \
+ result = GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ \
+ *outptr++ = buf[0] - 0x80; \
+ } \
+ else if (ch != 0xa5 && ch >= 0x80 && ch <= 0xff) \
+ { \
+ /* ISO 8859-1 upper half. */ \
+ *outptr++ = ESC; \
+ *outptr++ = '.'; \
*outptr++ = 'A'; \
- set = GB2312_set; \
+ set = ISO88591_set; \
\
- if (NEED_LENGTH_TEST && outptr + 2 > outend) \
+ if (NEED_LENGTH_TEST && outptr == outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
} \
\
- *outptr++ = buf[0]; \
- *outptr++ = buf[1]; \
+ *outptr++ = ch; \
} \
else \
{ \
- written = ucs4_to_ksc5601 (ch, buf, 2); \
+ written = ucs4_to_gb2312 (ch, buf, 2); \
if (written != UNKNOWN_10646_CHAR) \
{ \
- /* We use KSC 5601. */ \
- if (outptr + 4 > outend) \
- { \
- result = GCONV_FULL_OUTPUT; \
- break; \
- } \
+ /* We use GB 2312. */ \
*outptr++ = ESC; \
*outptr++ = '$'; \
- *outptr++ = '('; \
- *outptr++ = 'C'; \
- set = KSC5601_set; \
+ *outptr++ = 'A'; \
+ set = GB2312_set; \
\
- if (NEED_LENGTH_TEST \
- && outptr + 2 > outend) \
+ if (NEED_LENGTH_TEST && outptr + 2 > outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
@@ -679,8 +698,37 @@ gconv_end (struct gconv_step *data)
} \
else \
{ \
- result = GCONV_ILLEGAL_INPUT; \
- break; \
+ written = ucs4_to_ksc5601 (ch, buf, 2); \
+ if (written != UNKNOWN_10646_CHAR) \
+ { \
+ /* We use KSC 5601. */ \
+ if (NEED_LENGTH_TEST \
+ && outptr + 4 > outend) \
+ { \
+ result = GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ *outptr++ = ESC; \
+ *outptr++ = '$'; \
+ *outptr++ = '('; \
+ *outptr++ = 'C'; \
+ set = KSC5601_set; \
+ \
+ if (NEED_LENGTH_TEST \
+ && outptr + 2 > outend) \
+ { \
+ result = GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ \
+ *outptr++ = buf[0]; \
+ *outptr++ = buf[1]; \
+ } \
+ else \
+ { \
+ result = GCONV_ILLEGAL_INPUT; \
+ break; \
+ } \
} \
} \
} \