diff options
Diffstat (limited to 'ext/mbstring/libmbfl/filters/mbfilter_jis.c')
-rw-r--r-- | ext/mbstring/libmbfl/filters/mbfilter_jis.c | 237 |
1 files changed, 22 insertions, 215 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_jis.c b/ext/mbstring/libmbfl/filters/mbfilter_jis.c index b08eec38b0..b62956e617 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_jis.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_jis.c @@ -33,8 +33,7 @@ #include "unicode_table_cp932_ext.h" #include "unicode_table_jis.h" -static int mbfl_filt_ident_jis(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_2022jp(int c, mbfl_identify_filter *filter); +static int mbfl_filt_conv_jis_wchar_flush(mbfl_convert_filter *filter); const mbfl_encoding mbfl_encoding_jis = { mbfl_no_encoding_jis, @@ -42,7 +41,7 @@ const mbfl_encoding mbfl_encoding_jis = { "ISO-2022-JP", NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE, + MBFL_ENCTYPE_GL_UNSAFE, &vtbl_jis_wchar, &vtbl_wchar_jis }; @@ -53,30 +52,18 @@ const mbfl_encoding mbfl_encoding_2022jp = { "ISO-2022-JP", NULL, NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE, + MBFL_ENCTYPE_GL_UNSAFE, &vtbl_2022jp_wchar, &vtbl_wchar_2022jp }; -const struct mbfl_identify_vtbl vtbl_identify_jis = { - mbfl_no_encoding_jis, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_jis -}; - -const struct mbfl_identify_vtbl vtbl_identify_2022jp = { - mbfl_no_encoding_2022jp, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_2022jp -}; - const struct mbfl_convert_vtbl vtbl_jis_wchar = { mbfl_no_encoding_jis, mbfl_no_encoding_wchar, mbfl_filt_conv_common_ctor, NULL, mbfl_filt_conv_jis_wchar, - mbfl_filt_conv_common_flush, + mbfl_filt_conv_jis_wchar_flush, NULL, }; @@ -96,7 +83,7 @@ const struct mbfl_convert_vtbl vtbl_2022jp_wchar = { mbfl_filt_conv_common_ctor, NULL, mbfl_filt_conv_jis_wchar, - mbfl_filt_conv_common_flush, + mbfl_filt_conv_jis_wchar_flush, NULL, }; @@ -185,10 +172,6 @@ retry: } } CK((*filter->output_function)(w, filter->data)); - } else if (c == 0x1b) { - filter->status += 2; - } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ - CK((*filter->output_function)(c, filter->data)); } else { w = (c1 << 8) | c; w &= MBFL_WCSGROUP_MASK; @@ -210,7 +193,7 @@ retry: filter->status += 3; } else { filter->status &= ~0xf; - CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x1b | MBFL_WCSGROUP_THROUGH, filter->data)); goto retry; } break; @@ -228,7 +211,7 @@ retry: filter->status++; } else { filter->status &= ~0xf; - CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x1b | MBFL_WCSGROUP_THROUGH, filter->data)); CK((*filter->output_function)(0x24, filter->data)); goto retry; } @@ -247,7 +230,7 @@ retry: filter->status = 0x90; } else { filter->status &= ~0xf; - CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x1b | MBFL_WCSGROUP_THROUGH, filter->data)); CK((*filter->output_function)(0x24, filter->data)); CK((*filter->output_function)(0x28, filter->data)); goto retry; @@ -269,7 +252,7 @@ retry: filter->status = 0x20; } else { filter->status &= ~0xf; - CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x1b | MBFL_WCSGROUP_THROUGH, filter->data)); CK((*filter->output_function)(0x28, filter->data)); goto retry; } @@ -283,17 +266,27 @@ retry: return c; } +static int mbfl_filt_conv_jis_wchar_flush(mbfl_convert_filter *filter) +{ + if ((filter->status & 0xF) == 1) { + /* 2-byte (JIS X 0208 or 0212) character was truncated */ + CK((*filter->output_function)(filter->cache | MBFL_WCSGROUP_THROUGH, filter->data)); + } + return 0; +} + /* * wchar => JIS */ int mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter) { - int c1, s; + int s = 0; - s = 0; if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c == 0x203E) { /* OVERLINE */ + s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { @@ -302,20 +295,10 @@ mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter) s = ucs_r_jis_table[c - ucs_r_jis_table_min]; } if (s <= 0) { - c1 = c & ~MBFL_WCSPLANE_MASK; - if (c1 == MBFL_WCSPLANE_JIS0208) { - s = c & MBFL_WCSPLANE_MASK; - } else if (c1 == MBFL_WCSPLANE_JIS0212) { - s = c & MBFL_WCSPLANE_MASK; - s |= 0x8080; - } else if (c == 0xa5) { /* YEN SIGN */ + if (c == 0xa5) { /* YEN SIGN */ s = 0x1005c; - } else if (c == 0x203e) { /* OVER LINE */ - s = 0x1007e; } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ s = 0x2140; - } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ - s = 0x2141; } else if (c == 0x2225) { /* PARALLEL TO */ s = 0x2142; } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ @@ -411,8 +394,6 @@ mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter) s = 0x1007e; } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ s = 0x2140; - } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ - s = 0x2141; } else if (c == 0x2225) { /* PARALLEL TO */ s = 0x2142; } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ @@ -483,177 +464,3 @@ mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter) return 0; } - -static int mbfl_filt_ident_jis(int c, mbfl_identify_filter *filter) -{ -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: X 0201 latin */ -/* case 0x20: X 0201 kana */ -/* case 0x80: X 0208 */ -/* case 0x90: X 0212 */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if (c == 0x0e) { /* "kana in" */ - filter->status = 0x20; - } else if (c == 0x0f) { /* "kana out" */ - filter->status = 0; - } else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x81: X 0208 second char */ -/* case 0x91: X 0212 second char */ - case 1: - filter->status &= ~0xf; - if (c == 0x1b) { - goto retry; - } else if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - break; - - /* ESC */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ */ - case 3: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x28) { /* '(' */ - filter->status++; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ ( */ - case 4: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x44) { /* 'D' */ - filter->status = 0x90; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC ( */ - case 5: - if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */ - filter->status = 0; - } else if (c == 0x4a) { /* 'J' */ - filter->status = 0x10; - } else if (c == 0x49) { /* 'I' */ - filter->status = 0x20; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} - -static int mbfl_filt_ident_2022jp(int c, mbfl_identify_filter *filter) -{ -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: X 0201 latin */ -/* case 0x80: X 0208 */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x81: X 0208 second char */ - case 1: - if (c == 0x1b) { - filter->status++; - } else { - filter->status &= ~0xf; - if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - } - break; - - /* ESC */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ */ - case 3: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC ( */ - case 5: - if (c == 0x42) { /* 'B' */ - filter->status = 0; - } else if (c == 0x4a) { /* 'J' */ - filter->status = 0x10; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} |