diff options
Diffstat (limited to 'ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c')
-rw-r--r-- | ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c | 151 |
1 files changed, 37 insertions, 114 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c index 252a10a58d..e3e7dfc931 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c @@ -33,7 +33,7 @@ #include "unicode_table_cp932_ext.h" #include "unicode_table_jis.h" -int mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter); +static int mbfl_filt_conv_eucjp_wchar_flush(mbfl_convert_filter *filter); const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -60,26 +60,20 @@ const mbfl_encoding mbfl_encoding_euc_jp = { mbfl_no_encoding_euc_jp, "EUC-JP", "EUC-JP", - (const char *(*)[])&mbfl_encoding_euc_jp_aliases, + mbfl_encoding_euc_jp_aliases, mblen_table_eucjp, - MBFL_ENCTYPE_MBCS, + 0, &vtbl_eucjp_wchar, &vtbl_wchar_eucjp }; -const struct mbfl_identify_vtbl vtbl_identify_eucjp = { - mbfl_no_encoding_euc_jp, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_eucjp -}; - const struct mbfl_convert_vtbl vtbl_eucjp_wchar = { mbfl_no_encoding_euc_jp, mbfl_no_encoding_wchar, mbfl_filt_conv_common_ctor, NULL, mbfl_filt_conv_eucjp_wchar, - mbfl_filt_conv_common_flush, + mbfl_filt_conv_eucjp_wchar_flush, NULL, }; @@ -101,7 +95,7 @@ const struct mbfl_convert_vtbl vtbl_wchar_eucjp = { int mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter) { - int c1, s, w; + int c1, s, w = 0; switch (filter->status) { case 0: @@ -115,9 +109,7 @@ mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter) } else if (c == 0x8f) { /* X 0212 first char */ filter->status = 3; } else { - w = c & MBFL_WCSGROUP_MASK; - w |= MBFL_WCSGROUP_THROUGH; - CK((*filter->output_function)(w, filter->data)); + CK((*filter->output_function)(c | MBFL_WCSGROUP_THROUGH, filter->data)); } break; @@ -128,21 +120,13 @@ mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter) s = (c1 - 0xa1)*94 + c - 0xa1; if (s >= 0 && s < jisx0208_ucs_table_size) { w = jisx0208_ucs_table[s]; - } else { - w = 0; } if (w <= 0) { - w = ((c1 & 0x7f) << 8) | (c & 0x7f); - w &= MBFL_WCSPLANE_MASK; - w |= MBFL_WCSPLANE_JIS0208; + w = ((c1 & 0x7f) << 8) | (c & 0x7f) | MBFL_WCSPLANE_JIS0208; } CK((*filter->output_function)(w, filter->data)); - } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ - CK((*filter->output_function)(c, filter->data)); } else { - w = (c1 << 8) | c; - w &= MBFL_WCSGROUP_MASK; - w |= MBFL_WCSGROUP_THROUGH; + w = (c1 << 8) | c | MBFL_WCSGROUP_THROUGH; CK((*filter->output_function)(w, filter->data)); } break; @@ -152,47 +136,31 @@ mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter) if (c > 0xa0 && c < 0xe0) { w = 0xfec0 + c; CK((*filter->output_function)(w, filter->data)); - } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ - CK((*filter->output_function)(c, filter->data)); } else { - w = 0x8e00 | c; - w &= MBFL_WCSGROUP_MASK; - w |= MBFL_WCSGROUP_THROUGH; + w = 0x8e00 | c | MBFL_WCSGROUP_THROUGH; CK((*filter->output_function)(w, filter->data)); } break; - case 3: /* got 0x8f, X 0212 first char */ - if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ - CK((*filter->output_function)(c, filter->data)); - filter->status = 0; - } else { - filter->status++; - filter->cache = c; - } + case 3: /* got 0x8f, JIS X 0212 first byte */ + filter->status++; + filter->cache = c; break; - case 4: /* got 0x8f, X 0212 second char */ + + case 4: /* got 0x8f, JIS X 0212 second byte */ filter->status = 0; c1 = filter->cache; - if (c1 > 0xa0 && c1 < 0xff && c > 0xa0 && c < 0xff) { + if (c > 0xA0 && c < 0xFF && c1 > 0xA0 && c1 < 0xFF) { s = (c1 - 0xa1)*94 + c - 0xa1; if (s >= 0 && s < jisx0212_ucs_table_size) { w = jisx0212_ucs_table[s]; - } else { - w = 0; } if (w <= 0) { - w = ((c1 & 0x7f) << 8) | (c & 0x7f); - w &= MBFL_WCSPLANE_MASK; - w |= MBFL_WCSPLANE_JIS0212; + w = ((c1 & 0x7f) << 8) | (c & 0x7f) | MBFL_WCSPLANE_JIS0212; } CK((*filter->output_function)(w, filter->data)); - } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ - CK((*filter->output_function)(c, filter->data)); } else { - w = (c1 << 8) | c | 0x8f0000; - w &= MBFL_WCSGROUP_MASK; - w |= MBFL_WCSGROUP_THROUGH; + w = (c1 << 8) | c | 0x8f0000 | MBFL_WCSGROUP_THROUGH; CK((*filter->output_function)(w, filter->data)); } break; @@ -205,16 +173,30 @@ mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter) return c; } +static int mbfl_filt_conv_eucjp_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status) { + (*filter->output_function)(filter->cache | MBFL_WCSGROUP_THROUGH, filter->data); + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + /* * wchar => EUC-JP */ int mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter) { - int c1, s; + int s = 0; - s = 0; - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + if (c == 0xAF) { /* U+00AF is MACRON */ + s = 0xA2B4; /* Use JIS X 0212 overline */ + } else if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; @@ -224,13 +206,7 @@ mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter) s = ucs_r_jis_table[c - ucs_r_jis_table_min]; } if (s <= 0) { - c1 = c & ~MBFL_WCSPLANE_MASK; - if (c1 == MBFL_WCSPLANE_JIS0208) { - s = c & MBFL_WCSPLANE_MASK; - } else if (c1 == MBFL_WCSPLANE_JIS0212) { - s = c & MBFL_WCSPLANE_MASK; - s |= 0x8080; - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ s = 0x2140; } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ s = 0x2141; @@ -244,10 +220,9 @@ mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter) s = 0x2172; } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ s = 0x224c; - } - if (c == 0) { + } else if (c == 0) { s = 0; - } else if (s <= 0) { + } else { s = -1; } } @@ -271,55 +246,3 @@ mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter) return c; } - -int mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter) -{ - switch (filter->status) { - case 0: /* latin */ - if (c >= 0 && c < 0x80) { /* ok */ - ; - } else if (c > 0xa0 && c < 0xff) { /* kanji first char */ - filter->status = 1; - } else if (c == 0x8e) { /* kana first char */ - filter->status = 2; - } else if (c == 0x8f) { /* X 0212 first char */ - filter->status = 3; - } else { /* bad */ - filter->flag = 1; - } - break; - - case 1: /* got first half */ - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - case 2: /* got 0x8e */ - if (c < 0xa1 || c > 0xdf) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - case 3: /* got 0x8f */ - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status++; - break; - case 4: /* got 0x8f */ - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - default: - filter->status = 0; - break; - } - - return c; -} |