diff options
Diffstat (limited to 'ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c')
-rw-r--r-- | ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c | 76 |
1 files changed, 48 insertions, 28 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c index 04e1d3af17..8d4d3e9d84 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c @@ -35,7 +35,6 @@ extern const unsigned char mblen_table_sjis[]; -extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter); extern int mbfl_bisec_srch(int w, const unsigned short *tbl, int n); extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n); @@ -45,26 +44,20 @@ const mbfl_encoding mbfl_encoding_sjis2004 = { mbfl_no_encoding_sjis2004, "SJIS-2004", "Shift_JIS", - (const char *(*)[])&mbfl_encoding_sjis2004_aliases, + mbfl_encoding_sjis2004_aliases, mblen_table_sjis, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE, + MBFL_ENCTYPE_GL_UNSAFE, &vtbl_sjis2004_wchar, &vtbl_wchar_sjis2004 }; -const struct mbfl_identify_vtbl vtbl_identify_sjis2004 = { - mbfl_no_encoding_sjis2004, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_sjis -}; - const struct mbfl_convert_vtbl vtbl_sjis2004_wchar = { mbfl_no_encoding_sjis2004, mbfl_no_encoding_wchar, mbfl_filt_conv_common_ctor, NULL, mbfl_filt_conv_jis2004_wchar, - mbfl_filt_conv_common_flush, + mbfl_filt_conv_jis2004_wchar_flush, NULL, }; @@ -74,7 +67,7 @@ const struct mbfl_convert_vtbl vtbl_wchar_sjis2004 = { mbfl_filt_conv_common_ctor, NULL, mbfl_filt_conv_wchar_jis2004, - mbfl_filt_conv_jis2004_flush, + mbfl_filt_conv_wchar_jis2004_flush, NULL, }; @@ -209,10 +202,18 @@ retry: } else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) { if (c >= 0x40 && c <= 0xfc && c != 0x7f) { SJIS_DECODE(c1, c, s1, s2); + } else { + CK((*filter->output_function)(c | MBFL_WCSGROUP_THROUGH, filter->data)); + break; + } + } else { /* ISO-2022-JP-2004 */ + if (c >= 0x21 && c <= 0x7E) { + s1 = c1; + s2 = c; + } else { + CK((*filter->output_function)(c | MBFL_WCSGROUP_THROUGH, filter->data)); + break; } - } else { - s1 = c1; - s2 = c; } w1 = (s1 << 8) | s2; @@ -320,6 +321,13 @@ retry: } else { c2 = c; } + + if (c2 < 0x21 || c2 > 0x7E) { + w = (c1 << 8) | c2 | MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + break; + } + s1 = c1 - 0x21; s2 = c2 - 0x21; @@ -405,7 +413,7 @@ retry: filter->status += 3; } else { filter->status &= ~0xf; - CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x1b | MBFL_WCSGROUP_THROUGH, filter->data)); goto retry; } break; @@ -424,7 +432,7 @@ retry: filter->status++; } else { filter->status &= ~0xf; - CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x1b | MBFL_WCSGROUP_THROUGH, filter->data)); CK((*filter->output_function)(0x24, filter->data)); goto retry; } @@ -446,7 +454,7 @@ retry: filter->status = 0xa0; } else { filter->status &= ~0xf; - CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x1b | MBFL_WCSGROUP_THROUGH, filter->data)); CK((*filter->output_function)(0x24, filter->data)); CK((*filter->output_function)(0x28, filter->data)); goto retry; @@ -464,7 +472,7 @@ retry: filter->status = 0; } else { filter->status &= ~0xf; - CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x1b | MBFL_WCSGROUP_THROUGH, filter->data)); CK((*filter->output_function)(0x28, filter->data)); goto retry; } @@ -478,13 +486,21 @@ retry: return c; } -int -mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter) { +int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status & 0xF) { + CK((*filter->output_function)(filter->cache | MBFL_WCSGROUP_THROUGH, filter->data)); + } + return 0; +} + +int mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter) +{ int k; - int c1, c2, s1 = 0, s2; + int c1, c2, s1, s2; retry: - + s1 = 0; /* check for 1st char of combining characters */ if ((filter->status & 0xf)== 0 && ( c == 0x00E6 || @@ -544,6 +560,12 @@ retry: } } + if (s1 <= 0 && filter->to->no_encoding == mbfl_no_encoding_2022jp_2004 && (c == 0x5C || c == 0x7E)) { + /* ISO-2022-JP-2004 can represent ASCII characters directly, so there is no need + * to use the JIS X 0208 REVERSE SOLIDUS for ASCII backslash, or WAVE DASH for tilde */ + s1 = c; + } + /* check for major japanese chars: U+4E00 - U+9FFF */ if (s1 <= 0) { for (k=0; k < uni2jis_tbl_len ;k++) { @@ -586,10 +608,6 @@ retry: } if (s1 <= 0) { - c1 = c & ~MBFL_WCSPLANE_MASK; - if (c1 == MBFL_WCSPLANE_JIS0213) { - s1 = c & MBFL_WCSPLANE_MASK; - } if (c == 0) { s1 = 0; } else if (s1 <= 0) { @@ -672,7 +690,7 @@ retry: } int -mbfl_filt_conv_jis2004_flush(mbfl_convert_filter *filter) +mbfl_filt_conv_wchar_jis2004_flush(mbfl_convert_filter *filter) { int k, c1, c2, s1, s2; @@ -705,7 +723,9 @@ mbfl_filt_conv_jis2004_flush(mbfl_convert_filter *filter) CK((*filter->output_function)(s2, filter->data)); } - /* back to latin */ + /* If we had switched to a different charset, go back to ASCII mode + * This makes it possible to concatenate arbitrary valid strings + * together and get a valid string */ if ((filter->status & 0xff00) != 0) { CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ CK((*filter->output_function)(0x28, filter->data)); /* '(' */ |