summaryrefslogtreecommitdiff
path: root/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c')
-rw-r--r--ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c76
1 files changed, 48 insertions, 28 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c
index 04e1d3af17..8d4d3e9d84 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c
@@ -35,7 +35,6 @@
extern const unsigned char mblen_table_sjis[];
-extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
extern int mbfl_bisec_srch(int w, const unsigned short *tbl, int n);
extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);
@@ -45,26 +44,20 @@ const mbfl_encoding mbfl_encoding_sjis2004 = {
mbfl_no_encoding_sjis2004,
"SJIS-2004",
"Shift_JIS",
- (const char *(*)[])&mbfl_encoding_sjis2004_aliases,
+ mbfl_encoding_sjis2004_aliases,
mblen_table_sjis,
- MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,
+ MBFL_ENCTYPE_GL_UNSAFE,
&vtbl_sjis2004_wchar,
&vtbl_wchar_sjis2004
};
-const struct mbfl_identify_vtbl vtbl_identify_sjis2004 = {
- mbfl_no_encoding_sjis2004,
- mbfl_filt_ident_common_ctor,
- mbfl_filt_ident_sjis
-};
-
const struct mbfl_convert_vtbl vtbl_sjis2004_wchar = {
mbfl_no_encoding_sjis2004,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_jis2004_wchar,
- mbfl_filt_conv_common_flush,
+ mbfl_filt_conv_jis2004_wchar_flush,
NULL,
};
@@ -74,7 +67,7 @@ const struct mbfl_convert_vtbl vtbl_wchar_sjis2004 = {
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_jis2004,
- mbfl_filt_conv_jis2004_flush,
+ mbfl_filt_conv_wchar_jis2004_flush,
NULL,
};
@@ -209,10 +202,18 @@ retry:
} else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) {
if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
SJIS_DECODE(c1, c, s1, s2);
+ } else {
+ CK((*filter->output_function)(c | MBFL_WCSGROUP_THROUGH, filter->data));
+ break;
+ }
+ } else { /* ISO-2022-JP-2004 */
+ if (c >= 0x21 && c <= 0x7E) {
+ s1 = c1;
+ s2 = c;
+ } else {
+ CK((*filter->output_function)(c | MBFL_WCSGROUP_THROUGH, filter->data));
+ break;
}
- } else {
- s1 = c1;
- s2 = c;
}
w1 = (s1 << 8) | s2;
@@ -320,6 +321,13 @@ retry:
} else {
c2 = c;
}
+
+ if (c2 < 0x21 || c2 > 0x7E) {
+ w = (c1 << 8) | c2 | MBFL_WCSGROUP_THROUGH;
+ CK((*filter->output_function)(w, filter->data));
+ break;
+ }
+
s1 = c1 - 0x21;
s2 = c2 - 0x21;
@@ -405,7 +413,7 @@ retry:
filter->status += 3;
} else {
filter->status &= ~0xf;
- CK((*filter->output_function)(0x1b, filter->data));
+ CK((*filter->output_function)(0x1b | MBFL_WCSGROUP_THROUGH, filter->data));
goto retry;
}
break;
@@ -424,7 +432,7 @@ retry:
filter->status++;
} else {
filter->status &= ~0xf;
- CK((*filter->output_function)(0x1b, filter->data));
+ CK((*filter->output_function)(0x1b | MBFL_WCSGROUP_THROUGH, filter->data));
CK((*filter->output_function)(0x24, filter->data));
goto retry;
}
@@ -446,7 +454,7 @@ retry:
filter->status = 0xa0;
} else {
filter->status &= ~0xf;
- CK((*filter->output_function)(0x1b, filter->data));
+ CK((*filter->output_function)(0x1b | MBFL_WCSGROUP_THROUGH, filter->data));
CK((*filter->output_function)(0x24, filter->data));
CK((*filter->output_function)(0x28, filter->data));
goto retry;
@@ -464,7 +472,7 @@ retry:
filter->status = 0;
} else {
filter->status &= ~0xf;
- CK((*filter->output_function)(0x1b, filter->data));
+ CK((*filter->output_function)(0x1b | MBFL_WCSGROUP_THROUGH, filter->data));
CK((*filter->output_function)(0x28, filter->data));
goto retry;
}
@@ -478,13 +486,21 @@ retry:
return c;
}
-int
-mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter) {
+int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter)
+{
+ if (filter->status & 0xF) {
+ CK((*filter->output_function)(filter->cache | MBFL_WCSGROUP_THROUGH, filter->data));
+ }
+ return 0;
+}
+
+int mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter)
+{
int k;
- int c1, c2, s1 = 0, s2;
+ int c1, c2, s1, s2;
retry:
-
+ s1 = 0;
/* check for 1st char of combining characters */
if ((filter->status & 0xf)== 0 && (
c == 0x00E6 ||
@@ -544,6 +560,12 @@ retry:
}
}
+ if (s1 <= 0 && filter->to->no_encoding == mbfl_no_encoding_2022jp_2004 && (c == 0x5C || c == 0x7E)) {
+ /* ISO-2022-JP-2004 can represent ASCII characters directly, so there is no need
+ * to use the JIS X 0208 REVERSE SOLIDUS for ASCII backslash, or WAVE DASH for tilde */
+ s1 = c;
+ }
+
/* check for major japanese chars: U+4E00 - U+9FFF */
if (s1 <= 0) {
for (k=0; k < uni2jis_tbl_len ;k++) {
@@ -586,10 +608,6 @@ retry:
}
if (s1 <= 0) {
- c1 = c & ~MBFL_WCSPLANE_MASK;
- if (c1 == MBFL_WCSPLANE_JIS0213) {
- s1 = c & MBFL_WCSPLANE_MASK;
- }
if (c == 0) {
s1 = 0;
} else if (s1 <= 0) {
@@ -672,7 +690,7 @@ retry:
}
int
-mbfl_filt_conv_jis2004_flush(mbfl_convert_filter *filter)
+mbfl_filt_conv_wchar_jis2004_flush(mbfl_convert_filter *filter)
{
int k, c1, c2, s1, s2;
@@ -705,7 +723,9 @@ mbfl_filt_conv_jis2004_flush(mbfl_convert_filter *filter)
CK((*filter->output_function)(s2, filter->data));
}
- /* back to latin */
+ /* If we had switched to a different charset, go back to ASCII mode
+ * This makes it possible to concatenate arbitrary valid strings
+ * together and get a valid string */
if ((filter->status & 0xff00) != 0) {
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
CK((*filter->output_function)(0x28, filter->data)); /* '(' */