summary | refs | log | tree | commit | diff
path: root/ext/mbstring
diff options
context:
space:
mode:
author: Alex Dowad <alexinbeijing@gmail.com>, 2020-09-08 22:57:28 +0200
committer: Alex Dowad <alexinbeijing@gmail.com>, 2020-11-11 11:18:58 +0200
commit: 2f98bd8844de080dd212847c17f3c4c10542b4f4 (patch)
tree: 082d3d216a1342840b55feac3c3496fd5ea40eca /ext/mbstring
parent: a5827c2d351d5362c6c75930a06e993dc6fa40fc (diff)
download: php-git-2f98bd8844de080dd212847c17f3c4c10542b4f4.tar.gz
SJIS-2004 encoding conversion: handle invalid (or truncated) 2nd byte for Kanji correctly
If the 2nd byte of a 2-byte character is invalid, then the setting of mb_substitute_character() should be respected. Instead, mbstring was 'swallowing' the first byte and then emitting the 2nd byte as if it were an ASCII character. Likewise, if the 2nd byte is missing (the input is truncated), then instead of just keeping quiet, report an illegal character as specified by mb_substitute_character().
Diffstat (limited to 'ext/mbstring')
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.c | 2
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.c | 2
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c | 17
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h | 3
4 files changed, 18 insertions, 6 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.c
index d832358e1d..a14d37ec93 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.c
@@ -62,6 +62,6 @@ const struct mbfl_convert_vtbl vtbl_wchar_eucjp2004 = {
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_jis2004,
- mbfl_filt_conv_jis2004_flush,
+ mbfl_filt_conv_wchar_jis2004_flush,
NULL,
};
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.c
index ce9267a2ec..a8f2a1da53 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.c
@@ -62,6 +62,6 @@ const struct mbfl_convert_vtbl vtbl_wchar_2022jp_2004 = {
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_jis2004,
- mbfl_filt_conv_jis2004_flush,
+ mbfl_filt_conv_wchar_jis2004_flush,
NULL,
};
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c
index 8be40f02aa..a81cd29b70 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c
@@ -57,7 +57,7 @@ const struct mbfl_convert_vtbl vtbl_sjis2004_wchar = {
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_jis2004_wchar,
- mbfl_filt_conv_common_flush,
+ mbfl_filt_conv_jis2004_wchar_flush,
NULL,
};
@@ -67,7 +67,7 @@ const struct mbfl_convert_vtbl vtbl_wchar_sjis2004 = {
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_jis2004,
- mbfl_filt_conv_jis2004_flush,
+ mbfl_filt_conv_wchar_jis2004_flush,
NULL,
};
@@ -202,6 +202,9 @@ retry:
} else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) {
if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
SJIS_DECODE(c1, c, s1, s2);
+ } else {
+ CK((*filter->output_function)(c | MBFL_WCSGROUP_THROUGH, filter->data));
+ break;
}
} else {
s1 = c1;
@@ -471,6 +474,14 @@ retry:
return c;
}
+int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter)
+{
+ if (filter->status & 0xF) {
+ CK((*filter->output_function)(filter->cache | MBFL_WCSGROUP_THROUGH, filter->data));
+ }
+ return 0;
+}
+
int
mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter) {
int k;
@@ -665,7 +676,7 @@ retry:
}
int
-mbfl_filt_conv_jis2004_flush(mbfl_convert_filter *filter)
+mbfl_filt_conv_wchar_jis2004_flush(mbfl_convert_filter *filter)
{
int k, c1, c2, s1, s2;
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h
index 5eb72ca891..869fd145c1 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h
+++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h
@@ -39,7 +39,8 @@ extern const struct mbfl_convert_vtbl vtbl_wchar_sjis2004;
int mbfl_filt_conv_jis2004_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter);
-int mbfl_filt_conv_jis2004_flush(mbfl_convert_filter *filter);
+int mbfl_filt_conv_wchar_jis2004_flush(mbfl_convert_filter *filter);
+int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_SJIS_2004_H */