diff options
author | Alex Dowad <alexinbeijing@gmail.com> | 2020-09-08 22:57:28 +0200 |
---|---|---|
committer | Alex Dowad <alexinbeijing@gmail.com> | 2020-11-11 11:18:58 +0200 |
commit | 2f98bd8844de080dd212847c17f3c4c10542b4f4 (patch) | |
tree | 082d3d216a1342840b55feac3c3496fd5ea40eca /ext/mbstring | |
parent | a5827c2d351d5362c6c75930a06e993dc6fa40fc (diff) | |
download | php-git-2f98bd8844de080dd212847c17f3c4c10542b4f4.tar.gz |
SJIS-2004 encoding conversion: handle invalid (or truncated) 2nd byte for Kanji correctly
If the 2nd byte of a 2-byte character is invalid, then mb_substitute_character()
should be respected. Instead, mbstring was 'swallowing' the first byte and then
emitting the 2nd byte as if it were an ASCII character.
Likewise, if the 2nd byte is missing (i.e. the input is truncated), report an
illegal character as specified by mb_substitute_character() instead of silently ignoring it.
Diffstat (limited to 'ext/mbstring')
4 files changed, 18 insertions, 6 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.c index d832358e1d..a14d37ec93 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.c @@ -62,6 +62,6 @@ const struct mbfl_convert_vtbl vtbl_wchar_eucjp2004 = { mbfl_filt_conv_common_ctor, NULL, mbfl_filt_conv_wchar_jis2004, - mbfl_filt_conv_jis2004_flush, + mbfl_filt_conv_wchar_jis2004_flush, NULL, }; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.c index ce9267a2ec..a8f2a1da53 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.c @@ -62,6 +62,6 @@ const struct mbfl_convert_vtbl vtbl_wchar_2022jp_2004 = { mbfl_filt_conv_common_ctor, NULL, mbfl_filt_conv_wchar_jis2004, - mbfl_filt_conv_jis2004_flush, + mbfl_filt_conv_wchar_jis2004_flush, NULL, }; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c index 8be40f02aa..a81cd29b70 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c @@ -57,7 +57,7 @@ const struct mbfl_convert_vtbl vtbl_sjis2004_wchar = { mbfl_filt_conv_common_ctor, NULL, mbfl_filt_conv_jis2004_wchar, - mbfl_filt_conv_common_flush, + mbfl_filt_conv_jis2004_wchar_flush, NULL, }; @@ -67,7 +67,7 @@ const struct mbfl_convert_vtbl vtbl_wchar_sjis2004 = { mbfl_filt_conv_common_ctor, NULL, mbfl_filt_conv_wchar_jis2004, - mbfl_filt_conv_jis2004_flush, + mbfl_filt_conv_wchar_jis2004_flush, NULL, }; @@ -202,6 +202,9 @@ retry: } else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) { if (c >= 0x40 && c <= 0xfc && c != 0x7f) { SJIS_DECODE(c1, c, s1, s2); + } else { + CK((*filter->output_function)(c | MBFL_WCSGROUP_THROUGH, filter->data)); + break; } } else { s1 = c1; @@ -471,6 +474,14 @@ retry: 
return c; } +int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status & 0xF) { + CK((*filter->output_function)(filter->cache | MBFL_WCSGROUP_THROUGH, filter->data)); + } + return 0; +} + int mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter) { int k; @@ -665,7 +676,7 @@ retry: } int -mbfl_filt_conv_jis2004_flush(mbfl_convert_filter *filter) +mbfl_filt_conv_wchar_jis2004_flush(mbfl_convert_filter *filter) { int k, c1, c2, s1, s2; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h index 5eb72ca891..869fd145c1 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h @@ -39,7 +39,8 @@ extern const struct mbfl_convert_vtbl vtbl_wchar_sjis2004; int mbfl_filt_conv_jis2004_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_jis2004_flush(mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_jis2004_flush(mbfl_convert_filter *filter); +int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter); #endif /* MBFL_MBFILTER_SJIS_2004_H */ |