diff options
author | Alex Dowad <alexinbeijing@gmail.com> | 2020-10-07 22:54:39 +0200 |
---|---|---|
committer | Alex Dowad <alexinbeijing@gmail.com> | 2021-01-15 21:55:41 +0200 |
commit | fcbe45de1042c06c00cc9f957c7654d4835dafa3 (patch) | |
tree | 2e62317778cde9fe4f618467ee8d6a37241745ec | |
parent | 888f5d7729878caa73908a0cc09823d611d4044b (diff) | |
download | php-git-fcbe45de1042c06c00cc9f957c7654d4835dafa3.tar.gz |
Remove useless mbstring encoding 'CP50220-raw'
CP50220 is a variant of ISO-2022-JP invented by Microsoft, which handles some
Unicode characters which are not representable in ISO-2022-JP by converting
them to similar characters which are representable.
What, then, is CP50220-raw? An Internet search turns up absolutely nothing.
Reference works which I consulted don't say anything about it. Other text
conversion libraries don't support it.
Judging from the code, it's just the same as CP50220, except that it accepts
unmapped JIS X 0208 characters passed through from other Japanese encodings
and silently encodes them using the usual ISO-2022-JP escape sequence and
representation for JIS X 0208 characters.
It's hard to see how this could be useful. OK, let me come out and say it:
it's _not_ useful. We can confidently jettison this (mis)feature.
-rw-r--r-- | ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c | 63 | ||||
-rw-r--r-- | ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h | 4 | ||||
-rw-r--r-- | ext/mbstring/libmbfl/mbfl/mbfl_encoding.c | 1 | ||||
-rw-r--r-- | ext/mbstring/libmbfl/mbfl/mbfl_encoding.h | 1 |
4 files changed, 8 insertions, 61 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c index 7b4b2630f4..d98366d1a6 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c @@ -41,6 +41,13 @@ static void mbfl_filt_conv_wchar_cp50220_dtor(mbfl_convert_filter *filt); static void mbfl_filt_conv_wchar_cp50220_copy(mbfl_convert_filter *src, mbfl_convert_filter *dest); static int mbfl_filt_conv_cp5022x_wchar_flush(mbfl_convert_filter *filter); +/* Previously, a dubious 'encoding' called 'cp50220raw' was supported + * This was just CP50220, but the implementation was less strict regarding + * invalid characters; it would silently pass some through + * This 'encoding' only existed in mbstring. In case some poor, lost soul is + * still using it, retain minimal support by aliasing it to CP50220 */ +static const char *cp50220_aliases[] = {"cp50220raw", "cp50220-raw", NULL}; + const mbfl_encoding mbfl_encoding_jis_ms = { mbfl_no_encoding_jis_ms, "JIS-ms", @@ -56,24 +63,13 @@ const mbfl_encoding mbfl_encoding_cp50220 = { mbfl_no_encoding_cp50220, "CP50220", "ISO-2022-JP", - NULL, + cp50220_aliases, NULL, MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE, &vtbl_cp50220_wchar, &vtbl_wchar_cp50220 }; -const mbfl_encoding mbfl_encoding_cp50220raw = { - mbfl_no_encoding_cp50220raw, - "CP50220raw", - "ISO-2022-JP", - NULL, - NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_cp50220raw_wchar, - &vtbl_wchar_cp50220raw -}; - const mbfl_encoding mbfl_encoding_cp50221 = { mbfl_no_encoding_cp50221, "CP50221", @@ -136,26 +132,6 @@ const struct mbfl_convert_vtbl vtbl_wchar_cp50220 = { mbfl_filt_conv_wchar_cp50220_copy }; -const struct mbfl_convert_vtbl vtbl_cp50220raw_wchar = { - mbfl_no_encoding_cp50220raw, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_jis_ms_wchar, - mbfl_filt_conv_common_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_cp50220raw = { - 
mbfl_no_encoding_wchar, - mbfl_no_encoding_cp50220raw, - mbfl_filt_conv_wchar_cp50220_ctor, - mbfl_filt_conv_wchar_cp50220_dtor, - mbfl_filt_conv_wchar_cp50220raw, - mbfl_filt_conv_any_jis_flush, - mbfl_filt_conv_wchar_cp50220_copy -}; - const struct mbfl_convert_vtbl vtbl_cp50221_wchar = { mbfl_no_encoding_cp50221, mbfl_no_encoding_wchar, @@ -572,29 +548,6 @@ mbfl_filt_conv_wchar_cp50220_dtor(mbfl_convert_filter *filt) } /* - * wchar => cp50220raw - */ -int -mbfl_filt_conv_wchar_cp50220raw(int c, mbfl_convert_filter *filter) -{ - if (c & MBFL_WCSPLANE_JIS0208) { - const int s = c & MBFL_WCSPLANE_MASK; - - if ((filter->status & 0xff00) != 0x200) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x24, filter->data)); /* '$' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - filter->status = 0x200; - } - CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); - CK((*filter->output_function)(s & 0x7f, filter->data)); - return c; - } else { - return mbfl_filt_conv_wchar_cp50221(c, filter); - } -} - -/* * wchar => CP50221 */ int mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter) diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h index e97cd0fab8..12ab19d497 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h @@ -34,7 +34,6 @@ extern const mbfl_encoding mbfl_encoding_jis_ms; extern const mbfl_encoding mbfl_encoding_cp50220; -extern const mbfl_encoding mbfl_encoding_cp50220raw; extern const mbfl_encoding mbfl_encoding_cp50221; extern const mbfl_encoding mbfl_encoding_cp50222; @@ -42,8 +41,6 @@ extern const struct mbfl_convert_vtbl vtbl_jis_ms_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_jis_ms; extern const struct mbfl_convert_vtbl vtbl_cp50220_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_cp50220; -extern const struct mbfl_convert_vtbl 
vtbl_cp50220raw_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_cp50220raw; extern const struct mbfl_convert_vtbl vtbl_cp50221_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_cp50221; extern const struct mbfl_convert_vtbl vtbl_cp50222_wchar; @@ -52,7 +49,6 @@ extern const struct mbfl_convert_vtbl vtbl_wchar_cp50222; int mbfl_filt_conv_jis_ms_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_jis_ms(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_cp50220(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_cp50220raw(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter); diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c index e558fba4b9..12239b96ea 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c @@ -165,7 +165,6 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = { &mbfl_encoding_2022jp_2004, &mbfl_encoding_2022jp_kddi, &mbfl_encoding_cp50220, - &mbfl_encoding_cp50220raw, &mbfl_encoding_cp50221, &mbfl_encoding_cp50222, NULL diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h index 40eb6e9bb9..9f926d035c 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h +++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h @@ -115,7 +115,6 @@ enum mbfl_no_encoding { mbfl_no_encoding_cp850, mbfl_no_encoding_jis_ms, mbfl_no_encoding_cp50220, - mbfl_no_encoding_cp50220raw, mbfl_no_encoding_cp50221, mbfl_no_encoding_cp50222, mbfl_no_encoding_charset_max |