summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alex Dowad <alexinbeijing@gmail.com> 2020-10-07 22:54:39 +0200
committer: Alex Dowad <alexinbeijing@gmail.com> 2021-01-15 21:55:41 +0200
commit: fcbe45de1042c06c00cc9f957c7654d4835dafa3 (patch)
tree: 2e62317778cde9fe4f618467ee8d6a37241745ec
parent: 888f5d7729878caa73908a0cc09823d611d4044b (diff)
download: php-git-fcbe45de1042c06c00cc9f957c7654d4835dafa3.tar.gz
Remove useless mbstring encoding 'CP50220-raw'
CP50220 is a variant of ISO-2022-JP invented by Microsoft. It handles some Unicode characters which are not representable in ISO-2022-JP by converting them to similar characters which are representable. What, then, is CP50220-raw? An Internet search turns up absolutely nothing. Reference works which I consulted don't say anything about it. Other text conversion libraries don't support it. From looking at the code, it's just the same as CP50220, but it accepts unmapped JIS X 0208 characters passed through from other Japanese encodings and silently encodes them using the usual ISO-2022-JP escape sequence and representation for JIS X 0208 characters. It's hard to see how this could be useful. OK, let me come out and say it: it's _not_ useful. We can confidently jettison this (mis)feature.
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c 63
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h 4
-rw-r--r-- ext/mbstring/libmbfl/mbfl/mbfl_encoding.c 1
-rw-r--r-- ext/mbstring/libmbfl/mbfl/mbfl_encoding.h 1
4 files changed, 8 insertions, 61 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c
index 7b4b2630f4..d98366d1a6 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c
@@ -41,6 +41,13 @@ static void mbfl_filt_conv_wchar_cp50220_dtor(mbfl_convert_filter *filt);
static void mbfl_filt_conv_wchar_cp50220_copy(mbfl_convert_filter *src, mbfl_convert_filter *dest);
static int mbfl_filt_conv_cp5022x_wchar_flush(mbfl_convert_filter *filter);
+/* Previously, a dubious 'encoding' called 'cp50220raw' was supported.
+ * This was just CP50220, but the implementation was less strict regarding
+ * invalid characters; it would silently pass some through.
+ * This 'encoding' only existed in mbstring. In case some poor, lost soul is
+ * still using it, retain minimal support by aliasing it to CP50220. */
+static const char *cp50220_aliases[] = {"cp50220raw", "cp50220-raw", NULL};
+
const mbfl_encoding mbfl_encoding_jis_ms = {
mbfl_no_encoding_jis_ms,
"JIS-ms",
@@ -56,24 +63,13 @@ const mbfl_encoding mbfl_encoding_cp50220 = {
mbfl_no_encoding_cp50220,
"CP50220",
"ISO-2022-JP",
- NULL,
+ cp50220_aliases,
NULL,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,
&vtbl_cp50220_wchar,
&vtbl_wchar_cp50220
};
-const mbfl_encoding mbfl_encoding_cp50220raw = {
- mbfl_no_encoding_cp50220raw,
- "CP50220raw",
- "ISO-2022-JP",
- NULL,
- NULL,
- MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE,
- &vtbl_cp50220raw_wchar,
- &vtbl_wchar_cp50220raw
-};
-
const mbfl_encoding mbfl_encoding_cp50221 = {
mbfl_no_encoding_cp50221,
"CP50221",
@@ -136,26 +132,6 @@ const struct mbfl_convert_vtbl vtbl_wchar_cp50220 = {
mbfl_filt_conv_wchar_cp50220_copy
};
-const struct mbfl_convert_vtbl vtbl_cp50220raw_wchar = {
- mbfl_no_encoding_cp50220raw,
- mbfl_no_encoding_wchar,
- mbfl_filt_conv_common_ctor,
- NULL,
- mbfl_filt_conv_jis_ms_wchar,
- mbfl_filt_conv_common_flush,
- NULL,
-};
-
-const struct mbfl_convert_vtbl vtbl_wchar_cp50220raw = {
- mbfl_no_encoding_wchar,
- mbfl_no_encoding_cp50220raw,
- mbfl_filt_conv_wchar_cp50220_ctor,
- mbfl_filt_conv_wchar_cp50220_dtor,
- mbfl_filt_conv_wchar_cp50220raw,
- mbfl_filt_conv_any_jis_flush,
- mbfl_filt_conv_wchar_cp50220_copy
-};
-
const struct mbfl_convert_vtbl vtbl_cp50221_wchar = {
mbfl_no_encoding_cp50221,
mbfl_no_encoding_wchar,
@@ -572,29 +548,6 @@ mbfl_filt_conv_wchar_cp50220_dtor(mbfl_convert_filter *filt)
}
/*
- * wchar => cp50220raw
- */
-int
-mbfl_filt_conv_wchar_cp50220raw(int c, mbfl_convert_filter *filter)
-{
- if (c & MBFL_WCSPLANE_JIS0208) {
- const int s = c & MBFL_WCSPLANE_MASK;
-
- if ((filter->status & 0xff00) != 0x200) {
- CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
- CK((*filter->output_function)(0x24, filter->data)); /* '$' */
- CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
- filter->status = 0x200;
- }
- CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
- CK((*filter->output_function)(s & 0x7f, filter->data));
- return c;
- } else {
- return mbfl_filt_conv_wchar_cp50221(c, filter);
- }
-}
-
-/*
* wchar => CP50221
*/
int mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter)
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h
index e97cd0fab8..12ab19d497 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h
+++ b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h
@@ -34,7 +34,6 @@
extern const mbfl_encoding mbfl_encoding_jis_ms;
extern const mbfl_encoding mbfl_encoding_cp50220;
-extern const mbfl_encoding mbfl_encoding_cp50220raw;
extern const mbfl_encoding mbfl_encoding_cp50221;
extern const mbfl_encoding mbfl_encoding_cp50222;
@@ -42,8 +41,6 @@ extern const struct mbfl_convert_vtbl vtbl_jis_ms_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_jis_ms;
extern const struct mbfl_convert_vtbl vtbl_cp50220_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp50220;
-extern const struct mbfl_convert_vtbl vtbl_cp50220raw_wchar;
-extern const struct mbfl_convert_vtbl vtbl_wchar_cp50220raw;
extern const struct mbfl_convert_vtbl vtbl_cp50221_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp50221;
extern const struct mbfl_convert_vtbl vtbl_cp50222_wchar;
@@ -52,7 +49,6 @@ extern const struct mbfl_convert_vtbl vtbl_wchar_cp50222;
int mbfl_filt_conv_jis_ms_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_jis_ms(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_cp50220(int c, mbfl_convert_filter *filter);
-int mbfl_filt_conv_wchar_cp50220raw(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter);
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c
index e558fba4b9..12239b96ea 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c
@@ -165,7 +165,6 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
&mbfl_encoding_2022jp_2004,
&mbfl_encoding_2022jp_kddi,
&mbfl_encoding_cp50220,
- &mbfl_encoding_cp50220raw,
&mbfl_encoding_cp50221,
&mbfl_encoding_cp50222,
NULL
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h
index 40eb6e9bb9..9f926d035c 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h
@@ -115,7 +115,6 @@ enum mbfl_no_encoding {
mbfl_no_encoding_cp850,
mbfl_no_encoding_jis_ms,
mbfl_no_encoding_cp50220,
- mbfl_no_encoding_cp50220raw,
mbfl_no_encoding_cp50221,
mbfl_no_encoding_cp50222,
mbfl_no_encoding_charset_max