summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nikita Popov <nikita.ppv@gmail.com> 2017-08-03 21:57:35 +0200
committer Nikita Popov <nikita.ppv@gmail.com> 2017-08-03 21:57:35 +0200
commit c98714f19e6f033a7b45a24cf73fc9460cee3254 (patch)
tree 7eb8a69f52747f839d43f2dad1505b4c42042739
parent 3d948d77d112684781fd81e7124350d3a32e5245 (diff)
parent fb9bf5b64b6c09b9d93bbd1dadd64884e0af66f3 (diff)
download php-git-c98714f19e6f033a7b45a24cf73fc9460cee3254.tar.gz
Merge branch 'PHP-7.2'
-rw-r--r-- ext/mbstring/libmbfl/mbfl/mbfl_convert.c 22
-rw-r--r-- ext/mbstring/mbstring.c 35
-rw-r--r-- ext/mbstring/tests/bug69086.phpt 6
-rw-r--r-- ext/mbstring/tests/mb_chr.phpt 2
-rw-r--r-- ext/mbstring/tests/mb_substitute_character_variation2.phpt 2
5 files changed, 29 insertions, 38 deletions
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c
index 2f98b58178..69af1476c5 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c
@@ -466,14 +466,26 @@ int mbfl_convert_filter_strcat(mbfl_convert_filter *filter, const unsigned char
int
mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
{
- int mode_backup, ret, n, m, r;
+ int mode_backup, substchar_backup, ret, n, m, r;
ret = 0;
+
mode_backup = filter->illegal_mode;
- filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
+ substchar_backup = filter->illegal_substchar;
+
+ /* The used substitution character may not be supported by the target character encoding.
+ * If that happens, first try to use "?" instead and if that also fails, silently drop the
+ * character. */
+ if (filter->illegal_mode == MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR
+ && filter->illegal_substchar != 0x3f) {
+ filter->illegal_substchar = 0x3f;
+ } else {
+ filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
+ }
+
switch (mode_backup) {
case MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR:
- ret = (*filter->filter_function)(filter->illegal_substchar, filter);
+ ret = (*filter->filter_function)(substchar_backup, filter);
break;
case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG:
if (c >= 0) {
@@ -559,14 +571,16 @@ mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
}
ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";");
} else {
- ret = (*filter->filter_function)(filter->illegal_substchar, filter);
+ ret = (*filter->filter_function)(substchar_backup, filter);
}
}
break;
default:
break;
}
+
filter->illegal_mode = mode_backup;
+ filter->illegal_substchar = substchar_backup;
filter->num_illegalchar++;
return ret;
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index 584523ecaf..a5430b4442 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -970,6 +970,7 @@ static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_leng
if (convd == NULL) {
return (size_t) -1;
}
+
mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
@@ -3053,27 +3054,7 @@ MBSTRING_API char *php_mb_convert_encoding_ex(const char *input, size_t length,
}
mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
- if (string.encoding == MBSTRG(current_internal_encoding)) {
- mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
- } else if (php_mb_is_no_encoding_unicode(string.encoding->no_encoding)
- && php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) {
-
- if (php_mb_is_no_encoding_utf8(string.encoding->no_encoding)) {
- if (MBSTRG(current_filter_illegal_substchar) > 0xd7ff &&
- 0xe000 > MBSTRG(current_filter_illegal_substchar)
- ) {
- mbfl_buffer_converter_illegal_substchar(convd, 0x3f);
- } else {
- mbfl_buffer_converter_illegal_substchar(convd,
- MBSTRG(current_filter_illegal_substchar));
- }
- } else {
- mbfl_buffer_converter_illegal_substchar(convd,
- MBSTRG(current_filter_illegal_substchar));
- }
- } else {
- mbfl_buffer_converter_illegal_substchar(convd, 0x3f);
- }
+ mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
/* do it */
ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
@@ -5017,17 +4998,7 @@ static inline char* php_mb_chr(zend_long cp, const char* enc_name, size_t *outpu
if (php_mb_is_no_encoding_utf8(no_enc)) {
if (0 > cp || cp > 0x10ffff || (cp > 0xd7ff && 0xe000 > cp)) {
- if (php_mb_is_no_encoding_utf8(MBSTRG(current_internal_encoding)->no_encoding)) {
- cp = MBSTRG(current_filter_illegal_substchar);
- } else if (php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) {
- if (0xd800 > MBSTRG(current_filter_illegal_substchar) || MBSTRG(current_filter_illegal_substchar) > 0xdfff) {
- cp = MBSTRG(current_filter_illegal_substchar);
- } else {
- cp = 0x3f;
- }
- } else {
- cp = 0x3f;
- }
+ cp = MBSTRG(current_filter_illegal_substchar);
}
if (cp < 0x80) {
diff --git a/ext/mbstring/tests/bug69086.phpt b/ext/mbstring/tests/bug69086.phpt
index 921d61cca4..9566e10968 100644
--- a/ext/mbstring/tests/bug69086.phpt
+++ b/ext/mbstring/tests/bug69086.phpt
@@ -8,7 +8,13 @@ mb_substitute_character(0xfffd);
var_dump("?" === mb_convert_encoding("\x80", "Shift_JIS", "EUC-JP"));
mb_internal_encoding("UCS-4BE");
var_dump("\x00\x00\xff\xfd" === mb_convert_encoding("\x80", "UCS-4BE", "UTF-8"));
+
+mb_internal_encoding("UTF-8");
+mb_substitute_character(0xfffd);
+var_dump("\u{fffd}" === mb_convert_encoding("\x80", "UTF-8", "EUC-JP-2004"));
+
?>
--EXPECT--
bool(true)
bool(true)
+bool(true)
diff --git a/ext/mbstring/tests/mb_chr.phpt b/ext/mbstring/tests/mb_chr.phpt
index 19e1a704ec..8ec35920c3 100644
--- a/ext/mbstring/tests/mb_chr.phpt
+++ b/ext/mbstring/tests/mb_chr.phpt
@@ -22,7 +22,7 @@ var_dump(
mb_internal_encoding("EUC-JP");
mb_substitute_character(0xa4a2);
var_dump(
- "?" === mb_chr(0xd800, "UTF-8")
+ "\u{a4a2}" === mb_chr(0xd800, "UTF-8")
);
// Invalid
diff --git a/ext/mbstring/tests/mb_substitute_character_variation2.phpt b/ext/mbstring/tests/mb_substitute_character_variation2.phpt
index 202561afc7..6248174aa6 100644
--- a/ext/mbstring/tests/mb_substitute_character_variation2.phpt
+++ b/ext/mbstring/tests/mb_substitute_character_variation2.phpt
@@ -35,5 +35,5 @@ var_dump(bin2hex(mb_convert_encoding($string_mb, "ISO-8859-1", "UTF-8")));
string(14) "3f3f3f3f3f3f3f"
string(14) "42424242424242"
string(0) ""
-string(0) ""
+string(14) "3f3f3f3f3f3f3f"
===DONE===