diff options
author | Nikita Popov <nikita.ppv@gmail.com> | 2017-08-03 21:57:35 +0200 |
---|---|---|
committer | Nikita Popov <nikita.ppv@gmail.com> | 2017-08-03 21:57:35 +0200 |
commit | c98714f19e6f033a7b45a24cf73fc9460cee3254 (patch) | |
tree | 7eb8a69f52747f839d43f2dad1505b4c42042739 | |
parent | 3d948d77d112684781fd81e7124350d3a32e5245 (diff) | |
parent | fb9bf5b64b6c09b9d93bbd1dadd64884e0af66f3 (diff) | |
download | php-git-c98714f19e6f033a7b45a24cf73fc9460cee3254.tar.gz |
Merge branch 'PHP-7.2'
-rw-r--r-- | ext/mbstring/libmbfl/mbfl/mbfl_convert.c | 22 | ||||
-rw-r--r-- | ext/mbstring/mbstring.c | 35 | ||||
-rw-r--r-- | ext/mbstring/tests/bug69086.phpt | 6 | ||||
-rw-r--r-- | ext/mbstring/tests/mb_chr.phpt | 2 | ||||
-rw-r--r-- | ext/mbstring/tests/mb_substitute_character_variation2.phpt | 2 |
5 files changed, 29 insertions, 38 deletions
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c index 2f98b58178..69af1476c5 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c @@ -466,14 +466,26 @@ int mbfl_convert_filter_strcat(mbfl_convert_filter *filter, const unsigned char int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter) { - int mode_backup, ret, n, m, r; + int mode_backup, substchar_backup, ret, n, m, r; ret = 0; + mode_backup = filter->illegal_mode; - filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE; + substchar_backup = filter->illegal_substchar; + + /* The used substitution character may not be supported by the target character encoding. + * If that happens, first try to use "?" instead and if that also fails, silently drop the + * character. */ + if (filter->illegal_mode == MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR + && filter->illegal_substchar != 0x3f) { + filter->illegal_substchar = 0x3f; + } else { + filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE; + } + switch (mode_backup) { case MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR: - ret = (*filter->filter_function)(filter->illegal_substchar, filter); + ret = (*filter->filter_function)(substchar_backup, filter); break; case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG: if (c >= 0) { @@ -559,14 +571,16 @@ mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter) } ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";"); } else { - ret = (*filter->filter_function)(filter->illegal_substchar, filter); + ret = (*filter->filter_function)(substchar_backup, filter); } } break; default: break; } + filter->illegal_mode = mode_backup; + filter->illegal_substchar = substchar_backup; filter->num_illegalchar++; return ret; diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 584523ecaf..a5430b4442 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -970,6 +970,7 @@ static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_leng if (convd == NULL) { return (size_t) -1; } + mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); @@ -3053,27 +3054,7 @@ MBSTRING_API char *php_mb_convert_encoding_ex(const char *input, size_t length, } mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); - if (string.encoding == MBSTRG(current_internal_encoding)) { - mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); - } else if (php_mb_is_no_encoding_unicode(string.encoding->no_encoding) - && php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) { - - if (php_mb_is_no_encoding_utf8(string.encoding->no_encoding)) { - if (MBSTRG(current_filter_illegal_substchar) > 0xd7ff && - 0xe000 > MBSTRG(current_filter_illegal_substchar) - ) { - mbfl_buffer_converter_illegal_substchar(convd, 0x3f); - } else { - mbfl_buffer_converter_illegal_substchar(convd, - MBSTRG(current_filter_illegal_substchar)); - } - } else { - mbfl_buffer_converter_illegal_substchar(convd, - MBSTRG(current_filter_illegal_substchar)); - } - } else { - mbfl_buffer_converter_illegal_substchar(convd, 0x3f); - } + mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); /* do it */ ret = mbfl_buffer_converter_feed_result(convd, &string, &result); @@ -5017,17 +4998,7 @@ static inline char* php_mb_chr(zend_long cp, const char* enc_name, size_t *outpu if (php_mb_is_no_encoding_utf8(no_enc)) { if (0 > cp || cp > 0x10ffff || (cp > 0xd7ff && 0xe000 > cp)) { - if (php_mb_is_no_encoding_utf8(MBSTRG(current_internal_encoding)->no_encoding)) { - cp = MBSTRG(current_filter_illegal_substchar); - } else if (php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) { - if (0xd800 > MBSTRG(current_filter_illegal_substchar) || MBSTRG(current_filter_illegal_substchar) > 0xdfff) { - cp = MBSTRG(current_filter_illegal_substchar); - } else { - cp = 0x3f; - } - } else { - cp = 0x3f; - } + cp = MBSTRG(current_filter_illegal_substchar); } if (cp < 0x80) { diff --git a/ext/mbstring/tests/bug69086.phpt b/ext/mbstring/tests/bug69086.phpt index 921d61cca4..9566e10968 100644 --- a/ext/mbstring/tests/bug69086.phpt +++ b/ext/mbstring/tests/bug69086.phpt @@ -8,7 +8,13 @@ mb_substitute_character(0xfffd); var_dump("?" === mb_convert_encoding("\x80", "Shift_JIS", "EUC-JP")); mb_internal_encoding("UCS-4BE"); var_dump("\x00\x00\xff\xfd" === mb_convert_encoding("\x80", "UCS-4BE", "UTF-8")); + +mb_internal_encoding("UTF-8"); +mb_substitute_character(0xfffd); +var_dump("\u{fffd}" === mb_convert_encoding("\x80", "UTF-8", "EUC-JP-2004")); + ?> --EXPECT-- bool(true) bool(true) +bool(true) diff --git a/ext/mbstring/tests/mb_chr.phpt b/ext/mbstring/tests/mb_chr.phpt index 19e1a704ec..8ec35920c3 100644 --- a/ext/mbstring/tests/mb_chr.phpt +++ b/ext/mbstring/tests/mb_chr.phpt @@ -22,7 +22,7 @@ var_dump( mb_internal_encoding("EUC-JP"); mb_substitute_character(0xa4a2); var_dump( - "?" === mb_chr(0xd800, "UTF-8") + "\u{a4a2}" === mb_chr(0xd800, "UTF-8") ); // Invalid diff --git a/ext/mbstring/tests/mb_substitute_character_variation2.phpt b/ext/mbstring/tests/mb_substitute_character_variation2.phpt index 202561afc7..6248174aa6 100644 --- a/ext/mbstring/tests/mb_substitute_character_variation2.phpt +++ b/ext/mbstring/tests/mb_substitute_character_variation2.phpt @@ -35,5 +35,5 @@ var_dump(bin2hex(mb_convert_encoding($string_mb, "ISO-8859-1", "UTF-8"))); string(14) "3f3f3f3f3f3f3f" string(14) "42424242424242" string(0) "" -string(0) "" +string(14) "3f3f3f3f3f3f3f" ===DONE=== |