summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNikita Popov <nikita.ppv@gmail.com>2017-08-03 22:41:15 +0200
committerNikita Popov <nikita.ppv@gmail.com>2017-08-03 22:41:15 +0200
commit5caf05f6c53f0d2eb87e248944d1000a3277e12f (patch)
tree8fd7c3bacbe707a947b8731ca340ddddf4383697
parentc98714f19e6f033a7b45a24cf73fc9460cee3254 (diff)
parente53162a32b011ef22c3e0210e7af334d968f227c (diff)
downloadphp-git-5caf05f6c53f0d2eb87e248944d1000a3277e12f.tar.gz
Merge branch 'PHP-7.2'
-rw-r--r--ext/mbstring/mbstring.c148
-rw-r--r--ext/mbstring/tests/mb_chr.phpt22
-rw-r--r--ext/mbstring/tests/mb_ord.phpt2
3 files changed, 38 insertions, 134 deletions
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index a5430b4442..57f298eaae 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -112,8 +112,6 @@ static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);
-static inline zend_bool php_mb_is_no_encoding_unicode(enum mbfl_no_encoding no_enc);
-
static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
/* }}} */
@@ -3015,13 +3013,6 @@ static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding
}
-/* See mbfl_no_encoding definition for list of unicode encodings */
-static inline zend_bool php_mb_is_no_encoding_unicode(enum mbfl_no_encoding no_enc)
-{
- return (no_enc >= mbfl_no_encoding_ucs4 && no_enc <= mbfl_no_encoding_utf8_sb);
-}
-
-
/* See mbfl_no_encoding definition for list of UTF-8 encodings */
static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
{
@@ -4893,7 +4884,7 @@ static inline zend_long php_mb_ord(const char* str, size_t str_len, const char*
const mbfl_encoding *enc;
enum mbfl_no_encoding no_enc;
char* ret;
- size_t ret_len, char_len;
+ size_t ret_len;
zend_long cp;
enc = php_mb_get_encoding(enc_name);
@@ -4902,50 +4893,21 @@ static inline zend_long php_mb_ord(const char* str, size_t str_len, const char*
}
no_enc = enc->no_encoding;
- if (php_mb_is_no_encoding_unicode(no_enc)) {
- ret = php_mb_convert_encoding_ex(str, str_len, &mbfl_encoding_ucs4be, enc, &ret_len);
-
- if (ret == NULL) {
- return -1;
- }
-
- cp = (unsigned char) ret[0] << 24 | \
- (unsigned char) ret[1] << 16 | \
- (unsigned char) ret[2] << 8 | \
- (unsigned char) ret[3];
-
- efree(ret);
-
- return cp;
-
- } else if (php_mb_is_unsupported_no_encoding(no_enc)) {
+ if (php_mb_is_unsupported_no_encoding(no_enc)) {
php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc_name);
return -1;
}
- ret = php_mb_convert_encoding_ex(str, str_len, enc, enc, &ret_len);
+ ret = php_mb_convert_encoding_ex(str, str_len, &mbfl_encoding_ucs4be, enc, &ret_len);
if (ret == NULL) {
return -1;
}
- char_len = php_mb_mbchar_bytes_ex(ret, enc);
-
- if (char_len == 1) {
- cp = (unsigned char) ret[0];
- } else if (char_len == 2) {
- cp = ((unsigned char) ret[0] << 8) | \
- (unsigned char) ret[1];
- } else if (char_len == 3) {
- cp = ((unsigned char) ret[0] << 16) | \
- ((unsigned char) ret[1] << 8) | \
- (unsigned char) ret[2];
- } else {
- cp = ((unsigned char) ret[0] << 24) | \
- ((unsigned char) ret[1] << 16) | \
- ((unsigned char) ret[2] << 8) | \
- (unsigned char) ret[3];
- }
+ cp = (unsigned char) ret[0] << 24 | \
+ (unsigned char) ret[1] << 16 | \
+ (unsigned char) ret[2] << 8 | \
+ (unsigned char) ret[3];
efree(ret);
@@ -4994,11 +4956,18 @@ static inline char* php_mb_chr(zend_long cp, const char* enc_name, size_t *outpu
}
no_enc = enc->no_encoding;
+ if (php_mb_is_unsupported_no_encoding(no_enc)) {
+ php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc_name);
+ return NULL;
+ }
- if (php_mb_is_no_encoding_utf8(no_enc)) {
+ if (cp < 0 || cp > 0x10ffff) {
+ return NULL;
+ }
- if (0 > cp || cp > 0x10ffff || (cp > 0xd7ff && 0xe000 > cp)) {
- cp = MBSTRG(current_filter_illegal_substchar);
+ if (php_mb_is_no_encoding_utf8(no_enc)) {
+ if (cp > 0xd7ff && 0xe000 > cp) {
+ return NULL;
}
if (cp < 0x80) {
@@ -5034,80 +5003,31 @@ static inline char* php_mb_chr(zend_long cp, const char* enc_name, size_t *outpu
}
return ret;
+ }
- } else if (php_mb_is_no_encoding_unicode(no_enc)) {
-
- if (0 > cp || 0x10ffff < cp) {
-
- if (php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) {
- cp = MBSTRG(current_filter_illegal_substchar);
- } else {
- cp = 0x3f;
- }
-
- }
-
- buf_len = 4;
- buf = (char *) safe_emalloc(buf_len, 1, 1);
- buf[0] = (cp >> 24) & 0xff;
- buf[1] = (cp >> 16) & 0xff;
- buf[2] = (cp >> 8) & 0xff;
- buf[3] = cp & 0xff;
- buf[4] = 0;
+ buf_len = 4;
+ buf = (char *) safe_emalloc(buf_len, 1, 1);
+ buf[0] = (cp >> 24) & 0xff;
+ buf[1] = (cp >> 16) & 0xff;
+ buf[2] = (cp >> 8) & 0xff;
+ buf[3] = cp & 0xff;
+ buf[4] = 0;
+ {
+ long orig_illegalchars = MBSTRG(illegalchars);
+ MBSTRG(illegalchars) = 0;
ret = php_mb_convert_encoding_ex(buf, buf_len, enc, &mbfl_encoding_ucs4be, &ret_len);
- efree(buf);
-
- if (output_len) {
- *output_len = ret_len;
+ if (MBSTRG(illegalchars) != 0) {
+ efree(buf);
+ efree(ret);
+ MBSTRG(illegalchars) = orig_illegalchars;
+ return NULL;
}
- return ret;
-
- } else if (php_mb_is_unsupported_no_encoding(no_enc)) {
- php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc_name);
- return NULL;
+ MBSTRG(illegalchars) = orig_illegalchars;
}
- if (0 > cp || cp > 0x100000000) {
- if (no_enc == MBSTRG(current_internal_encoding)->no_encoding) {
- cp = MBSTRG(current_filter_illegal_substchar);
- } else {
- cp = 0x3f;
- }
- }
-
- if (cp < 0x100) {
- buf_len = 1;
- buf = (char *) safe_emalloc(buf_len, 1, 1);
- buf[0] = cp;
- buf[1] = 0;
- } else if (cp < 0x10000) {
- buf_len = 2;
- buf = (char *) safe_emalloc(buf_len, 1, 1);
- buf[0] = cp >> 8;
- buf[1] = cp & 0xff;
- buf[2] = 0;
- } else if (cp < 0x1000000) {
- buf_len = 3;
- buf = (char *) safe_emalloc(buf_len, 1, 1);
- buf[0] = cp >> 16;
- buf[1] = (cp >> 8) & 0xff;
- buf[2] = cp & 0xff;
- buf[3] = 0;
- } else {
- buf_len = 4;
- buf = (char *) safe_emalloc(buf_len, 1, 1);
- buf[0] = cp >> 24;
- buf[1] = (cp >> 16) & 0xff;
- buf[2] = (cp >> 8) & 0xff;
- buf[3] = cp & 0xff;
- buf[4] = 0;
- }
-
- ret = php_mb_convert_encoding_ex(buf, buf_len, enc, enc, &ret_len);
efree(buf);
-
if (output_len) {
*output_len = ret_len;
}
diff --git a/ext/mbstring/tests/mb_chr.phpt b/ext/mbstring/tests/mb_chr.phpt
index 8ec35920c3..d61178af4d 100644
--- a/ext/mbstring/tests/mb_chr.phpt
+++ b/ext/mbstring/tests/mb_chr.phpt
@@ -6,23 +6,9 @@ mb_chr()
<?php
var_dump(
"\u{20bb7}" === mb_chr(0x20bb7),
- "\x8f\xa1\xef" === mb_chr(0x8fa1ef, "EUC-JP-2004"),
- "?" === mb_chr(0xd800)
-);
-
-mb_internal_encoding("UCS-4BE");
-mb_substitute_character(0xfffd);
-var_dump(
- "\u{fffd}" === mb_chr(0xd800, "UTF-8")
-);
-var_dump(
- "\u{fffd}" === mb_chr(0xd800, "UTF-8")
-);
-
-mb_internal_encoding("EUC-JP");
-mb_substitute_character(0xa4a2);
-var_dump(
- "\u{a4a2}" === mb_chr(0xd800, "UTF-8")
+ "\x8f\xa1\xef" === mb_chr(0x50aa, "EUC-JP-2004"),
+ false === mb_chr(0xd800),
+ false === mb_chr(0x1f600, "EUC-JP-2004")
);
// Invalid
@@ -39,8 +25,6 @@ bool(true)
bool(true)
bool(true)
bool(true)
-bool(true)
-bool(true)
Warning: mb_chr(): Unknown encoding "typo" in %s on line %d
diff --git a/ext/mbstring/tests/mb_ord.phpt b/ext/mbstring/tests/mb_ord.phpt
index e3f5343fd8..4bf0d0c0e4 100644
--- a/ext/mbstring/tests/mb_ord.phpt
+++ b/ext/mbstring/tests/mb_ord.phpt
@@ -7,7 +7,7 @@ mb_ord()
var_dump(
0x20bb7 === mb_ord("\u{20bb7}"),
0x3f === mb_ord("\u{d800}"),
- 0x8fa1ef === mb_ord("\x8f\xa1\xef", "EUC-JP-2004")
+ 0x50aa === mb_ord("\x8f\xa1\xef", "EUC-JP-2004")
);
// Invalid