summaryrefslogtreecommitdiff
path: root/ext/mbstring/mbstring.c
diff options
context:
space:
mode:
authorMasaki Kagaya <masakielastic@gmail.com>2016-08-14 06:29:23 +0900
committerMasaki Kagaya <masakielastic@gmail.com>2016-08-14 06:29:23 +0900
commitf49a5a67cfde6c318b53ac26c4e011e0c8483647 (patch)
tree59007db4bfb7c7f0edf53914fd8fa768d070f958 /ext/mbstring/mbstring.c
parenta22d27df4cdabe6cab516c1f967d56cc5a8fb891 (diff)
parent84a94ca62f5c2a4451fb6500fc017f313524a758 (diff)
downloadphp-git-f49a5a67cfde6c318b53ac26c4e011e0c8483647.tar.gz
delete duplicate functions
Diffstat (limited to 'ext/mbstring/mbstring.c')
-rw-r--r--ext/mbstring/mbstring.c892
1 files changed, 675 insertions, 217 deletions
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index a646d158e5..a0a1eee0a1 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -2,7 +2,7 @@
+----------------------------------------------------------------------+
| PHP Version 7 |
+----------------------------------------------------------------------+
- | Copyright (c) 1997-2015 The PHP Group |
+ | Copyright (c) 1997-2016 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
@@ -104,7 +104,6 @@ static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *lis
static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
-static inline int php_mb_check_encoding(const char *input, size_t length, const char *enc);
/* }}} */
/* {{{ php_mb_default_identify_list */
@@ -205,13 +204,6 @@ static const struct mb_overload_def mb_ovld[] = {
{MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
{MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
{MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
-#if HAVE_MBREGEX
- {MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
- {MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
- {MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
- {MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
- {MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
-#endif
{0, NULL, NULL, NULL}
};
/* }}} */
@@ -438,6 +430,21 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
+ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_scrub, 0, 0, 1)
+ ZEND_ARG_INFO(0, str)
+ ZEND_ARG_INFO(0, encoding)
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ord, 0, 0, 1)
+ ZEND_ARG_INFO(0, str)
+ ZEND_ARG_INFO(0, encoding)
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_chr, 0, 0, 1)
+ ZEND_ARG_INFO(0, cp)
+ ZEND_ARG_INFO(0, encoding)
+ZEND_END_ARG_INFO()
+
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
@@ -563,6 +570,9 @@ const zend_function_entry mbstring_functions[] = {
PHP_FE(mb_send_mail, arginfo_mb_send_mail)
PHP_FE(mb_get_info, arginfo_mb_get_info)
PHP_FE(mb_check_encoding, arginfo_mb_check_encoding)
+ PHP_FE(mb_ord, arginfo_mb_ord)
+ PHP_FE(mb_chr, arginfo_mb_chr)
+ PHP_FE(mb_scrub, arginfo_mb_scrub)
#if HAVE_MBREGEX
PHP_MBREGEX_FUNCTION_ENTRIES
#endif
@@ -580,7 +590,7 @@ zend_module_entry mbstring_module_entry = {
PHP_RINIT(mbstring),
PHP_RSHUTDOWN(mbstring),
PHP_MINFO(mbstring),
- NO_VERSION_YET,
+ PHP_MBSTRING_VERSION,
PHP_MODULE_GLOBALS(mbstring),
PHP_GINIT(mbstring),
PHP_GSHUTDOWN(mbstring),
@@ -599,7 +609,7 @@ static sapi_post_entry php_post_entries[] = {
#ifdef COMPILE_DL_MBSTRING
#ifdef ZTS
-ZEND_TSRMLS_CACHE_DEFINE();
+ZEND_TSRMLS_CACHE_DEFINE()
#endif
ZEND_GET_MODULE(mbstring)
#endif
@@ -695,8 +705,8 @@ static sapi_post_entry mbstr_post_entries[] = {
static int
php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
{
- int size, bauto, ret = SUCCESS;
- size_t n;
+ int bauto, ret = SUCCESS;
+ size_t n, size;
char *p, *p1, *p2, *endp, *tmpstr;
const mbfl_encoding **entry, **list;
@@ -1213,7 +1223,7 @@ static PHP_INI_MH(OnUpdate_mbstring_language)
{
enum mbfl_no_language no_language;
- no_language = mbfl_name2no_language(new_value->val);
+ no_language = mbfl_name2no_language(ZSTR_VAL(new_value));
if (no_language == mbfl_no_language_invalid) {
MBSTRG(language) = mbfl_no_language_neutral;
return FAILURE;
@@ -1239,7 +1249,7 @@ static PHP_INI_MH(OnUpdate_mbstring_detect_order)
return SUCCESS;
}
- if (FAILURE == php_mb_parse_encoding_list(new_value->val, new_value->len, &list, &size, 1)) {
+ if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) {
return FAILURE;
}
@@ -1272,7 +1282,7 @@ static PHP_INI_MH(OnUpdate_mbstring_http_input)
return SUCCESS;
}
- if (FAILURE == php_mb_parse_encoding_list(new_value->val, new_value->len, &list, &size, 1)) {
+ if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) {
return FAILURE;
}
@@ -1295,7 +1305,7 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output)
{
const mbfl_encoding *encoding;
- if (new_value == NULL || new_value->len == 0) {
+ if (new_value == NULL || ZSTR_LEN(new_value) == 0) {
encoding = mbfl_name2encoding(get_output_encoding());
if (!encoding) {
MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
@@ -1303,7 +1313,7 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output)
return SUCCESS;
}
} else {
- encoding = mbfl_name2encoding(new_value->val);
+ encoding = mbfl_name2encoding(ZSTR_VAL(new_value));
if (!encoding) {
MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
@@ -1359,8 +1369,8 @@ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
}
if (stage & (PHP_INI_STAGE_STARTUP | PHP_INI_STAGE_SHUTDOWN | PHP_INI_STAGE_RUNTIME)) {
- if (new_value && new_value->len) {
- return _php_mb_ini_mbstring_internal_encoding_set(new_value->val, new_value->len);
+ if (new_value && ZSTR_LEN(new_value)) {
+ return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
} else {
return _php_mb_ini_mbstring_internal_encoding_set(get_internal_encoding(), strlen(get_internal_encoding())+1);
}
@@ -1383,20 +1393,20 @@ static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
char *endptr = NULL;
if (new_value != NULL) {
- if (strcasecmp("none", new_value->val) == 0) {
+ if (strcasecmp("none", ZSTR_VAL(new_value)) == 0) {
MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
- } else if (strcasecmp("long", new_value->val) == 0) {
+ } else if (strcasecmp("long", ZSTR_VAL(new_value)) == 0) {
MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
- } else if (strcasecmp("entity", new_value->val) == 0) {
+ } else if (strcasecmp("entity", ZSTR_VAL(new_value)) == 0) {
MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
} else {
MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
- if (new_value->len >0) {
- c = strtol(new_value->val, &endptr, 0);
+ if (ZSTR_LEN(new_value) > 0) {
+ c = strtol(ZSTR_VAL(new_value), &endptr, 0);
if (*endptr == '\0') {
MBSTRG(filter_illegal_substchar) = c;
MBSTRG(current_filter_illegal_substchar) = c;
@@ -1446,8 +1456,8 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
}
tmp = php_trim(new_value, NULL, 0, 3);
- if (tmp->len > 0) {
- if (!(re = _php_mb_compile_regex(tmp->val))) {
+ if (ZSTR_LEN(tmp) > 0) {
+ if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) {
zend_string_release(tmp);
return FAILURE;
}
@@ -1554,6 +1564,9 @@ static PHP_GSHUTDOWN_FUNCTION(mbstring)
/* {{{ PHP_MINIT_FUNCTION(mbstring) */
PHP_MINIT_FUNCTION(mbstring)
{
+#if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
+ZEND_TSRMLS_CACHE_UPDATE();
+#endif
__mbfl_allocators = &_php_mb_allocators;
REGISTER_INI_ENTRIES();
@@ -1599,6 +1612,8 @@ PHP_MSHUTDOWN_FUNCTION(mbstring)
{
UNREGISTER_INI_ENTRIES();
+ zend_multibyte_restore_functions();
+
#if HAVE_MBREGEX
PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
@@ -1721,6 +1736,13 @@ PHP_MINFO_FUNCTION(mbstring)
snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
php_info_print_table_row(2, "libmbfl version", tmp);
}
+#if HAVE_ONIG
+ {
+ char tmp[256];
+ snprintf(tmp, sizeof(tmp), "%d.%d.%d", ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY);
+ php_info_print_table_row(2, "oniguruma version", tmp);
+ }
+#endif
php_info_print_table_end();
php_info_print_table_start();
@@ -1749,7 +1771,7 @@ PHP_FUNCTION(mb_language)
} else {
zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0);
if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
- php_error_docref(NULL, E_WARNING, "Unknown language \"%s\"", name->val);
+ php_error_docref(NULL, E_WARNING, "Unknown language \"%s\"", ZSTR_VAL(name));
RETVAL_FALSE;
} else {
RETVAL_TRUE;
@@ -1970,109 +1992,6 @@ PHP_FUNCTION(mb_detect_order)
}
/* }}} */
-static const enum mbfl_no_encoding php_mb_unsupported_no_encoding_list[] = {
- mbfl_no_encoding_pass,
- mbfl_no_encoding_auto,
- mbfl_no_encoding_wchar,
- mbfl_no_encoding_byte2be,
- mbfl_no_encoding_byte2le,
- mbfl_no_encoding_byte4be,
- mbfl_no_encoding_byte4le,
- mbfl_no_encoding_base64,
- mbfl_no_encoding_uuencode,
- mbfl_no_encoding_html_ent,
- mbfl_no_encoding_qprint,
- mbfl_no_encoding_utf7,
- mbfl_no_encoding_utf7imap,
- mbfl_no_encoding_2022kr,
- mbfl_no_encoding_jis,
- mbfl_no_encoding_2022jp,
- mbfl_no_encoding_2022jpms,
- mbfl_no_encoding_jis_ms,
- mbfl_no_encoding_2022jp_2004,
- mbfl_no_encoding_2022jp_kddi,
- mbfl_no_encoding_cp50220,
- mbfl_no_encoding_cp50220raw,
- mbfl_no_encoding_cp50221,
- mbfl_no_encoding_cp50222
-};
-
-static inline int php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)
-{
- int i;
- int size = sizeof(php_mb_unsupported_no_encoding_list)/sizeof(php_mb_unsupported_no_encoding_list[0]);
-
- for (i = 0; i < size; i++) {
-
- if (no_enc == php_mb_unsupported_no_encoding_list[i]) {
- return 1;
- }
-
- }
-
- return 0;
-}
-
-static const enum mbfl_no_encoding php_mb_no_encoding_unicode_list[] = {
- mbfl_no_encoding_utf8,
- mbfl_no_encoding_utf8_docomo,
- mbfl_no_encoding_utf8_kddi_a,
- mbfl_no_encoding_utf8_kddi_b,
- mbfl_no_encoding_utf8_sb,
- mbfl_no_encoding_ucs4,
- mbfl_no_encoding_ucs4be,
- mbfl_no_encoding_ucs4le,
- mbfl_no_encoding_utf32,
- mbfl_no_encoding_utf32be,
- mbfl_no_encoding_utf32le,
- mbfl_no_encoding_ucs2,
- mbfl_no_encoding_ucs2be,
- mbfl_no_encoding_ucs2le,
- mbfl_no_encoding_utf16,
- mbfl_no_encoding_utf16be,
- mbfl_no_encoding_utf16le
-};
-
-static inline int php_mb_is_no_encoding_unicode(enum mbfl_no_encoding no_enc)
-{
- int i;
- int size = sizeof(php_mb_no_encoding_unicode_list)/sizeof(php_mb_no_encoding_unicode_list[0]);
-
- for (i = 0; i < size; i++) {
-
- if (no_enc == php_mb_no_encoding_unicode_list[i]) {
- return 1;
- }
-
- }
-
- return 0;
-}
-
-static const enum mbfl_no_encoding php_mb_no_encoding_utf8_list[] = {
- mbfl_no_encoding_utf8,
- mbfl_no_encoding_utf8_docomo,
- mbfl_no_encoding_utf8_kddi_a,
- mbfl_no_encoding_utf8_kddi_b,
- mbfl_no_encoding_utf8_sb
-};
-
-static inline int php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
-{
- int i;
- int size = sizeof(php_mb_no_encoding_utf8_list)/sizeof(php_mb_no_encoding_utf8_list[0]);
-
- for (i = 0; i < size; i++) {
-
- if (no_enc == php_mb_no_encoding_utf8_list[i]) {
- return 1;
- }
-
- }
-
- return 0;
-}
-
static inline int php_mb_check_code_point(long cp)
{
enum mbfl_no_encoding no_enc;
@@ -2122,7 +2041,7 @@ static inline int php_mb_check_code_point(long cp)
} else {
buf_len = 4;
buf = (char *) safe_emalloc(buf_len, 1, 1);
- buf[0] = cp >> 24;
+ buf[0] = cp >> 24;
buf[1] = (cp >> 16) & 0xff;
buf[2] = (cp >> 8) & 0xff;
buf[3] = cp & 0xff;
@@ -2265,8 +2184,13 @@ PHP_FUNCTION(mb_parse_str)
detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr);
} else {
zval tmp;
- zend_array *symbol_table = zend_rebuild_symbol_table();
+ zend_array *symbol_table;
+ if (zend_forbid_dynamic_call("mb_parse_str() with a single argument") == FAILURE) {
+ efree(encstr);
+ return;
+ }
+ symbol_table = zend_rebuild_symbol_table();
ZVAL_ARR(&tmp, symbol_table);
detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr);
}
@@ -2390,14 +2314,21 @@ PHP_FUNCTION(mb_strlen)
int n;
mbfl_string string;
char *enc_name = NULL;
- size_t enc_name_len;
+ size_t enc_name_len, string_len;
mbfl_string_init(&string);
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string_len, &enc_name, &enc_name_len) == FAILURE) {
return;
}
+ if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
+ php_error_docref(NULL, E_WARNING, "String overflows the max allowed length of %u", UINT_MAX);
+ return;
+ }
+
+ string.len = (uint32_t)string_len;
+
string.no_language = MBSTRG(language);
if (enc_name == NULL) {
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
@@ -2423,10 +2354,10 @@ PHP_FUNCTION(mb_strlen)
PHP_FUNCTION(mb_strpos)
{
int n, reverse = 0;
- zend_long offset;
+ zend_long offset = 0, slen;
mbfl_string haystack, needle;
char *enc_name = NULL;
- size_t enc_name_len;
+ size_t enc_name_len, haystack_len, needle_len;
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
@@ -2434,12 +2365,22 @@ PHP_FUNCTION(mb_strpos)
haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
- offset = 0;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &enc_name, &enc_name_len) == FAILURE) {
return;
}
+ if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
+ php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
+ php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ }
+
+ haystack.len = (uint32_t)haystack_len;
+ needle.len = (uint32_t)needle_len;
+
if (enc_name != NULL) {
haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
if (haystack.no_encoding == mbfl_no_encoding_invalid) {
@@ -2448,7 +2389,11 @@ PHP_FUNCTION(mb_strpos)
}
}
- if (offset < 0 || offset > mbfl_strlen(&haystack)) {
+ slen = mbfl_strlen(&haystack);
+ if (offset < 0) {
+ offset += slen;
+ }
+ if (offset < 0 || offset > slen) {
php_error_docref(NULL, E_WARNING, "Offset not contained in string");
RETURN_FALSE;
}
@@ -2489,7 +2434,7 @@ PHP_FUNCTION(mb_strrpos)
int n;
mbfl_string haystack, needle;
char *enc_name = NULL;
- size_t enc_name_len;
+ size_t enc_name_len, haystack_len, needle_len;
zval *zoffset = NULL;
long offset = 0, str_flg;
char *enc_name2 = NULL;
@@ -2502,10 +2447,21 @@ PHP_FUNCTION(mb_strrpos)
needle.no_language = MBSTRG(language);
needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|zs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
return;
}
+ if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
+ php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
+ php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ }
+
+ haystack.len = (uint32_t)haystack_len;
+ needle.len = (uint32_t)needle_len;
+
if (zoffset) {
if (Z_TYPE_P(zoffset) == IS_STRING) {
enc_name2 = Z_STRVAL_P(zoffset);
@@ -2584,17 +2540,27 @@ PHP_FUNCTION(mb_strrpos)
Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stripos)
{
- int n;
- zend_long offset;
+ int n = -1;
+ zend_long offset = 0;
mbfl_string haystack, needle;
const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
- size_t from_encoding_len;
- n = -1;
- offset = 0;
+ size_t from_encoding_len, haystack_len, needle_len;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
return;
}
+
+ if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
+ php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
+ php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ }
+
+ haystack.len = (uint32_t)haystack_len;
+ needle.len = (uint32_t)needle_len;
+
if (needle.len == 0) {
php_error_docref(NULL, E_WARNING, "Empty delimiter");
RETURN_FALSE;
@@ -2613,18 +2579,27 @@ PHP_FUNCTION(mb_stripos)
Finds position of last occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_strripos)
{
- int n;
- zend_long offset;
+ int n = -1;
+ zend_long offset = 0;
mbfl_string haystack, needle;
const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
- size_t from_encoding_len;
- n = -1;
- offset = 0;
+ size_t from_encoding_len, haystack_len, needle_len;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
return;
}
+ if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
+ php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
+ php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ }
+
+ haystack.len = (uint32_t)haystack_len;
+ needle.len = (uint32_t)needle_len;
+
n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
if (n >= 0) {
@@ -2642,7 +2617,7 @@ PHP_FUNCTION(mb_strstr)
int n, len, mblen;
mbfl_string haystack, needle, result, *ret = NULL;
char *enc_name = NULL;
- size_t enc_name_len;
+ size_t enc_name_len, haystack_len, needle_len;
zend_bool part = 0;
mbfl_string_init(&haystack);
@@ -2652,10 +2627,21 @@ PHP_FUNCTION(mb_strstr)
needle.no_language = MBSTRG(language);
needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &enc_name, &enc_name_len) == FAILURE) {
return;
}
+ if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
+ php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
+ php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ }
+
+ haystack.len = (uint32_t)haystack_len;
+ needle.len = (uint32_t)needle_len;
+
if (enc_name != NULL) {
haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
if (haystack.no_encoding == mbfl_no_encoding_invalid) {
@@ -2704,7 +2690,7 @@ PHP_FUNCTION(mb_strrchr)
int n, len, mblen;
mbfl_string haystack, needle, result, *ret = NULL;
char *enc_name = NULL;
- size_t enc_name_len;
+ size_t enc_name_len, haystack_len, needle_len;
zend_bool part = 0;
mbfl_string_init(&haystack);
@@ -2714,10 +2700,21 @@ PHP_FUNCTION(mb_strrchr)
needle.no_language = MBSTRG(language);
needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &enc_name, &enc_name_len) == FAILURE) {
return;
}
+ if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
+ php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
+ php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ }
+
+ haystack.len = (uint32_t)haystack_len;
+ needle.len = (uint32_t)needle_len;
+
if (enc_name != NULL) {
haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
if (haystack.no_encoding == mbfl_no_encoding_invalid) {
@@ -2766,7 +2763,7 @@ PHP_FUNCTION(mb_strrchr)
PHP_FUNCTION(mb_stristr)
{
zend_bool part = 0;
- size_t from_encoding_len, len, mblen;
+ size_t from_encoding_len, len, mblen, haystack_len, needle_len;
int n;
mbfl_string haystack, needle, result, *ret = NULL;
const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
@@ -2778,10 +2775,21 @@ PHP_FUNCTION(mb_stristr)
needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
return;
}
+ if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
+ php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
+ php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ }
+
+ haystack.len = (uint32_t)haystack_len;
+ needle.len = (uint32_t)needle_len;
+
if (!needle.len) {
php_error_docref(NULL, E_WARNING, "Empty delimiter");
RETURN_FALSE;
@@ -2830,7 +2838,7 @@ PHP_FUNCTION(mb_strrichr)
{
zend_bool part = 0;
int n, len, mblen;
- size_t from_encoding_len;
+ size_t from_encoding_len, haystack_len, needle_len;
mbfl_string haystack, needle, result, *ret = NULL;
const char *from_encoding = MBSTRG(current_internal_encoding)->name;
mbfl_string_init(&haystack);
@@ -2841,10 +2849,21 @@ PHP_FUNCTION(mb_strrichr)
needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
return;
}
+ if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
+ php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
+ php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ }
+
+ haystack.len = (uint32_t)haystack_len;
+ needle.len = (uint32_t)needle_len;
+
haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
if (haystack.no_encoding == mbfl_no_encoding_invalid) {
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
@@ -2889,7 +2908,7 @@ PHP_FUNCTION(mb_substr_count)
int n;
mbfl_string haystack, needle;
char *enc_name = NULL;
- size_t enc_name_len;
+ size_t enc_name_len, haystack_len, needle_len;
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
@@ -2898,10 +2917,21 @@ PHP_FUNCTION(mb_substr_count)
needle.no_language = MBSTRG(language);
needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &enc_name, &enc_name_len) == FAILURE) {
return;
}
+ if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
+ php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
+ php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ }
+
+ haystack.len = (uint32_t)haystack_len;
+ needle.len = (uint32_t)needle_len;
+
if (enc_name != NULL) {
haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
if (haystack.no_encoding == mbfl_no_encoding_invalid) {
@@ -2928,15 +2958,14 @@ PHP_FUNCTION(mb_substr_count)
Returns part of a string */
PHP_FUNCTION(mb_substr)
{
- size_t argc = ZEND_NUM_ARGS();
- char *str, *encoding;
+ char *str, *encoding = NULL;
zend_long from, len;
int mblen;
size_t str_len, encoding_len;
- zval *z_len = NULL;
+ zend_bool len_is_null = 1;
mbfl_string string, result, *ret;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|zs", &str, &str_len, &from, &z_len, &encoding, &encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", &str, &str_len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) {
return;
}
@@ -2944,7 +2973,7 @@ PHP_FUNCTION(mb_substr)
string.no_language = MBSTRG(language);
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
- if (argc == 4) {
+ if (encoding) {
string.no_encoding = mbfl_name2no_encoding(encoding);
if (string.no_encoding == mbfl_no_encoding_invalid) {
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
@@ -2955,11 +2984,8 @@ PHP_FUNCTION(mb_substr)
string.val = (unsigned char *)str;
string.len = str_len;
- if (argc < 3 || Z_TYPE_P(z_len) == IS_NULL) {
+ if (len_is_null) {
len = str_len;
- } else {
- convert_to_long_ex(z_len);
- len = Z_LVAL_P(z_len);
}
/* measures length */
@@ -3008,22 +3034,28 @@ PHP_FUNCTION(mb_substr)
Returns part of a string */
PHP_FUNCTION(mb_strcut)
{
- size_t argc = ZEND_NUM_ARGS();
- char *encoding;
+ char *encoding = NULL;
zend_long from, len;
- size_t encoding_len;
- zval *z_len = NULL;
+ size_t encoding_len, string_len;
+ zend_bool len_is_null = 1;
mbfl_string string, result, *ret;
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|zs", (char **)&string.val, (int **)&string.len, &from, &z_len, &encoding, &encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", (char **)&string.val, &string_len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) {
return;
}
- if (argc == 4) {
+ if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
+ php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ }
+
+ string.len = (uint32_t)string_len;
+
+ if (encoding) {
string.no_encoding = mbfl_name2no_encoding(encoding);
if (string.no_encoding == mbfl_no_encoding_invalid) {
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
@@ -3031,11 +3063,8 @@ PHP_FUNCTION(mb_strcut)
}
}
- if (argc < 3 || Z_TYPE_P(z_len) == IS_NULL) {
+ if (len_is_null) {
len = string.len;
- } else {
- convert_to_long_ex(z_len);
- len = Z_LVAL_P(z_len);
}
/* if "from" position is negative, count start position from the end
@@ -3080,17 +3109,24 @@ PHP_FUNCTION(mb_strwidth)
int n;
mbfl_string string;
char *enc_name = NULL;
- size_t enc_name_len;
+ size_t enc_name_len, string_len;
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string_len, &enc_name, &enc_name_len) == FAILURE) {
return;
}
+ if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
+ php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ }
+
+ string.len = (uint32_t)string_len;
+
if (enc_name != NULL) {
string.no_encoding = mbfl_name2no_encoding(enc_name);
if (string.no_encoding == mbfl_no_encoding_invalid) {
@@ -3112,8 +3148,8 @@ PHP_FUNCTION(mb_strwidth)
Trim the string in terminal width */
PHP_FUNCTION(mb_strimwidth)
{
- char *str, *trimmarker, *encoding;
- zend_long from, width;
+ char *str, *trimmarker = NULL, *encoding = NULL;
+ zend_long from, width, swidth;
size_t str_len, trimmarker_len, encoding_len;
mbfl_string string, result, marker, *ret;
@@ -3130,7 +3166,7 @@ PHP_FUNCTION(mb_strimwidth)
marker.val = NULL;
marker.len = 0;
- if (ZEND_NUM_ARGS() == 5) {
+ if (encoding) {
string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
if (string.no_encoding == mbfl_no_encoding_invalid) {
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
@@ -3141,17 +3177,29 @@ PHP_FUNCTION(mb_strimwidth)
string.val = (unsigned char *)str;
string.len = str_len;
+ if ((from < 0) || (width < 0)) {
+ swidth = mbfl_strwidth(&string);
+ }
+
+ if (from < 0) {
+ from += swidth;
+ }
+
if (from < 0 || (size_t)from > str_len) {
php_error_docref(NULL, E_WARNING, "Start position is out of range");
RETURN_FALSE;
}
if (width < 0) {
- php_error_docref(NULL, E_WARNING, "Width is negative value");
+ width = swidth + width - from;
+ }
+
+ if (width < 0) {
+ php_error_docref(NULL, E_WARNING, "Width is out of range");
RETURN_FALSE;
}
- if (ZEND_NUM_ARGS() >= 4) {
+ if (trimmarker) {
marker.val = (unsigned char *)trimmarker;
marker.len = trimmarker_len;
}
@@ -3167,6 +3215,31 @@ PHP_FUNCTION(mb_strimwidth)
}
/* }}} */
+
+/* See mbfl_no_encoding definition for list of unsupported encodings */
+static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)
+{
+ return ((no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint)
+ || (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap)
+ || (no_enc >= mbfl_no_encoding_jis && no_enc <= mbfl_no_encoding_2022jpms)
+ || (no_enc >= mbfl_no_encoding_cp50220 && no_enc <= mbfl_no_encoding_cp50222));
+}
+
+
+/* See mbfl_no_encoding definition for list of unicode encodings */
+static inline zend_bool php_mb_is_no_encoding_unicode(enum mbfl_no_encoding no_enc)
+{
+ return (no_enc >= mbfl_no_encoding_ucs4 && no_enc <= mbfl_no_encoding_utf8_sb);
+}
+
+
+/* See mbfl_no_encoding definition for list of UTF-8 encodings */
+static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
+{
+ return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb);
+}
+
+
/* {{{ MBSTRING_API char *php_mb_convert_encoding() */
MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
{
@@ -3237,7 +3310,28 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co
return NULL;
}
mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
- mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
+
+ if (string.no_encoding == MBSTRG(current_internal_encoding)->no_encoding) {
+ mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
+ } else if (php_mb_is_no_encoding_unicode(string.no_encoding) && php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) {
+
+ if (php_mb_is_no_encoding_utf8(string.no_encoding)) {
+
+ if (MBSTRG(current_filter_illegal_substchar) > 0xd7ff &&
+ 0xe000 > MBSTRG(current_filter_illegal_substchar)
+ ) {
+ mbfl_buffer_converter_illegal_substchar(convd, 0x3f);
+ } else {
+ mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
+ }
+
+ } else {
+ mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
+ }
+
+ } else {
+ mbfl_buffer_converter_illegal_substchar(convd, 0x3f);
+ }
/* do it */
ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
@@ -3260,7 +3354,7 @@ PHP_FUNCTION(mb_convert_encoding)
{
char *arg_str, *arg_new;
size_t str_len, new_len;
- zval *arg_old;
+ zval *arg_old = NULL;
size_t size, l, n;
char *_from_encodings = NULL, *ret, *s_free = NULL;
@@ -3271,7 +3365,7 @@ PHP_FUNCTION(mb_convert_encoding)
return;
}
- if (ZEND_NUM_ARGS() == 3) {
+ if (arg_old) {
switch (Z_TYPE_P(arg_old)) {
case IS_ARRAY:
target_hash = Z_ARRVAL_P(arg_old);
@@ -3409,21 +3503,21 @@ PHP_FUNCTION(mb_detect_encoding)
char *str;
size_t str_len;
zend_bool strict=0;
- zval *encoding_list;
+ zval *encoding_list = NULL;
mbfl_string string;
const mbfl_encoding *ret;
const mbfl_encoding **elist, **list;
size_t size;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z!b", &str, &str_len, &encoding_list, &strict) == FAILURE) {
return;
}
/* make encoding list */
list = NULL;
size = 0;
- if (ZEND_NUM_ARGS() >= 2 && !Z_ISNULL_P(encoding_list)) {
+ if (encoding_list) {
switch (Z_TYPE_P(encoding_list)) {
case IS_ARRAY:
if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0)) {
@@ -3487,6 +3581,10 @@ PHP_FUNCTION(mb_list_encodings)
const mbfl_encoding *encoding;
int i;
+ if (zend_parse_parameters_none() == FAILURE) {
+ return;
+ }
+
array_init(return_value);
i = 0;
encodings = mbfl_get_supported_encodings();
@@ -3535,17 +3633,24 @@ PHP_FUNCTION(mb_encode_mimeheader)
char *trans_enc_name = NULL;
size_t trans_enc_name_len;
char *linefeed = "\r\n";
- size_t linefeed_len;
+ size_t linefeed_len, string_len;
zend_long indent = 0;
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|sssl", (char **)&string.val, &string_len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
return;
}
+ if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
+ php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ }
+
+ string.len = (uint32_t)string_len;
+
charset = mbfl_no_encoding_pass;
transenc = mbfl_no_encoding_base64;
@@ -3588,15 +3693,23 @@ PHP_FUNCTION(mb_encode_mimeheader)
PHP_FUNCTION(mb_decode_mimeheader)
{
mbfl_string string, result, *ret;
+ size_t string_len;
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", (char **)&string.val, &string.len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", (char **)&string.val, &string_len) == FAILURE) {
return;
}
+ if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
+ php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ }
+
+ string.len = (uint32_t)string_len;
+
mbfl_string_init(&result);
ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding);
if (ret != NULL) {
@@ -3618,16 +3731,23 @@ PHP_FUNCTION(mb_convert_kana)
char *optstr = NULL;
size_t optstr_len;
char *encname = NULL;
- size_t encname_len;
+ size_t encname_len, string_len;
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|ss", (char **)&string.val, &string_len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
return;
}
+ if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
+ php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
+ return;
+ }
+
+ string.len = (uint32_t)string_len;
+
/* option */
if (optstr != NULL) {
char *p = optstr;
@@ -3948,17 +4068,16 @@ detect_end:
static void
php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
{
- char *str, *encoding;
+ char *str, *encoding = NULL;
size_t str_len, encoding_len;
zval *zconvmap, *hash_entry;
HashTable *target_hash;
- int argc = ZEND_NUM_ARGS();
int i, *convmap, *mapelm, mapsize=0;
zend_bool is_hex = 0;
mbfl_string string, result, *ret;
enum mbfl_no_encoding no_encoding;
- if (zend_parse_parameters(argc, "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
return;
}
@@ -3969,7 +4088,7 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
string.len = str_len;
/* encoding */
- if ((argc == 3 || argc == 4) && encoding_len > 0) {
+ if (encoding && encoding_len > 0) {
no_encoding = mbfl_name2no_encoding(encoding);
if (no_encoding == mbfl_no_encoding_invalid) {
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
@@ -3979,10 +4098,8 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
}
}
- if (argc == 4) {
- if (type == 0 && is_hex) {
- type = 2; /* output in hex format */
- }
+ if (type == 0 && is_hex) {
+ type = 2; /* output in hex format */
}
/* conversion map */
@@ -4162,7 +4279,7 @@ static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t
zval val;
/* FIXME: some locale free implementation is
* really required here,,, */
- php_strtoupper(fld_name->val, fld_name->len);
+ php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
ZVAL_STR(&val, fld_val);
zend_hash_update(ht, fld_name, &val);
@@ -4210,7 +4327,7 @@ out:
zval val;
/* FIXME: some locale free implementation is
* really required here,,, */
- php_strtoupper(fld_name->val, fld_name->len);
+ php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
ZVAL_STR(&val, fld_val);
zend_hash_update(ht, fld_name, &val);
@@ -4284,7 +4401,7 @@ PHP_FUNCTION(mb_send_mail)
MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
}
if (extra_cmd) {
- MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd->val, extra_cmd->len);
+ MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(extra_cmd), ZSTR_LEN(extra_cmd));
}
zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0);
@@ -4293,11 +4410,12 @@ PHP_FUNCTION(mb_send_mail)
_php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
}
- if ((s = zend_hash_str_find_ptr(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) {
+ if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) {
char *tmp;
char *param_name;
char *charset = NULL;
+ ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
p = strchr(Z_STRVAL_P(s), ';');
if (p != NULL) {
@@ -4328,9 +4446,10 @@ PHP_FUNCTION(mb_send_mail)
suppressed_hdrs.cnt_type = 1;
}
- if ((s = zend_hash_str_find_ptr(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1))) {
+ if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1))) {
enum mbfl_no_encoding _body_enc;
+ ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
_body_enc = mbfl_name2no_encoding(Z_STRVAL_P(s));
switch (_body_enc) {
case mbfl_no_encoding_base64:
@@ -4472,10 +4591,10 @@ PHP_FUNCTION(mb_send_mail)
if (force_extra_parameters) {
extra_cmd = php_escape_shell_cmd(force_extra_parameters);
} else if (extra_cmd) {
- extra_cmd = php_escape_shell_cmd(extra_cmd->val);
+ extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd));
}
- if (!err && php_mail(to_r, subject, message, headers, extra_cmd ? extra_cmd->val : NULL)) {
+ if (!err && php_mail(to_r, subject, message, headers, extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
RETVAL_TRUE;
} else {
RETVAL_FALSE;
@@ -4684,7 +4803,7 @@ PHP_FUNCTION(mb_get_info)
}
/* }}} */
-static inline int php_mb_check_encoding(const char *input, size_t length, const char *enc)
+MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc)
{
const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
mbfl_buffer_converter *convd;
@@ -4757,6 +4876,342 @@ PHP_FUNCTION(mb_check_encoding)
}
/* }}} */
+
+static inline zend_long php_mb_ord(const char* str, size_t str_len, const char* enc)
+{
+ enum mbfl_no_encoding no_enc;
+ char* ret;
+ size_t ret_len;
+ const mbfl_encoding *encoding;
+ unsigned char char_len;
+ zend_long cp;
+
+ if (enc == NULL) {
+ no_enc = MBSTRG(current_internal_encoding)->no_encoding;
+ } else {
+ no_enc = mbfl_name2no_encoding(enc);
+
+ if (no_enc == mbfl_no_encoding_invalid) {
+ php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc);
+ return -1;
+ }
+ }
+
+ if (php_mb_is_no_encoding_unicode(no_enc)) {
+
+ ret = php_mb_convert_encoding(str, str_len, "UCS-4BE", enc, &ret_len);
+
+ if (ret == NULL) {
+ return -1;
+ }
+
+ cp = (unsigned char) ret[0] << 24 | \
+ (unsigned char) ret[1] << 16 | \
+ (unsigned char) ret[2] << 8 | \
+ (unsigned char) ret[3];
+
+ efree(ret);
+
+ return cp;
+
+ } else if (php_mb_is_unsupported_no_encoding(no_enc)) {
+ php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc);
+ return -1;
+ }
+
+ ret = php_mb_convert_encoding(str, str_len, enc, enc, &ret_len);
+
+ if (ret == NULL) {
+ return -1;
+ }
+
+ encoding = mbfl_no2encoding(no_enc);
+ char_len = php_mb_mbchar_bytes_ex(ret, encoding);
+
+ if (char_len == 1) {
+ cp = (unsigned char) ret[0];
+ } else if (char_len == 2) {
+ cp = ((unsigned char) ret[0] << 8) | \
+ (unsigned char) ret[1];
+ } else if (char_len == 3) {
+ cp = ((unsigned char) ret[0] << 16) | \
+ ((unsigned char) ret[1] << 8) | \
+ (unsigned char) ret[2];
+ } else {
+ cp = ((unsigned char) ret[0] << 24) | \
+ ((unsigned char) ret[1] << 16) | \
+ ((unsigned char) ret[2] << 8) | \
+ (unsigned char) ret[3];
+ }
+
+ efree(ret);
+
+ return cp;
+}
+
+
+/* {{{ proto bool mb_ord([string str[, string encoding]]) */
+PHP_FUNCTION(mb_ord)
+{
+ char* str;
+ size_t str_len;
+ char* enc = NULL;
+ size_t enc_len;
+ zend_long cp;
+
+#ifndef FAST_ZPP
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &str, &str_len, &enc, &enc_len) == FAILURE) {
+ return;
+ }
+#else
+ ZEND_PARSE_PARAMETERS_START(1, 2)
+ Z_PARAM_STRING(str, str_len)
+ Z_PARAM_OPTIONAL
+ Z_PARAM_STRING(enc, enc_len)
+ ZEND_PARSE_PARAMETERS_END();
+#endif
+
+ cp = php_mb_ord(str, str_len, enc);
+
+ if (0 > cp) {
+ RETURN_FALSE;
+ }
+
+ RETURN_LONG(cp);
+}
+/* }}} */
+
+
+static inline char* php_mb_chr(zend_long cp, const char* enc, size_t *output_len)
+{
+ enum mbfl_no_encoding no_enc;
+ char* buf;
+ size_t buf_len;
+ char* ret;
+ size_t ret_len;
+
+ if (enc == NULL) {
+ no_enc = MBSTRG(current_internal_encoding)->no_encoding;
+ } else {
+ no_enc = mbfl_name2no_encoding(enc);
+ if (no_enc == mbfl_no_encoding_invalid) {
+ php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc);
+ return NULL;
+ }
+ }
+
+ if (php_mb_is_no_encoding_utf8(no_enc)) {
+
+ if (0 > cp || cp > 0x10ffff || (cp > 0xd7ff && 0xe000 > cp)) {
+ if (php_mb_is_no_encoding_utf8(MBSTRG(current_internal_encoding)->no_encoding)) {
+ cp = MBSTRG(current_filter_illegal_substchar);
+ } else if (php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) {
+ if (0xd800 > MBSTRG(current_filter_illegal_substchar) || MBSTRG(current_filter_illegal_substchar) > 0xdfff) {
+ cp = MBSTRG(current_filter_illegal_substchar);
+ } else {
+ cp = 0x3f;
+ }
+ } else {
+ cp = 0x3f;
+ }
+ }
+
+ if (cp < 0x80) {
+ ret_len = 1;
+ ret = (char *) safe_emalloc(ret_len, 1, 1);
+ ret[0] = cp;
+ ret[1] = 0;
+ } else if (cp < 0x800) {
+ ret_len = 2;
+ ret = (char *) safe_emalloc(ret_len, 1, 1);
+ ret[0] = 0xc0 | (cp >> 6);
+ ret[1] = 0x80 | (cp & 0x3f);
+ ret[2] = 0;
+ } else if (cp < 0x10000) {
+ ret_len = 3;
+ ret = (char *) safe_emalloc(ret_len, 1, 1);
+ ret[0] = 0xe0 | (cp >> 12);
+ ret[1] = 0x80 | ((cp >> 6) & 0x3f);
+ ret[2] = 0x80 | (cp & 0x3f);
+ ret[3] = 0;
+ } else {
+ ret_len = 4;
+ ret = (char *) safe_emalloc(ret_len, 1, 1);
+ ret[0] = 0xf0 | (cp >> 18);
+ ret[1] = 0x80 | ((cp >> 12) & 0x3f);
+ ret[2] = 0x80 | ((cp >> 6) & 0x3f);
+ ret[3] = 0x80 | (cp & 0x3f);
+ ret[4] = 0;
+ }
+
+ if (output_len) {
+ *output_len = ret_len;
+ }
+
+ return ret;
+
+ } else if (php_mb_is_no_encoding_unicode(no_enc)) {
+
+ if (0 > cp || 0x10ffff < cp) {
+
+ if (php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) {
+ cp = MBSTRG(current_filter_illegal_substchar);
+ } else {
+ cp = 0x3f;
+ }
+
+ }
+
+ buf_len = 4;
+ buf = (char *) safe_emalloc(buf_len, 1, 1);
+ buf[0] = (cp >> 24) & 0xff;
+ buf[1] = (cp >> 16) & 0xff;
+ buf[2] = (cp >> 8) & 0xff;
+ buf[3] = cp & 0xff;
+ buf[4] = 0;
+
+ ret = php_mb_convert_encoding(buf, buf_len, enc, "UCS-4BE", &ret_len);
+ efree(buf);
+
+ if (output_len) {
+ *output_len = ret_len;
+ }
+
+ return ret;
+
+ } else if (php_mb_is_unsupported_no_encoding(no_enc)) {
+ php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc);
+ return NULL;
+ }
+
+ if (0 > cp || cp > 0x100000000) {
+ if (no_enc == MBSTRG(current_internal_encoding)->no_encoding) {
+ cp = MBSTRG(current_filter_illegal_substchar);
+ } else {
+ cp = 0x3f;
+ }
+ }
+
+ if (cp < 0x100) {
+ buf_len = 1;
+ buf = (char *) safe_emalloc(buf_len, 1, 1);
+ buf[0] = cp;
+ buf[1] = 0;
+ } else if (cp < 0x10000) {
+ buf_len = 2;
+ buf = (char *) safe_emalloc(buf_len, 1, 1);
+ buf[0] = cp >> 8;
+ buf[1] = cp & 0xff;
+ buf[2] = 0;
+ } else if (cp < 0x1000000) {
+ buf_len = 3;
+ buf = (char *) safe_emalloc(buf_len, 1, 1);
+ buf[0] = cp >> 16;
+ buf[1] = (cp >> 8) & 0xff;
+ buf[2] = cp & 0xff;
+ buf[3] = 0;
+ } else {
+ buf_len = 4;
+ buf = (char *) safe_emalloc(buf_len, 1, 1);
+ buf[0] = cp >> 24;
+ buf[1] = (cp >> 16) & 0xff;
+ buf[2] = (cp >> 8) & 0xff;
+ buf[3] = cp & 0xff;
+ buf[4] = 0;
+ }
+
+ ret = php_mb_convert_encoding(buf, buf_len, enc, enc, &ret_len);
+ efree(buf);
+
+ if (output_len) {
+ *output_len = ret_len;
+ }
+
+ return ret;
+}
+
+
+/* {{{ proto bool mb_ord([int cp[, string encoding]]) */
+PHP_FUNCTION(mb_chr)
+{
+ zend_long cp;
+ char* enc = NULL;
+ size_t enc_len;
+ char* ret;
+ size_t ret_len;
+
+#ifndef FAST_ZPP
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l|s", &cp, &enc, &enc_len) == FAILURE) {
+ return;
+ }
+#else
+ ZEND_PARSE_PARAMETERS_START(1, 2)
+ Z_PARAM_LONG(cp)
+ Z_PARAM_OPTIONAL
+ Z_PARAM_STRING(enc, enc_len)
+ ZEND_PARSE_PARAMETERS_END();
+#endif
+
+ ret = php_mb_chr(cp, enc, &ret_len);
+
+ if (ret == NULL) {
+ RETURN_FALSE;
+ }
+
+ RETVAL_STRING(ret);
+ efree(ret);
+}
+/* }}} */
+
+
+static inline char* php_mb_scrub(const char* str, size_t str_len, const char* enc)
+{
+ size_t ret_len;
+
+ return php_mb_convert_encoding(str, str_len, enc, enc, &ret_len);
+}
+
+
+/* {{{ proto bool mb_scrub([string str[, string encoding]]) */
+PHP_FUNCTION(mb_scrub)
+{
+ char* str;
+ size_t str_len;
+ char *enc = NULL;
+ size_t enc_len;
+ char *ret;
+
+#ifndef FAST_ZPP
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &str, &str_len, &enc, &enc_len) == FAILURE) {
+ return;
+ }
+#else
+ ZEND_PARSE_PARAMETERS_START(1, 2)
+ Z_PARAM_STRING(str, str_len)
+ Z_PARAM_OPTIONAL
+ Z_PARAM_STRING(enc, enc_len)
+ZEND_PARSE_PARAMETERS_END();
+#endif
+
+ if (enc == NULL) {
+ enc = (char *) MBSTRG(current_internal_encoding)->name;
+ } else if (!mbfl_is_support_encoding(enc)) {
+ php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc);
+ RETURN_FALSE;
+ }
+
+ ret = php_mb_scrub(str, str_len, enc);
+
+ if (ret == NULL) {
+ RETURN_FALSE;
+ }
+
+ RETVAL_STRING(ret);
+ efree(ret);
+}
+/* }}} */
+
+
/* {{{ php_mb_populate_current_detect_order_list */
static void php_mb_populate_current_detect_order_list(void)
{
@@ -4920,6 +5375,9 @@ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int
break;
}
} else {
+ if (offset < 0) {
+ offset += (long)haystack_char_len;
+ }
if (offset < 0 || offset > haystack_char_len) {
php_error_docref(NULL, E_WARNING, "Offset not contained in string");
break;