diff options
author | Masaki Kagaya <masakielastic@gmail.com> | 2016-08-14 06:29:23 +0900 |
---|---|---|
committer | Masaki Kagaya <masakielastic@gmail.com> | 2016-08-14 06:29:23 +0900 |
commit | f49a5a67cfde6c318b53ac26c4e011e0c8483647 (patch) | |
tree | 59007db4bfb7c7f0edf53914fd8fa768d070f958 /ext/mbstring/mbstring.c | |
parent | a22d27df4cdabe6cab516c1f967d56cc5a8fb891 (diff) | |
parent | 84a94ca62f5c2a4451fb6500fc017f313524a758 (diff) | |
download | php-git-f49a5a67cfde6c318b53ac26c4e011e0c8483647.tar.gz |
delete duplicate functions
Diffstat (limited to 'ext/mbstring/mbstring.c')
-rw-r--r-- | ext/mbstring/mbstring.c | 892 |
1 files changed, 675 insertions, 217 deletions
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index a646d158e5..a0a1eee0a1 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -2,7 +2,7 @@ +----------------------------------------------------------------------+ | PHP Version 7 | +----------------------------------------------------------------------+ - | Copyright (c) 1997-2015 The PHP Group | + | Copyright (c) 1997-2016 The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | @@ -104,7 +104,6 @@ static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *lis static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding); -static inline int php_mb_check_encoding(const char *input, size_t length, const char *enc); /* }}} */ /* {{{ php_mb_default_identify_list */ @@ -205,13 +204,6 @@ static const struct mb_overload_def mb_ovld[] = { {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"}, {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"}, {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"}, -#if HAVE_MBREGEX - {MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"}, - {MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"}, - {MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"}, - {MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"}, - {MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"}, -#endif {0, NULL, NULL, NULL} }; /* }}} */ @@ -438,6 +430,21 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0) ZEND_ARG_INFO(0, encoding) ZEND_END_ARG_INFO() +ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_scrub, 0, 0, 1) + ZEND_ARG_INFO(0, str) + ZEND_ARG_INFO(0, encoding) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ord, 0, 0, 1) + ZEND_ARG_INFO(0, str) + ZEND_ARG_INFO(0, encoding) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_chr, 0, 0, 1) + ZEND_ARG_INFO(0, cp) + ZEND_ARG_INFO(0, encoding) +ZEND_END_ARG_INFO() + ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0) ZEND_ARG_INFO(0, encoding) ZEND_END_ARG_INFO() @@ -563,6 +570,9 @@ const zend_function_entry mbstring_functions[] = { PHP_FE(mb_send_mail, arginfo_mb_send_mail) PHP_FE(mb_get_info, arginfo_mb_get_info) PHP_FE(mb_check_encoding, arginfo_mb_check_encoding) + PHP_FE(mb_ord, arginfo_mb_ord) + PHP_FE(mb_chr, arginfo_mb_chr) + PHP_FE(mb_scrub, arginfo_mb_scrub) #if HAVE_MBREGEX PHP_MBREGEX_FUNCTION_ENTRIES #endif @@ -580,7 +590,7 @@ zend_module_entry mbstring_module_entry = { PHP_RINIT(mbstring), PHP_RSHUTDOWN(mbstring), PHP_MINFO(mbstring), - NO_VERSION_YET, + PHP_MBSTRING_VERSION, PHP_MODULE_GLOBALS(mbstring), PHP_GINIT(mbstring), PHP_GSHUTDOWN(mbstring), @@ -599,7 +609,7 @@ static sapi_post_entry php_post_entries[] = { #ifdef COMPILE_DL_MBSTRING #ifdef ZTS -ZEND_TSRMLS_CACHE_DEFINE(); +ZEND_TSRMLS_CACHE_DEFINE() #endif ZEND_GET_MODULE(mbstring) #endif @@ -695,8 +705,8 @@ static sapi_post_entry mbstr_post_entries[] = { static int php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent) { - int size, bauto, ret = SUCCESS; - size_t n; + int bauto, ret = SUCCESS; + size_t n, size; char *p, *p1, *p2, *endp, *tmpstr; const mbfl_encoding **entry, **list; @@ -1213,7 +1223,7 @@ static PHP_INI_MH(OnUpdate_mbstring_language) { enum mbfl_no_language no_language; - no_language = mbfl_name2no_language(new_value->val); + no_language = mbfl_name2no_language(ZSTR_VAL(new_value)); if (no_language == mbfl_no_language_invalid) { MBSTRG(language) = mbfl_no_language_neutral; return FAILURE; @@ -1239,7 +1249,7 @@ static PHP_INI_MH(OnUpdate_mbstring_detect_order) return SUCCESS; } - if (FAILURE == php_mb_parse_encoding_list(new_value->val, new_value->len, &list, &size, 1)) { + if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) { return FAILURE; } @@ -1272,7 +1282,7 @@ static PHP_INI_MH(OnUpdate_mbstring_http_input) return SUCCESS; } - if (FAILURE == php_mb_parse_encoding_list(new_value->val, new_value->len, &list, &size, 1)) { + if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) { return FAILURE; } @@ -1295,7 +1305,7 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output) { const mbfl_encoding *encoding; - if (new_value == NULL || new_value->len == 0) { + if (new_value == NULL || ZSTR_LEN(new_value) == 0) { encoding = mbfl_name2encoding(get_output_encoding()); if (!encoding) { MBSTRG(http_output_encoding) = &mbfl_encoding_pass; @@ -1303,7 +1313,7 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output) return SUCCESS; } } else { - encoding = mbfl_name2encoding(new_value->val); + encoding = mbfl_name2encoding(ZSTR_VAL(new_value)); if (!encoding) { MBSTRG(http_output_encoding) = &mbfl_encoding_pass; MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass; @@ -1359,8 +1369,8 @@ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) } if (stage & (PHP_INI_STAGE_STARTUP | PHP_INI_STAGE_SHUTDOWN | PHP_INI_STAGE_RUNTIME)) { - if (new_value && new_value->len) { - return _php_mb_ini_mbstring_internal_encoding_set(new_value->val, new_value->len); + if (new_value && ZSTR_LEN(new_value)) { + return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value)); } else { return _php_mb_ini_mbstring_internal_encoding_set(get_internal_encoding(), strlen(get_internal_encoding())+1); } @@ -1383,20 +1393,20 @@ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) char *endptr = NULL; if (new_value != NULL) { - if (strcasecmp("none", new_value->val) == 0) { + if (strcasecmp("none", ZSTR_VAL(new_value)) == 0) { MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE; MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE; - } else if (strcasecmp("long", new_value->val) == 0) { + } else if (strcasecmp("long", ZSTR_VAL(new_value)) == 0) { MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG; MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG; - } else if (strcasecmp("entity", new_value->val) == 0) { + } else if (strcasecmp("entity", ZSTR_VAL(new_value)) == 0) { MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY; MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY; } else { MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; - if (new_value->len >0) { - c = strtol(new_value->val, &endptr, 0); + if (ZSTR_LEN(new_value) > 0) { + c = strtol(ZSTR_VAL(new_value), &endptr, 0); if (*endptr == '\0') { MBSTRG(filter_illegal_substchar) = c; MBSTRG(current_filter_illegal_substchar) = c; @@ -1446,8 +1456,8 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes) } tmp = php_trim(new_value, NULL, 0, 3); - if (tmp->len > 0) { - if (!(re = _php_mb_compile_regex(tmp->val))) { + if (ZSTR_LEN(tmp) > 0) { + if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) { zend_string_release(tmp); return FAILURE; } @@ -1554,6 +1564,9 @@ static PHP_GSHUTDOWN_FUNCTION(mbstring) /* {{{ PHP_MINIT_FUNCTION(mbstring) */ PHP_MINIT_FUNCTION(mbstring) { +#if defined(COMPILE_DL_MBSTRING) && defined(ZTS) +ZEND_TSRMLS_CACHE_UPDATE(); +#endif __mbfl_allocators = &_php_mb_allocators; REGISTER_INI_ENTRIES(); @@ -1599,6 +1612,8 @@ PHP_MSHUTDOWN_FUNCTION(mbstring) { UNREGISTER_INI_ENTRIES(); + zend_multibyte_restore_functions(); + #if HAVE_MBREGEX PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); #endif @@ -1721,6 +1736,13 @@ PHP_MINFO_FUNCTION(mbstring) snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY); php_info_print_table_row(2, "libmbfl version", tmp); } +#if HAVE_ONIG + { + char tmp[256]; + snprintf(tmp, sizeof(tmp), "%d.%d.%d", ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY); + php_info_print_table_row(2, "oniguruma version", tmp); + } +#endif php_info_print_table_end(); php_info_print_table_start(); @@ -1749,7 +1771,7 @@ PHP_FUNCTION(mb_language) } else { zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0); if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) { - php_error_docref(NULL, E_WARNING, "Unknown language \"%s\"", name->val); + php_error_docref(NULL, E_WARNING, "Unknown language \"%s\"", ZSTR_VAL(name)); RETVAL_FALSE; } else { RETVAL_TRUE; @@ -1970,109 +1992,6 @@ PHP_FUNCTION(mb_detect_order) } /* }}} */ -static const enum mbfl_no_encoding php_mb_unsupported_no_encoding_list[] = { - mbfl_no_encoding_pass, - mbfl_no_encoding_auto, - mbfl_no_encoding_wchar, - mbfl_no_encoding_byte2be, - mbfl_no_encoding_byte2le, - mbfl_no_encoding_byte4be, - mbfl_no_encoding_byte4le, - mbfl_no_encoding_base64, - mbfl_no_encoding_uuencode, - mbfl_no_encoding_html_ent, - mbfl_no_encoding_qprint, - mbfl_no_encoding_utf7, - mbfl_no_encoding_utf7imap, - mbfl_no_encoding_2022kr, - mbfl_no_encoding_jis, - mbfl_no_encoding_2022jp, - mbfl_no_encoding_2022jpms, - mbfl_no_encoding_jis_ms, - mbfl_no_encoding_2022jp_2004, - mbfl_no_encoding_2022jp_kddi, - mbfl_no_encoding_cp50220, - mbfl_no_encoding_cp50220raw, - mbfl_no_encoding_cp50221, - mbfl_no_encoding_cp50222 -}; - -static inline int php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc) -{ - int i; - int size = sizeof(php_mb_unsupported_no_encoding_list)/sizeof(php_mb_unsupported_no_encoding_list[0]); - - for (i = 0; i < size; i++) { - - if (no_enc == php_mb_unsupported_no_encoding_list[i]) { - return 1; - } - - } - - return 0; -} - -static const enum mbfl_no_encoding php_mb_no_encoding_unicode_list[] = { - mbfl_no_encoding_utf8, - mbfl_no_encoding_utf8_docomo, - mbfl_no_encoding_utf8_kddi_a, - mbfl_no_encoding_utf8_kddi_b, - mbfl_no_encoding_utf8_sb, - mbfl_no_encoding_ucs4, - mbfl_no_encoding_ucs4be, - mbfl_no_encoding_ucs4le, - mbfl_no_encoding_utf32, - mbfl_no_encoding_utf32be, - mbfl_no_encoding_utf32le, - mbfl_no_encoding_ucs2, - mbfl_no_encoding_ucs2be, - mbfl_no_encoding_ucs2le, - mbfl_no_encoding_utf16, - mbfl_no_encoding_utf16be, - mbfl_no_encoding_utf16le -}; - -static inline int php_mb_is_no_encoding_unicode(enum mbfl_no_encoding no_enc) -{ - int i; - int size = sizeof(php_mb_no_encoding_unicode_list)/sizeof(php_mb_no_encoding_unicode_list[0]); - - for (i = 0; i < size; i++) { - - if (no_enc == php_mb_no_encoding_unicode_list[i]) { - return 1; - } - - } - - return 0; -} - -static const enum mbfl_no_encoding php_mb_no_encoding_utf8_list[] = { - mbfl_no_encoding_utf8, - mbfl_no_encoding_utf8_docomo, - mbfl_no_encoding_utf8_kddi_a, - mbfl_no_encoding_utf8_kddi_b, - mbfl_no_encoding_utf8_sb -}; - -static inline int php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc) -{ - int i; - int size = sizeof(php_mb_no_encoding_utf8_list)/sizeof(php_mb_no_encoding_utf8_list[0]); - - for (i = 0; i < size; i++) { - - if (no_enc == php_mb_no_encoding_utf8_list[i]) { - return 1; - } - - } - - return 0; -} - static inline int php_mb_check_code_point(long cp) { enum mbfl_no_encoding no_enc; @@ -2122,7 +2041,7 @@ static inline int php_mb_check_code_point(long cp) } else { buf_len = 4; buf = (char *) safe_emalloc(buf_len, 1, 1); - buf[0] = cp >> 24; + buf[0] = cp >> 24; buf[1] = (cp >> 16) & 0xff; buf[2] = (cp >> 8) & 0xff; buf[3] = cp & 0xff; @@ -2265,8 +2184,13 @@ PHP_FUNCTION(mb_parse_str) detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr); } else { zval tmp; - zend_array *symbol_table = zend_rebuild_symbol_table(); + zend_array *symbol_table; + if (zend_forbid_dynamic_call("mb_parse_str() with a single argument") == FAILURE) { + efree(encstr); + return; + } + symbol_table = zend_rebuild_symbol_table(); ZVAL_ARR(&tmp, symbol_table); detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr); } @@ -2390,14 +2314,21 @@ PHP_FUNCTION(mb_strlen) int n; mbfl_string string; char *enc_name = NULL; - size_t enc_name_len; + size_t enc_name_len, string_len; mbfl_string_init(&string); - if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string_len, &enc_name, &enc_name_len) == FAILURE) { return; } + if (ZEND_SIZE_T_UINT_OVFL(string_len)) { + php_error_docref(NULL, E_WARNING, "String overflows the max allowed length of %u", UINT_MAX); + return; + } + + string.len = (uint32_t)string_len; + string.no_language = MBSTRG(language); if (enc_name == NULL) { string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; @@ -2423,10 +2354,10 @@ PHP_FUNCTION(mb_strlen) PHP_FUNCTION(mb_strpos) { int n, reverse = 0; - zend_long offset; + zend_long offset = 0, slen; mbfl_string haystack, needle; char *enc_name = NULL; - size_t enc_name_len; + size_t enc_name_len, haystack_len, needle_len; mbfl_string_init(&haystack); mbfl_string_init(&needle); @@ -2434,12 +2365,22 @@ PHP_FUNCTION(mb_strpos) haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; needle.no_language = MBSTRG(language); needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; - offset = 0; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &enc_name, &enc_name_len) == FAILURE) { return; } + if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) { + php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX); + return; + } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) { + php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX); + return; + } + + haystack.len = (uint32_t)haystack_len; + needle.len = (uint32_t)needle_len; + if (enc_name != NULL) { haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name); if (haystack.no_encoding == mbfl_no_encoding_invalid) { @@ -2448,7 +2389,11 @@ PHP_FUNCTION(mb_strpos) } } - if (offset < 0 || offset > mbfl_strlen(&haystack)) { + slen = mbfl_strlen(&haystack); + if (offset < 0) { + offset += slen; + } + if (offset < 0 || offset > slen) { php_error_docref(NULL, E_WARNING, "Offset not contained in string"); RETURN_FALSE; } @@ -2489,7 +2434,7 @@ PHP_FUNCTION(mb_strrpos) int n; mbfl_string haystack, needle; char *enc_name = NULL; - size_t enc_name_len; + size_t enc_name_len, haystack_len, needle_len; zval *zoffset = NULL; long offset = 0, str_flg; char *enc_name2 = NULL; @@ -2502,10 +2447,21 @@ PHP_FUNCTION(mb_strrpos) needle.no_language = MBSTRG(language); needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|zs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &zoffset, &enc_name, &enc_name_len) == FAILURE) { return; } + if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) { + php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX); + return; + } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) { + php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX); + return; + } + + haystack.len = (uint32_t)haystack_len; + needle.len = (uint32_t)needle_len; + if (zoffset) { if (Z_TYPE_P(zoffset) == IS_STRING) { enc_name2 = Z_STRVAL_P(zoffset); @@ -2584,17 +2540,27 @@ PHP_FUNCTION(mb_strrpos) Finds position of first occurrence of a string within another, case insensitive */ PHP_FUNCTION(mb_stripos) { - int n; - zend_long offset; + int n = -1; + zend_long offset = 0; mbfl_string haystack, needle; const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; - size_t from_encoding_len; - n = -1; - offset = 0; + size_t from_encoding_len, haystack_len, needle_len; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &from_encoding, &from_encoding_len) == FAILURE) { return; } + + if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) { + php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX); + return; + } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) { + php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX); + return; + } + + haystack.len = (uint32_t)haystack_len; + needle.len = (uint32_t)needle_len; + if (needle.len == 0) { php_error_docref(NULL, E_WARNING, "Empty delimiter"); RETURN_FALSE; @@ -2613,18 +2579,27 @@ PHP_FUNCTION(mb_stripos) Finds position of last occurrence of a string within another, case insensitive */ PHP_FUNCTION(mb_strripos) { - int n; - zend_long offset; + int n = -1; + zend_long offset = 0; mbfl_string haystack, needle; const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; - size_t from_encoding_len; - n = -1; - offset = 0; + size_t from_encoding_len, haystack_len, needle_len; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &from_encoding, &from_encoding_len) == FAILURE) { return; } + if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) { + php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX); + return; + } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) { + php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX); + return; + } + + haystack.len = (uint32_t)haystack_len; + needle.len = (uint32_t)needle_len; + n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding); if (n >= 0) { @@ -2642,7 +2617,7 @@ PHP_FUNCTION(mb_strstr) int n, len, mblen; mbfl_string haystack, needle, result, *ret = NULL; char *enc_name = NULL; - size_t enc_name_len; + size_t enc_name_len, haystack_len, needle_len; zend_bool part = 0; mbfl_string_init(&haystack); @@ -2652,10 +2627,21 @@ PHP_FUNCTION(mb_strstr) needle.no_language = MBSTRG(language); needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &enc_name, &enc_name_len) == FAILURE) { return; } + if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) { + php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX); + return; + } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) { + php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX); + return; + } + + haystack.len = (uint32_t)haystack_len; + needle.len = (uint32_t)needle_len; + if (enc_name != NULL) { haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name); if (haystack.no_encoding == mbfl_no_encoding_invalid) { @@ -2704,7 +2690,7 @@ PHP_FUNCTION(mb_strrchr) int n, len, mblen; mbfl_string haystack, needle, result, *ret = NULL; char *enc_name = NULL; - size_t enc_name_len; + size_t enc_name_len, haystack_len, needle_len; zend_bool part = 0; mbfl_string_init(&haystack); @@ -2714,10 +2700,21 @@ PHP_FUNCTION(mb_strrchr) needle.no_language = MBSTRG(language); needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &enc_name, &enc_name_len) == FAILURE) { return; } + if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) { + php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX); + return; + } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) { + php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX); + return; + } + + haystack.len = (uint32_t)haystack_len; + needle.len = (uint32_t)needle_len; + if (enc_name != NULL) { haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name); if (haystack.no_encoding == mbfl_no_encoding_invalid) { @@ -2766,7 +2763,7 @@ PHP_FUNCTION(mb_strrchr) PHP_FUNCTION(mb_stristr) { zend_bool part = 0; - size_t from_encoding_len, len, mblen; + size_t from_encoding_len, len, mblen, haystack_len, needle_len; int n; mbfl_string haystack, needle, result, *ret = NULL; const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; @@ -2778,10 +2775,21 @@ PHP_FUNCTION(mb_stristr) needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &from_encoding, &from_encoding_len) == FAILURE) { return; } + if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) { + php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX); + return; + } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) { + php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX); + return; + } + + haystack.len = (uint32_t)haystack_len; + needle.len = (uint32_t)needle_len; + if (!needle.len) { php_error_docref(NULL, E_WARNING, "Empty delimiter"); RETURN_FALSE; @@ -2830,7 +2838,7 @@ PHP_FUNCTION(mb_strrichr) { zend_bool part = 0; int n, len, mblen; - size_t from_encoding_len; + size_t from_encoding_len, haystack_len, needle_len; mbfl_string haystack, needle, result, *ret = NULL; const char *from_encoding = MBSTRG(current_internal_encoding)->name; mbfl_string_init(&haystack); @@ -2841,10 +2849,21 @@ PHP_FUNCTION(mb_strrichr) needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &from_encoding, &from_encoding_len) == FAILURE) { return; } + if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) { + php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX); + return; + } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) { + php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX); + return; + } + + haystack.len = (uint32_t)haystack_len; + needle.len = (uint32_t)needle_len; + haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding); if (haystack.no_encoding == mbfl_no_encoding_invalid) { php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding); @@ -2889,7 +2908,7 @@ PHP_FUNCTION(mb_substr_count) int n; mbfl_string haystack, needle; char *enc_name = NULL; - size_t enc_name_len; + size_t enc_name_len, haystack_len, needle_len; mbfl_string_init(&haystack); mbfl_string_init(&needle); @@ -2898,10 +2917,21 @@ PHP_FUNCTION(mb_substr_count) needle.no_language = MBSTRG(language); needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &enc_name, &enc_name_len) == FAILURE) { return; } + if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) { + php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX); + return; + } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) { + php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX); + return; + } + + haystack.len = (uint32_t)haystack_len; + needle.len = (uint32_t)needle_len; + if (enc_name != NULL) { haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name); if (haystack.no_encoding == mbfl_no_encoding_invalid) { @@ -2928,15 +2958,14 @@ PHP_FUNCTION(mb_substr_count) Returns part of a string */ PHP_FUNCTION(mb_substr) { - size_t argc = ZEND_NUM_ARGS(); - char *str, *encoding; + char *str, *encoding = NULL; zend_long from, len; int mblen; size_t str_len, encoding_len; - zval *z_len = NULL; + zend_bool len_is_null = 1; mbfl_string string, result, *ret; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|zs", &str, &str_len, &from, &z_len, &encoding, &encoding_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", &str, &str_len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) { return; } @@ -2944,7 +2973,7 @@ PHP_FUNCTION(mb_substr) string.no_language = MBSTRG(language); string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; - if (argc == 4) { + if (encoding) { string.no_encoding = mbfl_name2no_encoding(encoding); if (string.no_encoding == mbfl_no_encoding_invalid) { php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding); @@ -2955,11 +2984,8 @@ PHP_FUNCTION(mb_substr) string.val = (unsigned char *)str; string.len = str_len; - if (argc < 3 || Z_TYPE_P(z_len) == IS_NULL) { + if (len_is_null) { len = str_len; - } else { - convert_to_long_ex(z_len); - len = Z_LVAL_P(z_len); } /* measures length */ @@ -3008,22 +3034,28 @@ PHP_FUNCTION(mb_substr) Returns part of a string */ PHP_FUNCTION(mb_strcut) { - size_t argc = ZEND_NUM_ARGS(); - char *encoding; + char *encoding = NULL; zend_long from, len; - size_t encoding_len; - zval *z_len = NULL; + size_t encoding_len, string_len; + zend_bool len_is_null = 1; mbfl_string string, result, *ret; mbfl_string_init(&string); string.no_language = MBSTRG(language); string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|zs", (char **)&string.val, (int **)&string.len, &from, &z_len, &encoding, &encoding_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", (char **)&string.val, &string_len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) { return; } - if (argc == 4) { + if (ZEND_SIZE_T_UINT_OVFL(string_len)) { + php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX); + return; + } + + string.len = (uint32_t)string_len; + + if (encoding) { string.no_encoding = mbfl_name2no_encoding(encoding); if (string.no_encoding == mbfl_no_encoding_invalid) { php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding); @@ -3031,11 +3063,8 @@ PHP_FUNCTION(mb_strcut) } } - if (argc < 3 || Z_TYPE_P(z_len) == IS_NULL) { + if (len_is_null) { len = string.len; - } else { - convert_to_long_ex(z_len); - len = Z_LVAL_P(z_len); } /* if "from" position is negative, count start position from the end @@ -3080,17 +3109,24 @@ PHP_FUNCTION(mb_strwidth) int n; mbfl_string string; char *enc_name = NULL; - size_t enc_name_len; + size_t enc_name_len, string_len; mbfl_string_init(&string); string.no_language = MBSTRG(language); string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string_len, &enc_name, &enc_name_len) == FAILURE) { return; } + if (ZEND_SIZE_T_UINT_OVFL(string_len)) { + php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX); + return; + } + + string.len = (uint32_t)string_len; + if (enc_name != NULL) { string.no_encoding = mbfl_name2no_encoding(enc_name); if (string.no_encoding == mbfl_no_encoding_invalid) { @@ -3112,8 +3148,8 @@ PHP_FUNCTION(mb_strwidth) Trim the string in terminal width */ PHP_FUNCTION(mb_strimwidth) { - char *str, *trimmarker, *encoding; - zend_long from, width; + char *str, *trimmarker = NULL, *encoding = NULL; + zend_long from, width, swidth; size_t str_len, trimmarker_len, encoding_len; mbfl_string string, result, marker, *ret; @@ -3130,7 +3166,7 @@ PHP_FUNCTION(mb_strimwidth) marker.val = NULL; marker.len = 0; - if (ZEND_NUM_ARGS() == 5) { + if (encoding) { string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding); if (string.no_encoding == mbfl_no_encoding_invalid) { php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding); @@ -3141,17 +3177,29 @@ PHP_FUNCTION(mb_strimwidth) string.val = (unsigned char *)str; string.len = str_len; + if ((from < 0) || (width < 0)) { + swidth = mbfl_strwidth(&string); + } + + if (from < 0) { + from += swidth; + } + if (from < 0 || (size_t)from > str_len) { php_error_docref(NULL, E_WARNING, "Start position is out of range"); RETURN_FALSE; } if (width < 0) { - php_error_docref(NULL, E_WARNING, "Width is negative value"); + width = swidth + width - from; + } + + if (width < 0) { + php_error_docref(NULL, E_WARNING, "Width is out of range"); RETURN_FALSE; } - if (ZEND_NUM_ARGS() >= 4) { + if (trimmarker) { marker.val = (unsigned char *)trimmarker; marker.len = trimmarker_len; } @@ -3167,6 +3215,31 @@ PHP_FUNCTION(mb_strimwidth) } /* }}} */ + +/* See mbfl_no_encoding definition for list of unsupported encodings */ +static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc) +{ + return ((no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint) + || (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap) + || (no_enc >= mbfl_no_encoding_jis && no_enc <= mbfl_no_encoding_2022jpms) + || (no_enc >= mbfl_no_encoding_cp50220 && no_enc <= mbfl_no_encoding_cp50222)); +} + + +/* See mbfl_no_encoding definition for list of unicode encodings */ +static inline zend_bool php_mb_is_no_encoding_unicode(enum mbfl_no_encoding no_enc) +{ + return (no_enc >= mbfl_no_encoding_ucs4 && no_enc <= mbfl_no_encoding_utf8_sb); +} + + +/* See mbfl_no_encoding definition for list of UTF-8 encodings */ +static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc) +{ + return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb); +} + + /* {{{ MBSTRING_API char *php_mb_convert_encoding() */ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len) { @@ -3237,7 +3310,28 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co return NULL; } mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); - mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); + + if (string.no_encoding == MBSTRG(current_internal_encoding)->no_encoding) { + mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); + } else if (php_mb_is_no_encoding_unicode(string.no_encoding) && php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) { + + if (php_mb_is_no_encoding_utf8(string.no_encoding)) { + + if (MBSTRG(current_filter_illegal_substchar) > 0xd7ff && + 0xe000 > MBSTRG(current_filter_illegal_substchar) + ) { + mbfl_buffer_converter_illegal_substchar(convd, 0x3f); + } else { + mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); + } + + } else { + mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); + } + + } else { + mbfl_buffer_converter_illegal_substchar(convd, 0x3f); + } /* do it */ ret = mbfl_buffer_converter_feed_result(convd, &string, &result); @@ -3260,7 +3354,7 @@ PHP_FUNCTION(mb_convert_encoding) { char *arg_str, *arg_new; size_t str_len, new_len; - zval *arg_old; + zval *arg_old = NULL; size_t size, l, n; char *_from_encodings = NULL, *ret, *s_free = NULL; @@ -3271,7 +3365,7 @@ PHP_FUNCTION(mb_convert_encoding) return; } - if (ZEND_NUM_ARGS() == 3) { + if (arg_old) { switch (Z_TYPE_P(arg_old)) { case IS_ARRAY: target_hash = Z_ARRVAL_P(arg_old); @@ -3409,21 +3503,21 @@ PHP_FUNCTION(mb_detect_encoding) char *str; size_t str_len; zend_bool strict=0; - zval *encoding_list; + zval *encoding_list = NULL; mbfl_string string; const mbfl_encoding *ret; const mbfl_encoding **elist, **list; size_t size; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z!b", &str, &str_len, &encoding_list, &strict) == FAILURE) { return; } /* make encoding list */ list = NULL; size = 0; - if (ZEND_NUM_ARGS() >= 2 && !Z_ISNULL_P(encoding_list)) { + if (encoding_list) { switch (Z_TYPE_P(encoding_list)) { case IS_ARRAY: if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0)) { @@ -3487,6 +3581,10 @@ PHP_FUNCTION(mb_list_encodings) const mbfl_encoding *encoding; int i; + if (zend_parse_parameters_none() == FAILURE) { + return; + } + array_init(return_value); i = 0; encodings = mbfl_get_supported_encodings(); @@ -3535,17 +3633,24 @@ PHP_FUNCTION(mb_encode_mimeheader) char *trans_enc_name = NULL; size_t trans_enc_name_len; char *linefeed = "\r\n"; - size_t linefeed_len; + size_t linefeed_len, string_len; zend_long indent = 0; mbfl_string_init(&string); string.no_language = MBSTRG(language); string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|sssl", (char **)&string.val, &string_len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) { return; } + if (ZEND_SIZE_T_UINT_OVFL(string_len)) { + php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX); + return; + } + + string.len = (uint32_t)string_len; + charset = mbfl_no_encoding_pass; transenc = mbfl_no_encoding_base64; @@ -3588,15 +3693,23 @@ PHP_FUNCTION(mb_encode_mimeheader) PHP_FUNCTION(mb_decode_mimeheader) { mbfl_string string, result, *ret; + size_t string_len; mbfl_string_init(&string); string.no_language = MBSTRG(language); string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", (char **)&string.val, &string.len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", (char **)&string.val, &string_len) == FAILURE) { return; } + if (ZEND_SIZE_T_UINT_OVFL(string_len)) { + php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX); + return; + } + + string.len = (uint32_t)string_len; + mbfl_string_init(&result); ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding); if (ret != NULL) { @@ -3618,16 +3731,23 @@ PHP_FUNCTION(mb_convert_kana) char *optstr = NULL; size_t optstr_len; char *encname = NULL; - size_t encname_len; + size_t encname_len, string_len; mbfl_string_init(&string); string.no_language = MBSTRG(language); string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|ss", (char **)&string.val, &string_len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) { return; } + if (ZEND_SIZE_T_UINT_OVFL(string_len)) { + php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX); + return; + } + + string.len = (uint32_t)string_len; + /* option */ if (optstr != NULL) { char *p = optstr; @@ -3948,17 +4068,16 @@ detect_end: static void php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type) { - char *str, *encoding; + char *str, *encoding = NULL; size_t str_len, encoding_len; zval *zconvmap, *hash_entry; HashTable *target_hash; - int argc = ZEND_NUM_ARGS(); int i, *convmap, *mapelm, mapsize=0; zend_bool is_hex = 0; mbfl_string string, result, *ret; enum mbfl_no_encoding no_encoding; - if (zend_parse_parameters(argc, "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) { return; } @@ -3969,7 +4088,7 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type) string.len = str_len; /* encoding */ - if ((argc == 3 || argc == 4) && encoding_len > 0) { + if (encoding && encoding_len > 0) { no_encoding = mbfl_name2no_encoding(encoding); if (no_encoding == mbfl_no_encoding_invalid) { php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding); @@ -3979,10 +4098,8 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type) } } - if (argc == 4) { - if (type == 0 && is_hex) { - type = 2; /* output in hex format */ - } + if (type == 0 && is_hex) { + type = 2; /* output in hex format */ } /* conversion map */ @@ -4162,7 +4279,7 @@ static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t zval val; /* FIXME: some locale free implementation is * really required here,,, */ - php_strtoupper(fld_name->val, fld_name->len); + php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name)); ZVAL_STR(&val, fld_val); zend_hash_update(ht, fld_name, &val); @@ -4210,7 +4327,7 @@ out: zval val; /* FIXME: some locale free implementation is * really required here,,, */ - php_strtoupper(fld_name->val, fld_name->len); + php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name)); ZVAL_STR(&val, fld_val); zend_hash_update(ht, fld_name, &val); @@ -4284,7 +4401,7 @@ PHP_FUNCTION(mb_send_mail) MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len); } if (extra_cmd) { - MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd->val, extra_cmd->len); + MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(extra_cmd), ZSTR_LEN(extra_cmd)); } zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0); @@ -4293,11 +4410,12 @@ PHP_FUNCTION(mb_send_mail) _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len); } - if ((s = zend_hash_str_find_ptr(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) { + if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) { char *tmp; char *param_name; char *charset = NULL; + ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING); p = strchr(Z_STRVAL_P(s), ';'); if (p != NULL) { @@ -4328,9 +4446,10 @@ PHP_FUNCTION(mb_send_mail) suppressed_hdrs.cnt_type = 1; } - if ((s = zend_hash_str_find_ptr(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1))) { + if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1))) { enum mbfl_no_encoding _body_enc; + ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING); _body_enc = mbfl_name2no_encoding(Z_STRVAL_P(s)); switch (_body_enc) { case mbfl_no_encoding_base64: @@ -4472,10 +4591,10 @@ PHP_FUNCTION(mb_send_mail) if (force_extra_parameters) { extra_cmd = php_escape_shell_cmd(force_extra_parameters); } else if (extra_cmd) { - extra_cmd = php_escape_shell_cmd(extra_cmd->val); + extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd)); } - if (!err && php_mail(to_r, subject, message, headers, extra_cmd ? extra_cmd->val : NULL)) { + if (!err && php_mail(to_r, subject, message, headers, extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) { RETVAL_TRUE; } else { RETVAL_FALSE; @@ -4684,7 +4803,7 @@ PHP_FUNCTION(mb_get_info) } /* }}} */ -static inline int php_mb_check_encoding(const char *input, size_t length, const char *enc) +MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc) { const mbfl_encoding *encoding = MBSTRG(current_internal_encoding); mbfl_buffer_converter *convd; @@ -4757,6 +4876,342 @@ PHP_FUNCTION(mb_check_encoding) } /* }}} */ + +static inline zend_long php_mb_ord(const char* str, size_t str_len, const char* enc) +{ + enum mbfl_no_encoding no_enc; + char* ret; + size_t ret_len; + const mbfl_encoding *encoding; + unsigned char char_len; + zend_long cp; + + if (enc == NULL) { + no_enc = MBSTRG(current_internal_encoding)->no_encoding; + } else { + no_enc = mbfl_name2no_encoding(enc); + + if (no_enc == mbfl_no_encoding_invalid) { + php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc); + return -1; + } + } + + if (php_mb_is_no_encoding_unicode(no_enc)) { + + ret = php_mb_convert_encoding(str, str_len, "UCS-4BE", enc, &ret_len); + + if (ret == NULL) { + return -1; + } + + cp = (unsigned char) ret[0] << 24 | \ + (unsigned char) ret[1] << 16 | \ + (unsigned char) ret[2] << 8 | \ + (unsigned char) ret[3]; + + efree(ret); + + return cp; + + } else if (php_mb_is_unsupported_no_encoding(no_enc)) { + php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); + return -1; + } + + ret = php_mb_convert_encoding(str, str_len, enc, enc, &ret_len); + + if (ret == NULL) { + return -1; + } + + encoding = mbfl_no2encoding(no_enc); + char_len = php_mb_mbchar_bytes_ex(ret, encoding); + + if (char_len == 1) { + cp = (unsigned char) ret[0]; + } else if (char_len == 2) { + cp = ((unsigned char) ret[0] << 8) | \ + (unsigned char) ret[1]; + } else if (char_len == 3) { + cp = ((unsigned char) ret[0] << 16) | \ + ((unsigned char) ret[1] << 8) | \ + (unsigned char) ret[2]; + } else { + cp = ((unsigned char) ret[0] << 24) | \ + ((unsigned char) ret[1] << 16) | \ + ((unsigned char) ret[2] << 8) | \ + (unsigned char) ret[3]; + } + + efree(ret); + + return cp; +} + + +/* {{{ proto bool mb_ord([string str[, string encoding]]) */ +PHP_FUNCTION(mb_ord) +{ + char* str; + size_t str_len; + char* enc = NULL; + size_t enc_len; + zend_long cp; + +#ifndef FAST_ZPP + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &str, &str_len, &enc, &enc_len) == FAILURE) { + return; + } +#else + ZEND_PARSE_PARAMETERS_START(1, 2) + Z_PARAM_STRING(str, str_len) + Z_PARAM_OPTIONAL + Z_PARAM_STRING(enc, enc_len) + ZEND_PARSE_PARAMETERS_END(); +#endif + + cp = php_mb_ord(str, str_len, enc); + + if (0 > cp) { + RETURN_FALSE; + } + + RETURN_LONG(cp); +} +/* }}} */ + + +static inline char* php_mb_chr(zend_long cp, const char* enc, size_t *output_len) +{ + enum mbfl_no_encoding no_enc; + char* buf; + size_t buf_len; + char* ret; + size_t ret_len; + + if (enc == NULL) { + no_enc = MBSTRG(current_internal_encoding)->no_encoding; + } else { + no_enc = mbfl_name2no_encoding(enc); + if (no_enc == mbfl_no_encoding_invalid) { + php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc); + return NULL; + } + } + + if (php_mb_is_no_encoding_utf8(no_enc)) { + + if (0 > cp || cp > 0x10ffff || (cp > 0xd7ff && 0xe000 > cp)) { + if (php_mb_is_no_encoding_utf8(MBSTRG(current_internal_encoding)->no_encoding)) { + cp = MBSTRG(current_filter_illegal_substchar); + } else if (php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) { + if (0xd800 > MBSTRG(current_filter_illegal_substchar) || MBSTRG(current_filter_illegal_substchar) > 0xdfff) { + cp = MBSTRG(current_filter_illegal_substchar); + } else { + cp = 0x3f; + } + } else { + cp = 0x3f; + } + } + + if (cp < 0x80) { + ret_len = 1; + ret = (char *) safe_emalloc(ret_len, 1, 1); + ret[0] = cp; + ret[1] = 0; + } else if (cp < 0x800) { + ret_len = 2; + ret = (char *) safe_emalloc(ret_len, 1, 1); + ret[0] = 0xc0 | (cp >> 6); + ret[1] = 0x80 | (cp & 0x3f); + ret[2] = 0; + } else if (cp < 0x10000) { + ret_len = 3; + ret = (char *) safe_emalloc(ret_len, 1, 1); + ret[0] = 0xe0 | (cp >> 12); + ret[1] = 0x80 | ((cp >> 6) & 0x3f); + ret[2] = 0x80 | (cp & 0x3f); + ret[3] = 0; + } else { + ret_len = 4; + ret = (char *) safe_emalloc(ret_len, 1, 1); + ret[0] = 0xf0 | (cp >> 18); + ret[1] = 0x80 | ((cp >> 12) & 0x3f); + ret[2] = 0x80 | ((cp >> 6) & 0x3f); + ret[3] = 0x80 | (cp & 0x3f); + ret[4] = 0; + } + + if (output_len) { + *output_len = ret_len; + } + + return ret; + + } else if (php_mb_is_no_encoding_unicode(no_enc)) { + + if (0 > cp || 0x10ffff < cp) { + + if (php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) { + cp = MBSTRG(current_filter_illegal_substchar); + } else { + cp = 0x3f; + } + + } + + buf_len = 4; + buf = (char *) safe_emalloc(buf_len, 1, 1); + buf[0] = (cp >> 24) & 0xff; + buf[1] = (cp >> 16) & 0xff; + buf[2] = (cp >> 8) & 0xff; + buf[3] = cp & 0xff; + buf[4] = 0; + + ret = php_mb_convert_encoding(buf, buf_len, enc, "UCS-4BE", &ret_len); + efree(buf); + + if (output_len) { + *output_len = ret_len; + } + + return ret; + + } else if (php_mb_is_unsupported_no_encoding(no_enc)) { + php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); + return NULL; + } + + if (0 > cp || cp > 0x100000000) { + if (no_enc == MBSTRG(current_internal_encoding)->no_encoding) { + cp = MBSTRG(current_filter_illegal_substchar); + } else { + cp = 0x3f; + } + } + + if (cp < 0x100) { + buf_len = 1; + buf = (char *) safe_emalloc(buf_len, 1, 1); + buf[0] = cp; + buf[1] = 0; + } else if (cp < 0x10000) { + buf_len = 2; + buf = (char *) safe_emalloc(buf_len, 1, 1); + buf[0] = cp >> 8; + buf[1] = cp & 0xff; + buf[2] = 0; + } else if (cp < 0x1000000) { + buf_len = 3; + buf = (char *) safe_emalloc(buf_len, 1, 1); + buf[0] = cp >> 16; + buf[1] = (cp >> 8) & 0xff; + buf[2] = cp & 0xff; + buf[3] = 0; + } else { + buf_len = 4; + buf = (char *) safe_emalloc(buf_len, 1, 1); + buf[0] = cp >> 24; + buf[1] = (cp >> 16) & 0xff; + buf[2] = (cp >> 8) & 0xff; + buf[3] = cp & 0xff; + buf[4] = 0; + } + + ret = php_mb_convert_encoding(buf, buf_len, enc, enc, &ret_len); + efree(buf); + + if (output_len) { + *output_len = ret_len; + } + + return ret; +} + + +/* {{{ proto bool mb_ord([int cp[, string encoding]]) */ +PHP_FUNCTION(mb_chr) +{ + zend_long cp; + char* enc = NULL; + size_t enc_len; + char* ret; + size_t ret_len; + +#ifndef FAST_ZPP + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l|s", &cp, &enc, &enc_len) == FAILURE) { + return; + } +#else + ZEND_PARSE_PARAMETERS_START(1, 2) + Z_PARAM_LONG(cp) + Z_PARAM_OPTIONAL + Z_PARAM_STRING(enc, enc_len) + ZEND_PARSE_PARAMETERS_END(); +#endif + + ret = php_mb_chr(cp, enc, &ret_len); + + if (ret == NULL) { + RETURN_FALSE; + } + + RETVAL_STRING(ret); + efree(ret); +} +/* }}} */ + + +static inline char* php_mb_scrub(const char* str, size_t str_len, const char* enc) +{ + size_t ret_len; + + return php_mb_convert_encoding(str, str_len, enc, enc, &ret_len); +} + + +/* {{{ proto bool mb_scrub([string str[, string encoding]]) */ +PHP_FUNCTION(mb_scrub) +{ + char* str; + size_t str_len; + char *enc = NULL; + size_t enc_len; + char *ret; + +#ifndef FAST_ZPP + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &str, &str_len, &enc, &enc_len) == FAILURE) { + return; + } +#else + ZEND_PARSE_PARAMETERS_START(1, 2) + Z_PARAM_STRING(str, str_len) + Z_PARAM_OPTIONAL + Z_PARAM_STRING(enc, enc_len) +ZEND_PARSE_PARAMETERS_END(); +#endif + + if (enc == NULL) { + enc = (char *) MBSTRG(current_internal_encoding)->name; + } else if (!mbfl_is_support_encoding(enc)) { + php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc); + RETURN_FALSE; + } + + ret = php_mb_scrub(str, str_len, enc); + + if (ret == NULL) { + RETURN_FALSE; + } + + RETVAL_STRING(ret); + efree(ret); +} +/* }}} */ + + /* {{{ php_mb_populate_current_detect_order_list */ static void php_mb_populate_current_detect_order_list(void) { @@ -4920,6 +5375,9 @@ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int break; } } else { + if (offset < 0) { + offset += (long)haystack_char_len; + } if (offset < 0 || offset > haystack_char_len) { php_error_docref(NULL, E_WARNING, "Offset not contained in string"); break; |