diff options
Diffstat (limited to 'ext/mbstring/php_mbregex.c')
-rw-r--r-- | ext/mbstring/php_mbregex.c | 322 |
1 files changed, 124 insertions, 198 deletions
diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index 76aff4f244..a797efbf7b 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -1,7 +1,5 @@ /* +----------------------------------------------------------------------+ - | PHP Version 7 | - +----------------------------------------------------------------------+ | Copyright (c) The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | @@ -23,18 +21,19 @@ #include "php.h" #include "php_ini.h" -#if HAVE_MBREGEX +#ifdef HAVE_MBREGEX #include "zend_smart_str.h" #include "ext/standard/info.h" #include "php_mbregex.h" #include "mbstring.h" +#include "libmbfl/filters/mbfilter_utf8.h" #include "php_onig_compat.h" /* must come prior to the oniguruma header */ #include <oniguruma.h> #undef UChar -#if ONIGURUMA_VERSION_INT < 60800 +#if !defined(ONIGURUMA_VERSION_INT) || ONIGURUMA_VERSION_INT < 60800 typedef void OnigMatchParam; #define onig_new_match_param() (NULL) #define onig_initialize_match_param(x) (void)(x) @@ -52,6 +51,7 @@ ZEND_EXTERN_MODULE_GLOBALS(mbstring) struct _zend_mb_regex_globals { OnigEncoding default_mbctype; OnigEncoding current_mbctype; + const mbfl_encoding *current_mbctype_mbfl_encoding; HashTable ht_rc; zval search_str; zval *search_str_val; @@ -75,6 +75,7 @@ static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals) { pglobals->default_mbctype = ONIG_ENCODING_UTF8; pglobals->current_mbctype = ONIG_ENCODING_UTF8; + pglobals->current_mbctype_mbfl_encoding = &mbfl_encoding_utf8; ZVAL_UNDEF(&pglobals->search_str); pglobals->search_re = (php_mb_regex_t*)NULL; pglobals->search_pos = 0; @@ -150,6 +151,7 @@ PHP_RINIT_FUNCTION(mb_regex) PHP_RSHUTDOWN_FUNCTION(mb_regex) { MBREX(current_mbctype) = MBREX(default_mbctype); + MBREX(current_mbctype_mbfl_encoding) = mbfl_name2encoding(php_mb_regex_get_default_mbctype()); if (!Z_ISUNDEF(MBREX(search_str))) { zval_ptr_dtor(&MBREX(search_str)); @@ -274,91 +276,91 @@ static const php_mb_regex_enc_name_map_t enc_name_map[] = { #endif #ifdef ONIG_ENCODING_ISO_8859_1 { - "ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0", + "ISO-8859-1\0ISO8859-1\0", ONIG_ENCODING_ISO_8859_1 }, #endif #ifdef ONIG_ENCODING_ISO_8859_2 { - "ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0", + "ISO-8859-2\0ISO8859-2\0", ONIG_ENCODING_ISO_8859_2 }, #endif #ifdef ONIG_ENCODING_ISO_8859_3 { - "ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0", + "ISO-8859-3\0ISO8859-3\0", ONIG_ENCODING_ISO_8859_3 }, #endif #ifdef ONIG_ENCODING_ISO_8859_4 { - "ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0", + "ISO-8859-4\0ISO8859-4\0", ONIG_ENCODING_ISO_8859_4 }, #endif #ifdef ONIG_ENCODING_ISO_8859_5 { - "ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0", + "ISO-8859-5\0ISO8859-5\0", ONIG_ENCODING_ISO_8859_5 }, #endif #ifdef ONIG_ENCODING_ISO_8859_6 { - "ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0", + "ISO-8859-6\0ISO8859-6\0", ONIG_ENCODING_ISO_8859_6 }, #endif #ifdef ONIG_ENCODING_ISO_8859_7 { - "ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0", + "ISO-8859-7\0ISO8859-7\0", ONIG_ENCODING_ISO_8859_7 }, #endif #ifdef ONIG_ENCODING_ISO_8859_8 { - "ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0", + "ISO-8859-8\0ISO8859-8\0", ONIG_ENCODING_ISO_8859_8 }, #endif #ifdef ONIG_ENCODING_ISO_8859_9 { - "ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0", + "ISO-8859-9\0ISO8859-9\0", ONIG_ENCODING_ISO_8859_9 }, #endif #ifdef ONIG_ENCODING_ISO_8859_10 { - "ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0", + "ISO-8859-10\0ISO8859-10\0", ONIG_ENCODING_ISO_8859_10 }, #endif #ifdef ONIG_ENCODING_ISO_8859_11 { - "ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0", + "ISO-8859-11\0ISO8859-11\0", ONIG_ENCODING_ISO_8859_11 }, #endif #ifdef ONIG_ENCODING_ISO_8859_13 { - "ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0", + "ISO-8859-13\0ISO8859-13\0", ONIG_ENCODING_ISO_8859_13 }, #endif #ifdef ONIG_ENCODING_ISO_8859_14 { - "ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0", + "ISO-8859-14\0ISO8859-14\0", ONIG_ENCODING_ISO_8859_14 }, #endif #ifdef ONIG_ENCODING_ISO_8859_15 { - "ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0", + "ISO-8859-15\0ISO8859-15\0", ONIG_ENCODING_ISO_8859_15 }, #endif #ifdef ONIG_ENCODING_ISO_8859_16 { - "ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0", + "ISO-8859-16\0ISO8859-16\0", ONIG_ENCODING_ISO_8859_16 }, #endif @@ -417,6 +419,7 @@ int php_mb_regex_set_mbctype(const char *encname) return FAILURE; } MBREX(current_mbctype) = mbctype; + MBREX(current_mbctype_mbfl_encoding) = mbfl_name2encoding(encname); return SUCCESS; } /* }}} */ @@ -440,6 +443,13 @@ const char *php_mb_regex_get_mbctype(void) } /* }}} */ +/* {{{ php_mb_regex_get_mbctype_encoding */ +const mbfl_encoding *php_mb_regex_get_mbctype_encoding(void) +{ + return MBREX(current_mbctype_mbfl_encoding); +} +/* }}} */ + /* {{{ php_mb_regex_get_default_mbctype */ const char *php_mb_regex_get_default_mbctype(void) { @@ -451,14 +461,15 @@ const char *php_mb_regex_get_default_mbctype(void) * regex cache */ /* {{{ php_mbregex_compile_pattern */ -static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, size_t patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax) +static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, size_t patlen, OnigOptionType options, OnigSyntaxType *syntax) { int err_code = 0; php_mb_regex_t *retval = NULL, *rc = NULL; OnigErrorInfo err_info; OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; + OnigEncoding enc = MBREX(current_mbctype); - if (!php_mb_check_encoding(pattern, patlen, _php_mb_regex_mbctype2name(enc))) { + if (!php_mb_check_encoding(pattern, patlen, php_mb_regex_get_mbctype_encoding())) { php_error_docref(NULL, E_WARNING, "Pattern is not valid under %s encoding", _php_mb_regex_mbctype2name(enc)); return NULL; @@ -757,6 +768,7 @@ static inline void mb_regex_substitute( p++; break; case 'k': + { clen = (int) php_mb_mbchar_bytes_ex(++p, enc); if (clen != 1 || p == eos || (p[0] != '<' && p[0] != '\'')) { /* not a backref delimiter */ @@ -806,6 +818,7 @@ static inline void mb_regex_substitute( } no = onig_name_to_backref_number(regexp, (OnigUChar *)name, (OnigUChar *)name_end, regs); break; + } default: /* We're not treating \ as an escape character and will interpret something like * \\1 as \ followed by \1, rather than \\ followed by 1. This is because this @@ -833,35 +846,28 @@ static inline void mb_regex_substitute( * php functions */ -/* {{{ proto string mb_regex_encoding([string encoding]) - Returns the current encoding for regex as a string. */ +/* {{{ Returns the current encoding for regex as a string. */ PHP_FUNCTION(mb_regex_encoding) { char *encoding = NULL; size_t encoding_len; - OnigEncoding mbctype; if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &encoding, &encoding_len) == FAILURE) { - return; + RETURN_THROWS(); } if (!encoding) { - const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype)); - - if (retval == NULL) { - RETURN_FALSE; - } + const char *retval = php_mb_regex_get_mbctype(); + ZEND_ASSERT(retval != NULL); - RETURN_STRING((char *)retval); + RETURN_STRING(retval); } else { - mbctype = _php_mb_regex_name2mbctype(encoding); - - if (mbctype == ONIG_ENCODING_UNDEF) { - php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding); - RETURN_FALSE; + if (php_mb_regex_set_mbctype(encoding) == FAILURE) { + zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", encoding); + RETURN_THROWS(); } - MBREX(current_mbctype) = mbctype; + /* TODO Make function return previous encoding? */ RETURN_TRUE; } } @@ -890,30 +896,30 @@ static int _php_mb_onig_search(regex_t* reg, const OnigUChar* str, const OnigUCh /* {{{ _php_mb_regex_ereg_exec */ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) { - zval *arg_pattern, *array = NULL; - char *string; - size_t string_len; + zval *array = NULL; + char *arg_pattern, *string; + size_t arg_pattern_len, string_len; php_mb_regex_t *re; OnigRegion *regs = NULL; int i, match_len, beg, end; OnigOptionType options; char *str; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z", &arg_pattern, &string, &string_len, &array) == FAILURE) { - RETURN_FALSE; + if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|z", &arg_pattern, &arg_pattern_len, &string, &string_len, &array) == FAILURE) { + RETURN_THROWS(); } if (array != NULL) { array = zend_try_array_init(array); if (!array) { - return; + RETURN_THROWS(); } } if (!php_mb_check_encoding( string, string_len, - _php_mb_regex_mbctype2name(MBREX(current_mbctype)) + php_mb_regex_get_mbctype_encoding() )) { RETURN_FALSE; } @@ -923,25 +929,13 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) options |= ONIG_OPTION_IGNORECASE; } - /* compile the regular expression from the supplied regex */ - if (Z_TYPE_P(arg_pattern) != IS_STRING) { - /* we convert numbers to integers and treat them as a string */ - if (Z_TYPE_P(arg_pattern) == IS_DOUBLE) { - convert_to_long_ex(arg_pattern); /* get rid of decimal places */ - } - if (!try_convert_to_string(arg_pattern)) { - return; - } - /* don't bother doing an extended regex with just a number */ - } - - if (Z_STRLEN_P(arg_pattern) == 0) { - php_error_docref(NULL, E_WARNING, "empty pattern"); + if (arg_pattern_len == 0) { + php_error_docref(NULL, E_WARNING, "Empty pattern"); RETVAL_FALSE; goto out; } - re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax)); + re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(regex_default_syntax)); if (re == NULL) { RETVAL_FALSE; goto out; @@ -987,16 +981,14 @@ out: } /* }}} */ -/* {{{ proto int mb_ereg(string pattern, string string [, array registers]) - Regular expression match for multibyte string */ +/* {{{ Regular expression match for multibyte string */ PHP_FUNCTION(mb_ereg) { _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); } /* }}} */ -/* {{{ proto int mb_eregi(string pattern, string string [, array registers]) - Case-insensitive regular expression match for multibyte string */ +/* {{{ Case-insensitive regular expression match for multibyte string */ PHP_FUNCTION(mb_eregi) { _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); @@ -1006,8 +998,6 @@ PHP_FUNCTION(mb_eregi) /* {{{ _php_mb_regex_ereg_replace_exec */ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable) { - zval *arg_pattern_zval; - char *arg_pattern; size_t arg_pattern_len; @@ -1030,47 +1020,34 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp OnigUChar *pos; OnigUChar *string_lim; char *description = NULL; - char pat_buf[6]; - const mbfl_encoding *enc; + const mbfl_encoding *enc = php_mb_regex_get_mbctype_encoding(); + ZEND_ASSERT(enc != NULL); - { - const char *current_enc_name; - current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype)); - if (current_enc_name == NULL || - (enc = mbfl_name2encoding(current_enc_name)) == NULL) { - php_error_docref(NULL, E_WARNING, "Unknown error"); - RETURN_FALSE; - } - } eval = 0; { char *option_str = NULL; size_t option_str_len = 0; if (!is_callable) { - if (zend_parse_parameters(ZEND_NUM_ARGS(), "zss|s", - &arg_pattern_zval, + if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|s", + &arg_pattern, &arg_pattern_len, &replace, &replace_len, &string, &string_len, &option_str, &option_str_len) == FAILURE) { - RETURN_FALSE; + RETURN_THROWS(); } } else { - if (zend_parse_parameters(ZEND_NUM_ARGS(), "zfs|s", - &arg_pattern_zval, + if (zend_parse_parameters(ZEND_NUM_ARGS(), "sfs|s", + &arg_pattern, &arg_pattern_len, &arg_replace_fci, &arg_replace_fci_cache, &string, &string_len, &option_str, &option_str_len) == FAILURE) { - RETURN_FALSE; + RETURN_THROWS(); } } - if (!php_mb_check_encoding( - string, - string_len, - _php_mb_regex_mbctype2name(MBREX(current_mbctype)) - )) { + if (!php_mb_check_encoding(string, string_len, enc)) { RETURN_NULL(); } @@ -1081,36 +1058,22 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp syntax = MBREX(regex_default_syntax); } } - if (eval && !is_callable) { - php_error_docref(NULL, E_DEPRECATED, "The 'e' option is deprecated, use mb_ereg_replace_callback instead"); - } - if (Z_TYPE_P(arg_pattern_zval) == IS_STRING) { - arg_pattern = Z_STRVAL_P(arg_pattern_zval); - arg_pattern_len = Z_STRLEN_P(arg_pattern_zval); - } else { - php_error_docref(NULL, E_DEPRECATED, - "Non-string patterns will be interpreted as strings in the future. " - "Use an explicit chr() call to preserve the current behavior"); - - /* FIXME: this code is not multibyte aware! */ - convert_to_long_ex(arg_pattern_zval); - pat_buf[0] = (char)Z_LVAL_P(arg_pattern_zval); - pat_buf[1] = '\0'; - pat_buf[2] = '\0'; - pat_buf[3] = '\0'; - pat_buf[4] = '\0'; - pat_buf[5] = '\0'; - - arg_pattern = pat_buf; - arg_pattern_len = 1; + if (eval) { + if (is_callable) { + php_error_docref(NULL, E_WARNING, "Option 'e' cannot be used with replacement callback"); + } else { + php_error_docref(NULL, E_WARNING, "The 'e' option is no longer supported, use mb_ereg_replace_callback instead"); + } + RETURN_FALSE; } + /* create regex pattern buffer */ - re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax); + re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, syntax); if (re == NULL) { RETURN_FALSE; } - if (eval || is_callable) { + if (is_callable) { pbuf = &eval_buf; description = zend_make_compiled_string_description("mbregex replace"); } else { @@ -1118,13 +1081,6 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp description = NULL; } - if (is_callable) { - if (eval) { - php_error_docref(NULL, E_WARNING, "Option 'e' cannot be used with replacement callback"); - RETURN_FALSE; - } - } - /* do the actual work */ err = 0; pos = (OnigUChar *)string; @@ -1146,35 +1102,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp mb_regex_substitute(pbuf, string, string_len, replace, replace_len, re, regs, enc); } - if (eval) { - zval v; - zend_string *eval_str; - /* null terminate buffer */ - smart_str_0(&eval_buf); - - if (eval_buf.s) { - eval_str = eval_buf.s; - } else { - eval_str = ZSTR_EMPTY_ALLOC(); - } - - /* do eval */ - if (zend_eval_stringl(ZSTR_VAL(eval_str), ZSTR_LEN(eval_str), &v, description) == FAILURE) { - efree(description); - zend_throw_error(NULL, "Failed evaluating code: %s%s", PHP_EOL, ZSTR_VAL(eval_str)); - onig_region_free(regs, 1); - smart_str_free(&out_buf); - smart_str_free(&eval_buf); - RETURN_FALSE; - } - - /* result of eval */ - convert_to_string(&v); - smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v)); - /* Clean up */ - smart_str_free(&eval_buf); - zval_ptr_dtor_str(&v); - } else if (is_callable) { + if (is_callable) { zval args[1]; zval subpats, retval; int i; @@ -1247,32 +1175,28 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp } /* }}} */ -/* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option]) - Replace regular expression for multibyte string */ +/* {{{ Replace regular expression for multibyte string */ PHP_FUNCTION(mb_ereg_replace) { _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0); } /* }}} */ -/* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string) - Case insensitive replace regular expression for multibyte string */ +/* {{{ Case insensitive replace regular expression for multibyte string */ PHP_FUNCTION(mb_eregi_replace) { _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0); } /* }}} */ -/* {{{ proto string mb_ereg_replace_callback(string pattern, string callback, string string [, string option]) - regular expression for multibyte string using replacement callback */ +/* {{{ regular expression for multibyte string using replacement callback */ PHP_FUNCTION(mb_ereg_replace_callback) { _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1); } /* }}} */ -/* {{{ proto array mb_split(string pattern, string string [, int limit]) - split multibyte string into array by regular expression */ +/* {{{ split multibyte string into array by regular expression */ PHP_FUNCTION(mb_split) { char *arg_pattern; @@ -1287,20 +1211,19 @@ PHP_FUNCTION(mb_split) zend_long count = -1; if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) { - RETURN_FALSE; + RETURN_THROWS(); } if (count > 0) { count--; } - if (!php_mb_check_encoding(string, string_len, - _php_mb_regex_mbctype2name(MBREX(current_mbctype)))) { + if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) { RETURN_FALSE; } /* create regex pattern buffer */ - if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax))) == NULL) { + if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(regex_default_syntax))) == NULL) { RETURN_FALSE; } @@ -1355,8 +1278,7 @@ PHP_FUNCTION(mb_split) } /* }}} */ -/* {{{ proto bool mb_ereg_match(string pattern, string string [,string option]) - Regular expression match for multibyte string */ +/* {{{ Regular expression match for multibyte string */ PHP_FUNCTION(mb_ereg_match) { char *arg_pattern; @@ -1378,7 +1300,7 @@ PHP_FUNCTION(mb_ereg_match) if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s", &arg_pattern, &arg_pattern_len, &string, &string_len, &option_str, &option_str_len)==FAILURE) { - RETURN_FALSE; + RETURN_THROWS(); } if (option_str != NULL) { @@ -1389,12 +1311,11 @@ PHP_FUNCTION(mb_ereg_match) } } - if (!php_mb_check_encoding(string, string_len, - _php_mb_regex_mbctype2name(MBREX(current_mbctype)))) { + if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) { RETURN_FALSE; } - if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) { + if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) { RETURN_FALSE; } @@ -1425,13 +1346,15 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode) char *arg_pattern = NULL, *arg_options = NULL; size_t arg_pattern_len, arg_options_len; int err; - size_t n, i, pos, len, beg, end; + size_t n, i, pos, len; + /* Stored as int* in the OnigRegion struct */ + int beg, end; OnigOptionType option = 0; OnigUChar *str; OnigSyntaxType *syntax; if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) { - return; + RETURN_THROWS(); } if (arg_options) { @@ -1448,7 +1371,7 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode) if (arg_pattern) { /* create regex pattern buffer */ - if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) { + if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) { RETURN_FALSE; } } @@ -1532,46 +1455,41 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode) } /* }}} */ -/* {{{ proto bool mb_ereg_search([string pattern[, string option]]) - Regular expression search for multibyte string */ +/* {{{ Regular expression search for multibyte string */ PHP_FUNCTION(mb_ereg_search) { _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); } /* }}} */ -/* {{{ proto array mb_ereg_search_pos([string pattern[, string option]]) - Regular expression search for multibyte string */ +/* {{{ Regular expression search for multibyte string */ PHP_FUNCTION(mb_ereg_search_pos) { _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); } /* }}} */ -/* {{{ proto array mb_ereg_search_regs([string pattern[, string option]]) - Regular expression search for multibyte string */ +/* {{{ Regular expression search for multibyte string */ PHP_FUNCTION(mb_ereg_search_regs) { _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2); } /* }}} */ -/* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]]) - Initialize string and regular expression for search. */ +/* {{{ Initialize string and regular expression for search. */ PHP_FUNCTION(mb_ereg_search_init) { - int argc = ZEND_NUM_ARGS(); zend_string *arg_str; char *arg_pattern = NULL, *arg_options = NULL; size_t arg_pattern_len = 0, arg_options_len = 0; OnigSyntaxType *syntax = NULL; OnigOptionType option; - if (zend_parse_parameters(argc, "S|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) { - return; + if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) { + RETURN_THROWS(); } - if (argc > 1 && arg_pattern_len == 0) { + if (ZEND_NUM_ARGS() > 1 && arg_pattern_len == 0) { php_error_docref(NULL, E_WARNING, "Empty pattern"); RETURN_FALSE; } @@ -1579,14 +1497,14 @@ PHP_FUNCTION(mb_ereg_search_init) option = MBREX(regex_default_options); syntax = MBREX(regex_default_syntax); - if (argc == 3) { + if (ZEND_NUM_ARGS() == 3) { option = 0; _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL); } - if (argc > 1) { + if (ZEND_NUM_ARGS() > 1) { /* create regex pattern buffer */ - if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) { + if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) { RETURN_FALSE; } } @@ -1600,7 +1518,7 @@ PHP_FUNCTION(mb_ereg_search_init) if (php_mb_check_encoding( ZSTR_VAL(arg_str), ZSTR_LEN(arg_str), - _php_mb_regex_mbctype2name(MBREX(current_mbctype)) + php_mb_regex_get_mbctype_encoding() )) { MBREX(search_pos) = 0; RETVAL_TRUE; @@ -1616,13 +1534,18 @@ PHP_FUNCTION(mb_ereg_search_init) } /* }}} */ -/* {{{ proto array mb_ereg_search_getregs(void) - Get matched substring of the last time */ +/* {{{ Get matched substring of the last time */ PHP_FUNCTION(mb_ereg_search_getregs) { - size_t n, i, len, beg, end; + size_t n, i, len; + /* Stored as int* in the OnigRegion struct */ + int beg, end; OnigUChar *str; + if (zend_parse_parameters_none() == FAILURE) { + RETURN_THROWS(); + } + if (MBREX(search_regs) != NULL && Z_TYPE(MBREX(search_str)) == IS_STRING) { array_init(return_value); @@ -1653,22 +1576,24 @@ PHP_FUNCTION(mb_ereg_search_getregs) } /* }}} */ -/* {{{ proto int mb_ereg_search_getpos(void) - Get search start position */ +/* {{{ Get search start position */ PHP_FUNCTION(mb_ereg_search_getpos) { + if (zend_parse_parameters_none() == FAILURE) { + RETURN_THROWS(); + } + RETVAL_LONG(MBREX(search_pos)); } /* }}} */ -/* {{{ proto bool mb_ereg_search_setpos(int position) - Set search start position */ +/* {{{ Set search start position */ PHP_FUNCTION(mb_ereg_search_setpos) { zend_long position; if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &position) == FAILURE) { - return; + RETURN_THROWS(); } /* Accept negative position if length of search string can be determined */ @@ -1701,25 +1626,26 @@ static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *sy } /* }}} */ -/* {{{ proto string mb_regex_set_options([string options]) - Set or get the default options for mbregex functions */ +/* {{{ Set or get the default options for mbregex functions */ PHP_FUNCTION(mb_regex_set_options) { - OnigOptionType opt; - OnigSyntaxType *syntax; + OnigOptionType opt, prev_opt; + OnigSyntaxType *syntax, *prev_syntax; char *string = NULL; size_t string_len; char buf[16]; if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &string, &string_len) == FAILURE) { - RETURN_FALSE; + RETURN_THROWS(); } if (string != NULL) { opt = 0; syntax = NULL; _php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL); - _php_mb_regex_set_options(opt, syntax, NULL, NULL); + _php_mb_regex_set_options(opt, syntax, &prev_opt, &prev_syntax); + opt = prev_opt; + syntax = prev_syntax; } else { opt = MBREX(regex_default_options); syntax = MBREX(regex_default_syntax); |