diff options
Diffstat (limited to 'ext/mbstring/php_mbregex.c')
-rw-r--r-- | ext/mbstring/php_mbregex.c | 410 |
1 files changed, 161 insertions, 249 deletions
diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index 76aff4f244..45aac7baf2 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -1,7 +1,5 @@ /* +----------------------------------------------------------------------+ - | PHP Version 7 | - +----------------------------------------------------------------------+ | Copyright (c) The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | @@ -16,25 +14,24 @@ +----------------------------------------------------------------------+ */ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif +#include "libmbfl/config.h" #include "php.h" #include "php_ini.h" -#if HAVE_MBREGEX +#ifdef HAVE_MBREGEX #include "zend_smart_str.h" #include "ext/standard/info.h" #include "php_mbregex.h" #include "mbstring.h" +#include "libmbfl/filters/mbfilter_utf8.h" #include "php_onig_compat.h" /* must come prior to the oniguruma header */ #include <oniguruma.h> #undef UChar -#if ONIGURUMA_VERSION_INT < 60800 +#if !defined(ONIGURUMA_VERSION_INT) || ONIGURUMA_VERSION_INT < 60800 typedef void OnigMatchParam; #define onig_new_match_param() (NULL) #define onig_initialize_match_param(x) (void)(x) @@ -52,6 +49,7 @@ ZEND_EXTERN_MODULE_GLOBALS(mbstring) struct _zend_mb_regex_globals { OnigEncoding default_mbctype; OnigEncoding current_mbctype; + const mbfl_encoding *current_mbctype_mbfl_encoding; HashTable ht_rc; zval search_str; zval *search_str_val; @@ -75,6 +73,7 @@ static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals) { pglobals->default_mbctype = ONIG_ENCODING_UTF8; pglobals->current_mbctype = ONIG_ENCODING_UTF8; + pglobals->current_mbctype_mbfl_encoding = &mbfl_encoding_utf8; ZVAL_UNDEF(&pglobals->search_str); pglobals->search_re = (php_mb_regex_t*)NULL; pglobals->search_pos = 0; @@ -85,12 +84,6 @@ static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals) } /* }}} */ -/* {{{ _php_mb_regex_globals_dtor */ -static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals) -{ -} -/* }}} */ - /* {{{ php_mb_regex_globals_alloc */ zend_mb_regex_globals *php_mb_regex_globals_alloc(void) { @@ -110,7 +103,6 @@ void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals) if (!pglobals) { return; } - _php_mb_regex_globals_dtor(pglobals); pefree(pglobals, 1); } /* }}} */ @@ -150,6 +142,7 @@ PHP_RINIT_FUNCTION(mb_regex) PHP_RSHUTDOWN_FUNCTION(mb_regex) { MBREX(current_mbctype) = MBREX(default_mbctype); + MBREX(current_mbctype_mbfl_encoding) = mbfl_name2encoding(php_mb_regex_get_default_mbctype()); if (!Z_ISUNDEF(MBREX(search_str))) { zval_ptr_dtor(&MBREX(search_str)); @@ -274,91 +267,91 @@ static const php_mb_regex_enc_name_map_t enc_name_map[] = { #endif #ifdef ONIG_ENCODING_ISO_8859_1 { - "ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0", + "ISO-8859-1\0ISO8859-1\0", ONIG_ENCODING_ISO_8859_1 }, #endif #ifdef ONIG_ENCODING_ISO_8859_2 { - "ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0", + "ISO-8859-2\0ISO8859-2\0", ONIG_ENCODING_ISO_8859_2 }, #endif #ifdef ONIG_ENCODING_ISO_8859_3 { - "ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0", + "ISO-8859-3\0ISO8859-3\0", ONIG_ENCODING_ISO_8859_3 }, #endif #ifdef ONIG_ENCODING_ISO_8859_4 { - "ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0", + "ISO-8859-4\0ISO8859-4\0", ONIG_ENCODING_ISO_8859_4 }, #endif #ifdef ONIG_ENCODING_ISO_8859_5 { - "ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0", + "ISO-8859-5\0ISO8859-5\0", ONIG_ENCODING_ISO_8859_5 }, #endif #ifdef ONIG_ENCODING_ISO_8859_6 { - "ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0", + "ISO-8859-6\0ISO8859-6\0", ONIG_ENCODING_ISO_8859_6 }, #endif #ifdef ONIG_ENCODING_ISO_8859_7 { - "ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0", + "ISO-8859-7\0ISO8859-7\0", ONIG_ENCODING_ISO_8859_7 }, #endif #ifdef ONIG_ENCODING_ISO_8859_8 { - "ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0", + "ISO-8859-8\0ISO8859-8\0", ONIG_ENCODING_ISO_8859_8 }, #endif #ifdef ONIG_ENCODING_ISO_8859_9 { - "ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0", + "ISO-8859-9\0ISO8859-9\0", ONIG_ENCODING_ISO_8859_9 }, #endif #ifdef ONIG_ENCODING_ISO_8859_10 { - "ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0", + "ISO-8859-10\0ISO8859-10\0", ONIG_ENCODING_ISO_8859_10 }, #endif #ifdef ONIG_ENCODING_ISO_8859_11 { - "ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0", + "ISO-8859-11\0ISO8859-11\0", ONIG_ENCODING_ISO_8859_11 }, #endif #ifdef ONIG_ENCODING_ISO_8859_13 { - "ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0", + "ISO-8859-13\0ISO8859-13\0", ONIG_ENCODING_ISO_8859_13 }, #endif #ifdef ONIG_ENCODING_ISO_8859_14 { - "ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0", + "ISO-8859-14\0ISO8859-14\0", ONIG_ENCODING_ISO_8859_14 }, #endif #ifdef ONIG_ENCODING_ISO_8859_15 { - "ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0", + "ISO-8859-15\0ISO8859-15\0", ONIG_ENCODING_ISO_8859_15 }, #endif #ifdef ONIG_ENCODING_ISO_8859_16 { - "ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0", + "ISO-8859-16\0ISO8859-16\0", ONIG_ENCODING_ISO_8859_16 }, #endif @@ -417,6 +410,7 @@ int php_mb_regex_set_mbctype(const char *encname) return FAILURE; } MBREX(current_mbctype) = mbctype; + MBREX(current_mbctype_mbfl_encoding) = mbfl_name2encoding(encname); return SUCCESS; } /* }}} */ @@ -440,6 +434,13 @@ const char *php_mb_regex_get_mbctype(void) } /* }}} */ +/* {{{ php_mb_regex_get_mbctype_encoding */ +const mbfl_encoding *php_mb_regex_get_mbctype_encoding(void) +{ + return MBREX(current_mbctype_mbfl_encoding); +} +/* }}} */ + /* {{{ php_mb_regex_get_default_mbctype */ const char *php_mb_regex_get_default_mbctype(void) { @@ -451,14 +452,15 @@ const char *php_mb_regex_get_default_mbctype(void) * regex cache */ /* {{{ php_mbregex_compile_pattern */ -static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, size_t patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax) +static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, size_t patlen, OnigOptionType options, OnigSyntaxType *syntax) { int err_code = 0; php_mb_regex_t *retval = NULL, *rc = NULL; OnigErrorInfo err_info; OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; + OnigEncoding enc = MBREX(current_mbctype); - if (!php_mb_check_encoding(pattern, patlen, _php_mb_regex_mbctype2name(enc))) { + if (!php_mb_check_encoding(pattern, patlen, php_mb_regex_get_mbctype_encoding())) { php_error_docref(NULL, E_WARNING, "Pattern is not valid under %s encoding", _php_mb_regex_mbctype2name(enc)); return NULL; @@ -589,8 +591,8 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionT /* }}} */ /* {{{ _php_mb_regex_init_options */ -static void -_php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval) +static bool _php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option, + OnigSyntaxType **syntax) { size_t n; char c; @@ -648,15 +650,14 @@ _php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option case 'd': *syntax = ONIG_SYNTAX_POSIX_EXTENDED; break; - case 'e': - if (eval != NULL) *eval = 1; - break; default: - break; + zend_value_error("Option \"%c\" is not supported", c); + return false; } } if (option != NULL) *option|=optm; } + return true; } /* }}} */ @@ -757,6 +758,7 @@ static inline void mb_regex_substitute( p++; break; case 'k': + { clen = (int) php_mb_mbchar_bytes_ex(++p, enc); if (clen != 1 || p == eos || (p[0] != '<' && p[0] != '\'')) { /* not a backref delimiter */ @@ -806,6 +808,7 @@ static inline void mb_regex_substitute( } no = onig_name_to_backref_number(regexp, (OnigUChar *)name, (OnigUChar *)name_end, regs); break; + } default: /* We're not treating \ as an escape character and will interpret something like * \\1 as \ followed by \1, rather than \\ followed by 1. This is because this @@ -833,35 +836,28 @@ static inline void mb_regex_substitute( * php functions */ -/* {{{ proto string mb_regex_encoding([string encoding]) - Returns the current encoding for regex as a string. */ +/* {{{ Returns the current encoding for regex as a string. */ PHP_FUNCTION(mb_regex_encoding) { char *encoding = NULL; size_t encoding_len; - OnigEncoding mbctype; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &encoding, &encoding_len) == FAILURE) { - return; + if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!", &encoding, &encoding_len) == FAILURE) { + RETURN_THROWS(); } if (!encoding) { - const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype)); - - if (retval == NULL) { - RETURN_FALSE; - } + const char *retval = php_mb_regex_get_mbctype(); + ZEND_ASSERT(retval != NULL); - RETURN_STRING((char *)retval); + RETURN_STRING(retval); } else { - mbctype = _php_mb_regex_name2mbctype(encoding); - - if (mbctype == ONIG_ENCODING_UNDEF) { - php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding); - RETURN_FALSE; + if (php_mb_regex_set_mbctype(encoding) == FAILURE) { + zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", encoding); + RETURN_THROWS(); } - MBREX(current_mbctype) = mbctype; + /* TODO Make function return previous encoding? */ RETURN_TRUE; } } @@ -890,30 +886,35 @@ static int _php_mb_onig_search(regex_t* reg, const OnigUChar* str, const OnigUCh /* {{{ _php_mb_regex_ereg_exec */ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) { - zval *arg_pattern, *array = NULL; - char *string; - size_t string_len; + zval *array = NULL; + char *arg_pattern, *string; + size_t arg_pattern_len, string_len; php_mb_regex_t *re; OnigRegion *regs = NULL; int i, match_len, beg, end; OnigOptionType options; char *str; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z", &arg_pattern, &string, &string_len, &array) == FAILURE) { - RETURN_FALSE; + if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|z", &arg_pattern, &arg_pattern_len, &string, &string_len, &array) == FAILURE) { + RETURN_THROWS(); + } + + if (arg_pattern_len == 0) { + zend_argument_value_error(1, "must not be empty"); + RETURN_THROWS(); } if (array != NULL) { array = zend_try_array_init(array); if (!array) { - return; + RETURN_THROWS(); } } if (!php_mb_check_encoding( string, string_len, - _php_mb_regex_mbctype2name(MBREX(current_mbctype)) + php_mb_regex_get_mbctype_encoding() )) { RETURN_FALSE; } @@ -923,25 +924,7 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) options |= ONIG_OPTION_IGNORECASE; } - /* compile the regular expression from the supplied regex */ - if (Z_TYPE_P(arg_pattern) != IS_STRING) { - /* we convert numbers to integers and treat them as a string */ - if (Z_TYPE_P(arg_pattern) == IS_DOUBLE) { - convert_to_long_ex(arg_pattern); /* get rid of decimal places */ - } - if (!try_convert_to_string(arg_pattern)) { - return; - } - /* don't bother doing an extended regex with just a number */ - } - - if (Z_STRLEN_P(arg_pattern) == 0) { - php_error_docref(NULL, E_WARNING, "empty pattern"); - RETVAL_FALSE; - goto out; - } - - re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax)); + re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(regex_default_syntax)); if (re == NULL) { RETVAL_FALSE; goto out; @@ -987,16 +970,14 @@ out: } /* }}} */ -/* {{{ proto int mb_ereg(string pattern, string string [, array registers]) - Regular expression match for multibyte string */ +/* {{{ Regular expression match for multibyte string */ PHP_FUNCTION(mb_ereg) { _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); } /* }}} */ -/* {{{ proto int mb_eregi(string pattern, string string [, array registers]) - Case-insensitive regular expression match for multibyte string */ +/* {{{ Case-insensitive regular expression match for multibyte string */ PHP_FUNCTION(mb_eregi) { _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); @@ -1006,8 +987,6 @@ PHP_FUNCTION(mb_eregi) /* {{{ _php_mb_regex_ereg_replace_exec */ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable) { - zval *arg_pattern_zval; - char *arg_pattern; size_t arg_pattern_len; @@ -1026,91 +1005,58 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp smart_str out_buf = {0}; smart_str eval_buf = {0}; smart_str *pbuf; - int err, eval, n; + int err, n; OnigUChar *pos; OnigUChar *string_lim; char *description = NULL; - char pat_buf[6]; - const mbfl_encoding *enc; + const mbfl_encoding *enc = php_mb_regex_get_mbctype_encoding(); + ZEND_ASSERT(enc != NULL); { - const char *current_enc_name; - current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype)); - if (current_enc_name == NULL || - (enc = mbfl_name2encoding(current_enc_name)) == NULL) { - php_error_docref(NULL, E_WARNING, "Unknown error"); - RETURN_FALSE; - } - } - eval = 0; - { char *option_str = NULL; size_t option_str_len = 0; if (!is_callable) { - if (zend_parse_parameters(ZEND_NUM_ARGS(), "zss|s", - &arg_pattern_zval, + if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|s!", + &arg_pattern, &arg_pattern_len, &replace, &replace_len, &string, &string_len, &option_str, &option_str_len) == FAILURE) { - RETURN_FALSE; + RETURN_THROWS(); } } else { - if (zend_parse_parameters(ZEND_NUM_ARGS(), "zfs|s", - &arg_pattern_zval, + if (zend_parse_parameters(ZEND_NUM_ARGS(), "sfs|s!", + &arg_pattern, &arg_pattern_len, &arg_replace_fci, &arg_replace_fci_cache, &string, &string_len, &option_str, &option_str_len) == FAILURE) { - RETURN_FALSE; + RETURN_THROWS(); } } - if (!php_mb_check_encoding( - string, - string_len, - _php_mb_regex_mbctype2name(MBREX(current_mbctype)) - )) { + if (!php_mb_check_encoding(string, string_len, enc)) { RETURN_NULL(); } if (option_str != NULL) { - _php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval); + /* Initialize option and in case of failure it means there is a value error */ + if (!_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax)) { + RETURN_THROWS(); + } } else { options |= MBREX(regex_default_options); syntax = MBREX(regex_default_syntax); } } - if (eval && !is_callable) { - php_error_docref(NULL, E_DEPRECATED, "The 'e' option is deprecated, use mb_ereg_replace_callback instead"); - } - if (Z_TYPE_P(arg_pattern_zval) == IS_STRING) { - arg_pattern = Z_STRVAL_P(arg_pattern_zval); - arg_pattern_len = Z_STRLEN_P(arg_pattern_zval); - } else { - php_error_docref(NULL, E_DEPRECATED, - "Non-string patterns will be interpreted as strings in the future. " - "Use an explicit chr() call to preserve the current behavior"); - - /* FIXME: this code is not multibyte aware! */ - convert_to_long_ex(arg_pattern_zval); - pat_buf[0] = (char)Z_LVAL_P(arg_pattern_zval); - pat_buf[1] = '\0'; - pat_buf[2] = '\0'; - pat_buf[3] = '\0'; - pat_buf[4] = '\0'; - pat_buf[5] = '\0'; - - arg_pattern = pat_buf; - arg_pattern_len = 1; - } + /* create regex pattern buffer */ - re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax); + re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, syntax); if (re == NULL) { RETURN_FALSE; } - if (eval || is_callable) { + if (is_callable) { pbuf = &eval_buf; description = zend_make_compiled_string_description("mbregex replace"); } else { @@ -1118,13 +1064,6 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp description = NULL; } - if (is_callable) { - if (eval) { - php_error_docref(NULL, E_WARNING, "Option 'e' cannot be used with replacement callback"); - RETURN_FALSE; - } - } - /* do the actual work */ err = 0; pos = (OnigUChar *)string; @@ -1146,35 +1085,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp mb_regex_substitute(pbuf, string, string_len, replace, replace_len, re, regs, enc); } - if (eval) { - zval v; - zend_string *eval_str; - /* null terminate buffer */ - smart_str_0(&eval_buf); - - if (eval_buf.s) { - eval_str = eval_buf.s; - } else { - eval_str = ZSTR_EMPTY_ALLOC(); - } - - /* do eval */ - if (zend_eval_stringl(ZSTR_VAL(eval_str), ZSTR_LEN(eval_str), &v, description) == FAILURE) { - efree(description); - zend_throw_error(NULL, "Failed evaluating code: %s%s", PHP_EOL, ZSTR_VAL(eval_str)); - onig_region_free(regs, 1); - smart_str_free(&out_buf); - smart_str_free(&eval_buf); - RETURN_FALSE; - } - - /* result of eval */ - convert_to_string(&v); - smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v)); - /* Clean up */ - smart_str_free(&eval_buf); - zval_ptr_dtor_str(&v); - } else if (is_callable) { + if (is_callable) { zval args[1]; zval subpats, retval; int i; @@ -1203,7 +1114,9 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp zval_ptr_dtor(&retval); } else { if (!EG(exception)) { - php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function"); + zend_throw_error(NULL, "Unable to call custom replacement function"); + zval_ptr_dtor(&subpats); + RETURN_THROWS(); } } zval_ptr_dtor(&subpats); @@ -1247,32 +1160,28 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp } /* }}} */ -/* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option]) - Replace regular expression for multibyte string */ +/* {{{ Replace regular expression for multibyte string */ PHP_FUNCTION(mb_ereg_replace) { _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0); } /* }}} */ -/* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string) - Case insensitive replace regular expression for multibyte string */ +/* {{{ Case insensitive replace regular expression for multibyte string */ PHP_FUNCTION(mb_eregi_replace) { _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0); } /* }}} */ -/* {{{ proto string mb_ereg_replace_callback(string pattern, string callback, string string [, string option]) - regular expression for multibyte string using replacement callback */ +/* {{{ regular expression for multibyte string using replacement callback */ PHP_FUNCTION(mb_ereg_replace_callback) { _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1); } /* }}} */ -/* {{{ proto array mb_split(string pattern, string string [, int limit]) - split multibyte string into array by regular expression */ +/* {{{ split multibyte string into array by regular expression */ PHP_FUNCTION(mb_split) { char *arg_pattern; @@ -1287,20 +1196,19 @@ PHP_FUNCTION(mb_split) zend_long count = -1; if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) { - RETURN_FALSE; + RETURN_THROWS(); } if (count > 0) { count--; } - if (!php_mb_check_encoding(string, string_len, - _php_mb_regex_mbctype2name(MBREX(current_mbctype)))) { + if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) { RETURN_FALSE; } /* create regex pattern buffer */ - if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax))) == NULL) { + if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(regex_default_syntax))) == NULL) { RETURN_FALSE; } @@ -1337,6 +1245,7 @@ PHP_FUNCTION(mb_split) onig_region_free(regs, 1); /* see if we encountered an error */ + // ToDo investigate if this can actually/should happen ... if (err <= -2) { OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str(err_str, err); @@ -1355,8 +1264,7 @@ PHP_FUNCTION(mb_split) } /* }}} */ -/* {{{ proto bool mb_ereg_match(string pattern, string string [,string option]) - Regular expression match for multibyte string */ +/* {{{ Regular expression match for multibyte string */ PHP_FUNCTION(mb_ereg_match) { char *arg_pattern; @@ -1375,26 +1283,27 @@ PHP_FUNCTION(mb_ereg_match) char *option_str = NULL; size_t option_str_len = 0; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s", + if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s!", &arg_pattern, &arg_pattern_len, &string, &string_len, &option_str, &option_str_len)==FAILURE) { - RETURN_FALSE; + RETURN_THROWS(); } if (option_str != NULL) { - _php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL); + if(!_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax)) { + RETURN_THROWS(); + } } else { option |= MBREX(regex_default_options); syntax = MBREX(regex_default_syntax); } } - if (!php_mb_check_encoding(string, string_len, - _php_mb_regex_mbctype2name(MBREX(current_mbctype)))) { + if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) { RETURN_FALSE; } - if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) { + if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) { RETURN_FALSE; } @@ -1419,23 +1328,24 @@ PHP_FUNCTION(mb_ereg_match) /* regex search */ /* {{{ _php_mb_regex_ereg_search_exec */ -static void -_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode) +static void _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode) { char *arg_pattern = NULL, *arg_options = NULL; size_t arg_pattern_len, arg_options_len; int err; - size_t n, i, pos, len, beg, end; + size_t n, i, pos, len; + /* Stored as int* in the OnigRegion struct */ + int beg, end; OnigOptionType option = 0; OnigUChar *str; OnigSyntaxType *syntax; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) { - return; + if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!s!", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) { + RETURN_THROWS(); } if (arg_options) { - _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL); + _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax); } else { option |= MBREX(regex_default_options); syntax = MBREX(regex_default_syntax); @@ -1448,7 +1358,7 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode) if (arg_pattern) { /* create regex pattern buffer */ - if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) { + if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) { RETURN_FALSE; } } @@ -1462,13 +1372,13 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode) } if (MBREX(search_re) == NULL) { - php_error_docref(NULL, E_WARNING, "No regex given"); - RETURN_FALSE; + zend_throw_error(NULL, "No pattern was provided"); + RETURN_THROWS(); } if (str == NULL) { - php_error_docref(NULL, E_WARNING, "No string given"); - RETURN_FALSE; + zend_throw_error(NULL, "No string was provided"); + RETURN_THROWS(); } MBREX(search_regs) = onig_region_new(); @@ -1532,61 +1442,56 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode) } /* }}} */ -/* {{{ proto bool mb_ereg_search([string pattern[, string option]]) - Regular expression search for multibyte string */ +/* {{{ Regular expression search for multibyte string */ PHP_FUNCTION(mb_ereg_search) { _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); } /* }}} */ -/* {{{ proto array mb_ereg_search_pos([string pattern[, string option]]) - Regular expression search for multibyte string */ +/* {{{ Regular expression search for multibyte string */ PHP_FUNCTION(mb_ereg_search_pos) { _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); } /* }}} */ -/* {{{ proto array mb_ereg_search_regs([string pattern[, string option]]) - Regular expression search for multibyte string */ +/* {{{ Regular expression search for multibyte string */ PHP_FUNCTION(mb_ereg_search_regs) { _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2); } /* }}} */ -/* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]]) - Initialize string and regular expression for search. */ +/* {{{ Initialize string and regular expression for search. */ PHP_FUNCTION(mb_ereg_search_init) { - int argc = ZEND_NUM_ARGS(); zend_string *arg_str; char *arg_pattern = NULL, *arg_options = NULL; size_t arg_pattern_len = 0, arg_options_len = 0; OnigSyntaxType *syntax = NULL; OnigOptionType option; - if (zend_parse_parameters(argc, "S|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) { - return; + if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|s!s!", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) { + RETURN_THROWS(); } - if (argc > 1 && arg_pattern_len == 0) { - php_error_docref(NULL, E_WARNING, "Empty pattern"); - RETURN_FALSE; + if (arg_pattern && arg_pattern_len == 0) { + zend_argument_value_error(2, "must not be empty"); + RETURN_THROWS(); } - option = MBREX(regex_default_options); - syntax = MBREX(regex_default_syntax); - - if (argc == 3) { + if (arg_options) { option = 0; - _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL); + _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax); + } else { + option = MBREX(regex_default_options); + syntax = MBREX(regex_default_syntax); } - if (argc > 1) { + if (arg_pattern) { /* create regex pattern buffer */ - if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) { + if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) { RETURN_FALSE; } } @@ -1597,11 +1502,7 @@ PHP_FUNCTION(mb_ereg_search_init) ZVAL_STR_COPY(&MBREX(search_str), arg_str); - if (php_mb_check_encoding( - ZSTR_VAL(arg_str), - ZSTR_LEN(arg_str), - _php_mb_regex_mbctype2name(MBREX(current_mbctype)) - )) { + if (php_mb_check_encoding(ZSTR_VAL(arg_str), ZSTR_LEN(arg_str), php_mb_regex_get_mbctype_encoding())) { MBREX(search_pos) = 0; RETVAL_TRUE; } else { @@ -1616,13 +1517,18 @@ PHP_FUNCTION(mb_ereg_search_init) } /* }}} */ -/* {{{ proto array mb_ereg_search_getregs(void) - Get matched substring of the last time */ +/* {{{ Get matched substring of the last time */ PHP_FUNCTION(mb_ereg_search_getregs) { - size_t n, i, len, beg, end; + size_t n, i, len; + /* Stored as int* in the OnigRegion struct */ + int beg, end; OnigUChar *str; + if (zend_parse_parameters_none() == FAILURE) { + RETURN_THROWS(); + } + if (MBREX(search_regs) != NULL && Z_TYPE(MBREX(search_str)) == IS_STRING) { array_init(return_value); @@ -1648,27 +1554,30 @@ PHP_FUNCTION(mb_ereg_search_getregs) onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args); } } else { + // TODO This seems to be some logical error, promote to Error RETVAL_FALSE; } } /* }}} */ -/* {{{ proto int mb_ereg_search_getpos(void) - Get search start position */ +/* {{{ Get search start position */ PHP_FUNCTION(mb_ereg_search_getpos) { + if (zend_parse_parameters_none() == FAILURE) { + RETURN_THROWS(); + } + RETVAL_LONG(MBREX(search_pos)); } /* }}} */ -/* {{{ proto bool mb_ereg_search_setpos(int position) - Set search start position */ +/* {{{ Set search start position */ PHP_FUNCTION(mb_ereg_search_setpos) { zend_long position; if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &position) == FAILURE) { - return; + RETURN_THROWS(); } /* Accept negative position if length of search string can be determined */ @@ -1677,12 +1586,12 @@ PHP_FUNCTION(mb_ereg_search_setpos) } if (position < 0 || (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING && (size_t)position > Z_STRLEN(MBREX(search_str)))) { - php_error_docref(NULL, E_WARNING, "Position is out of range"); - MBREX(search_pos) = 0; - RETURN_FALSE; + zend_argument_value_error(1, "is out of range"); + RETURN_THROWS(); } MBREX(search_pos) = position; + // TODO Return void RETURN_TRUE; } /* }}} */ @@ -1701,25 +1610,28 @@ static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *sy } /* }}} */ -/* {{{ proto string mb_regex_set_options([string options]) - Set or get the default options for mbregex functions */ +/* {{{ Set or get the default options for mbregex functions */ PHP_FUNCTION(mb_regex_set_options) { - OnigOptionType opt; - OnigSyntaxType *syntax; + OnigOptionType opt, prev_opt; + OnigSyntaxType *syntax, *prev_syntax; char *string = NULL; size_t string_len; char buf[16]; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", + if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!", &string, &string_len) == FAILURE) { - RETURN_FALSE; + RETURN_THROWS(); } if (string != NULL) { opt = 0; syntax = NULL; - _php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL); - _php_mb_regex_set_options(opt, syntax, NULL, NULL); + if(!_php_mb_regex_init_options(string, string_len, &opt, &syntax)) { + RETURN_THROWS(); + } + _php_mb_regex_set_options(opt, syntax, &prev_opt, &prev_syntax); + opt = prev_opt; + syntax = prev_syntax; } else { opt = MBREX(regex_default_options); syntax = MBREX(regex_default_syntax); |